summaryrefslogtreecommitdiff
path: root/makima/frontend/src/routes
diff options
context:
space:
mode:
author: soryu <soryu@soryu.co> 2025-12-22 04:50:25 +0000
committer: soryu <soryu@soryu.co> 2025-12-23 14:47:18 +0000
commit: 0741a8b8e9a2099c82bff6d6b9ebbce9c07cad53 (patch)
tree: 88cbd5fecb9ca72a04aa07f1a6db4e1a751b1fd7 /makima/frontend/src/routes
parent: aee2e4e784afd6d115fb5f7b40284c4efd2da966 (diff)
download: soryu-0741a8b8e9a2099c82bff6d6b9ebbce9c07cad53.tar.gz
soryu-0741a8b8e9a2099c82bff6d6b9ebbce9c07cad53.zip
Update makima FE to add initial listening system
Diffstat (limited to 'makima/frontend/src/routes')
-rw-r--r-- makima/frontend/src/routes/_index.tsx | 113
-rw-r--r-- makima/frontend/src/routes/listen.tsx | 158
2 files changed, 271 insertions, 0 deletions
diff --git a/makima/frontend/src/routes/_index.tsx b/makima/frontend/src/routes/_index.tsx
new file mode 100644
index 0000000..568e300
--- /dev/null
+++ b/makima/frontend/src/routes/_index.tsx
@@ -0,0 +1,113 @@
+import { Masthead } from "../components/Masthead";
+import { Logo } from "../components/Logo";
+import { RewriteLink } from "../components/RewriteLink";
+
+/** Tailwind classes shared by the three hero call-to-action links. */
+const CTA_LINK_CLASSES = "!py-2 !px-3 text-sm";
+
+/** Tailwind classes shared by every card in the feature grid. */
+const FEATURE_CARD_CLASSES =
+  "border border-[#3f6fb3] bg-[#0d1b2d] p-3 font-mono tracking-tight";
+
+/** Heading/description pairs rendered, in this order, in the feature grid. */
+const PLATFORM_FEATURES = [
+  {
+    title: "Real-time Streaming",
+    summary: "WebSocket-based audio streaming with low latency transcription",
+  },
+  {
+    title: "Speaker Diarization",
+    summary: "Automatic speaker identification and labeling",
+  },
+  {
+    title: "End-of-Utterance",
+    summary: "Smart detection of speech boundaries",
+  },
+  {
+    title: "Multi-format",
+    summary: "Support for PCM32F and PCM16 audio encoding",
+  },
+];
+
+/** Text of the API summary, shown verbatim inside the `<pre>` element. */
+const API_SUMMARY = [
+  "[001] endpoint ............... /api/v1/listen",
+  "[002] protocol ............... WebSocket",
+  "[003] encoding ............... pcm32f / pcm16",
+  "[004] sample_rate ............ 16000 Hz (default)",
+  "[005] features ............... transcription, diarization, eou",
+].join("\n");
+
+/** Hero area: badge, bilingual pitch, call-to-action links and the logo. */
+function HeroSection() {
+  return (
+    <section className="grid grid-cols-1 md:grid-cols-[1.1fr_0.9fr] gap-6 items-center mb-8">
+      <div className="order-2 md:order-1">
+        <span className="inline-block px-2 py-1 border border-[#3f6fb3] bg-[#0f1c2f] text-[#9bc3ff] font-mono text-xs tracking-wide uppercase mb-3">
+          Real-time STT Platform
+        </span>
+        <h2 className="m-0 mb-3 text-xl text-[#f0f5ff] tracking-wide">
+          リアルタイム音声認識
+        </h2>
+        <p className="my-2 text-[#e4edff]">
+          Makima provides real-time speech-to-text transcription with speaker
+          diarization. Stream audio from your microphone or upload files for
+          instant transcription.
+        </p>
+        <p className="my-2 text-[#e4edff]">
+          高精度なリアルタイム音声認識エンジン。WebSocket
+          経由でストリーミング処理を行い、話者分離にも対応。
+        </p>
+
+        <div className="flex flex-wrap gap-3 mt-4">
+          <RewriteLink to="/listen" className={CTA_LINK_CLASSES}>
+            Start Listening
+          </RewriteLink>
+          <RewriteLink href="https://github.com/soryu-co" external className={CTA_LINK_CLASSES}>
+            GitHub
+          </RewriteLink>
+          <RewriteLink href="https://soryu.co" external className={CTA_LINK_CLASSES}>
+            soryu.co
+          </RewriteLink>
+        </div>
+      </div>
+
+      <div className="order-1 md:order-2 flex justify-center">
+        <Logo size={160} />
+      </div>
+    </section>
+  );
+}
+
+/** "Platform Features" panel: one card per entry of PLATFORM_FEATURES. */
+function FeaturePanel() {
+  return (
+    <div className="panel p-4 md:p-5 relative">
+      <span className="absolute -top-2.5 left-3 px-1.5 py-0.5 bg-[#0f1c2f] border border-[#3f6fb3] font-mono text-xs tracking-wide">
+        FEATURES//MAKIMA
+      </span>
+      <h3 className="mt-2 mb-3 text-lg text-[#eef5ff]">Platform Features</h3>
+      <div className="grid grid-cols-1 md:grid-cols-2 gap-3 mt-3 text-sm text-[#e4edff]">
+        {PLATFORM_FEATURES.map(({ title, summary }) => (
+          <div key={title} className={FEATURE_CARD_CLASSES}>
+            <strong className="text-[#9bc3ff]">{title}</strong>
+            <br />
+            {summary}
+          </div>
+        ))}
+      </div>
+    </div>
+  );
+}
+
+/** Terminal-styled box listing the endpoint, protocol and encodings. */
+function ApiSummaryBox() {
+  return (
+    <div className="mt-6 border border-[#0f3c78] bg-[#0f3c78] text-[#dbe7ff] shadow-[inset_0_0_0_1px_rgba(255,255,255,0.1)]">
+      <header className="border-b border-[rgba(197,219,255,0.4)] px-3 py-2.5 font-mono tracking-wide text-xs flex justify-between items-center">
+        <strong>API // makima.jp</strong>
+        <span className="text-[#92b8ff]">endpoint: wss://api.makima.jp</span>
+      </header>
+      <pre className="m-0 px-3 py-4 font-mono text-xs leading-relaxed bg-[linear-gradient(0deg,rgba(255,255,255,0.05)_1px,rgba(15,60,120,0.25)_1px)]">
+        {API_SUMMARY}
+      </pre>
+    </div>
+  );
+}
+
+/** Page footer with the copyright line and the two site links. */
+function PageFooter() {
+  return (
+    <footer className="px-4 py-3 border-t-4 border-double border-[#23477a] bg-gradient-to-b from-[rgba(11,18,32,0.95)] to-[rgba(14,26,45,0.7)] text-xs text-[#e4edff] text-center">
+      <p className="m-1">&copy; makima.jp</p>
+      <p className="m-1">
+        <a href="https://makima.jp" className="text-[#dbe7ff] underline">
+          https://makima.jp
+        </a>
+        {" / "}
+        <a
+          href="https://soryu.co"
+          target="_blank"
+          rel="noopener noreferrer"
+          className="text-[#dbe7ff] underline"
+        >
+          https://soryu.co
+        </a>
+      </p>
+    </footer>
+  );
+}
+
+/**
+ * Landing page for the site: masthead (with ticker and navigation), hero,
+ * feature grid, API summary and footer, stacked in a full-height column.
+ */
+export default function HomePage() {
+  return (
+    <div className="relative z-10 min-h-screen flex flex-col">
+      <Masthead showTicker showNav />
+
+      <main className="flex-1 p-6 md:p-8">
+        <HeroSection />
+        <FeaturePanel />
+        <ApiSummaryBox />
+      </main>
+
+      <PageFooter />
+    </div>
+  );
+}
diff --git a/makima/frontend/src/routes/listen.tsx b/makima/frontend/src/routes/listen.tsx
new file mode 100644
index 0000000..06769fd
--- /dev/null
+++ b/makima/frontend/src/routes/listen.tsx
@@ -0,0 +1,158 @@
+import { useState, useCallback, useMemo, useEffect, useRef } from "react";
+import { Masthead } from "../components/Masthead";
+import { SpeakerPanel } from "../components/listen/SpeakerPanel";
+import { TranscriptPanel } from "../components/listen/TranscriptPanel";
+import { ControlPanel } from "../components/listen/ControlPanel";
+import { useMicrophone } from "../hooks/useMicrophone";
+import { useWebSocket } from "../hooks/useWebSocket";
+
+/** How long a speaker stays highlighted after their latest interim result. */
+const ACTIVE_SPEAKER_TIMEOUT_MS = 1500;
+
+/**
+ * Live transcription page: forwards microphone samples to the WebSocket
+ * session and renders the speaker list, the transcript and the controls.
+ *
+ * Start order is microphone -> socket connect -> `startSession`; a failure at
+ * any step aborts the start and releases what was already acquired.
+ */
+export default function ListenPage() {
+  const [isListening, setIsListening] = useState(false);
+  const [activeSpeaker, setActiveSpeaker] = useState<string | null>(null);
+  const [permissionRequested, setPermissionRequested] = useState(false);
+  const isListeningRef = useRef(false);
+  // True while the async start sequence runs; blocks re-entrant toggles.
+  const isStartingRef = useRef(false);
+  // Handle of the pending "clear the highlighted speaker" timeout, if any.
+  const speakerTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
+  // Latest mic handle, readable from the `ws` callbacks created before `mic`.
+  const micRef = useRef<ReturnType<typeof useMicrophone> | null>(null);
+
+  // Keep ref in sync with state for use in callbacks
+  useEffect(() => {
+    isListeningRef.current = isListening;
+  }, [isListening]);
+
+  // Highlight `speaker` (or nothing) and re-arm the idle timeout.
+  // The timer is re-armed on every call on purpose: setting the same speaker
+  // again is a no-op state update, so an effect keyed on `activeSpeaker` would
+  // not restart and the highlight would drop while the speaker keeps talking.
+  const markActiveSpeaker = useCallback((speaker: string | null) => {
+    if (speakerTimerRef.current !== null) {
+      clearTimeout(speakerTimerRef.current);
+      speakerTimerRef.current = null;
+    }
+    setActiveSpeaker(speaker);
+    if (speaker) {
+      speakerTimerRef.current = setTimeout(() => {
+        speakerTimerRef.current = null;
+        setActiveSpeaker(null);
+      }, ACTIVE_SPEAKER_TIMEOUT_MS);
+    }
+  }, []);
+
+  // Drop any pending highlight timeout when the page unmounts.
+  useEffect(() => {
+    return () => {
+      if (speakerTimerRef.current !== null) {
+        clearTimeout(speakerTimerRef.current);
+      }
+    };
+  }, []);
+
+  const ws = useWebSocket({
+    onTranscript: (transcript) => {
+      // Interim (non-final) results mark the speaker as currently active.
+      if (!transcript.isFinal) {
+        markActiveSpeaker(transcript.speaker);
+      }
+    },
+    onStopped: () => {
+      // The session can end without a user action; release the microphone too
+      // so capture does not continue behind an idle UI.
+      if (micRef.current !== null) {
+        micRef.current.stop();
+      }
+      setIsListening(false);
+      markActiveSpeaker(null);
+    },
+  });
+
+  const wsRef = useRef(ws);
+  useEffect(() => {
+    wsRef.current = ws;
+  }, [ws]);
+
+  // Stable callback for the mic hook; reads live state through refs.
+  const handleAudioData = useCallback((samples: Float32Array) => {
+    if (wsRef.current.isConnected && isListeningRef.current) {
+      wsRef.current.sendAudio(samples);
+    }
+  }, []);
+
+  const mic = useMicrophone({
+    sampleRate: 16000,
+    onAudioData: handleAudioData,
+  });
+  useEffect(() => {
+    micRef.current = mic;
+  }, [mic]);
+
+  // Request microphone permission on page load
+  useEffect(() => {
+    if (!permissionRequested) {
+      setPermissionRequested(true);
+      // Fire-and-forget: the result is not needed here, handleToggle re-checks
+      // `mic.status` before starting.
+      void mic.requestPermission();
+    }
+  }, [permissionRequested, mic.requestPermission]);
+
+  // Derive unique speakers from transcripts
+  const speakers = useMemo(() => {
+    const speakerSet = new Set<string>();
+    ws.transcripts.forEach((t) => speakerSet.add(t.speaker));
+
+    return Array.from(speakerSet).map((speaker) => ({
+      id: speaker,
+      label: speaker,
+      isActive: speaker === activeSpeaker,
+    }));
+  }, [ws.transcripts, activeSpeaker]);
+
+  // Start or stop listening depending on the current state.
+  const handleToggle = useCallback(async () => {
+    if (isListening) {
+      // Stop listening
+      mic.stop();
+      ws.stopSession("user_stopped");
+      setIsListening(false);
+      markActiveSpeaker(null);
+      return;
+    }
+
+    if (isStartingRef.current) {
+      // A start is already in flight (e.g. double click); ignore this one.
+      return;
+    }
+
+    isStartingRef.current = true;
+    try {
+      // If permission was denied or errored, try requesting again
+      if (mic.status === "denied" || mic.status === "error") {
+        const permitted = await mic.requestPermission();
+        if (!permitted) {
+          return;
+        }
+      }
+
+      // Start the microphone first; bail out if it is unavailable
+      const micStarted = await mic.start();
+      if (!micStarted) {
+        return;
+      }
+
+      // Microphone started, now connect to WebSocket
+      const connected = await ws.connect();
+      if (!connected) {
+        // Connection failed - stop the microphone
+        mic.stop();
+        return;
+      }
+
+      // Both are ready - start the session. Prefer the newest mic snapshot:
+      // the closure's `mic` was captured before start() ran, so its
+      // sampleRate/channels may be out of date (the ref is never older).
+      const liveMic = micRef.current !== null ? micRef.current : mic;
+      ws.startSession(liveMic.sampleRate, liveMic.channels);
+      setIsListening(true);
+    } finally {
+      isStartingRef.current = false;
+    }
+  }, [isListening, mic, ws, markActiveSpeaker]);
+
+  // Stop everything, drop the transcript history and close the socket.
+  const handleReset = useCallback(() => {
+    mic.stop();
+    if (ws.isConnected) {
+      ws.stopSession("reset");
+    }
+    ws.clearTranscripts();
+    ws.disconnect();
+    setIsListening(false);
+    markActiveSpeaker(null);
+  }, [mic, ws, markActiveSpeaker]);
+
+  // Socket errors take precedence over microphone errors in the UI.
+  const error = ws.error || mic.error;
+
+  return (
+    <div className="relative z-10 min-h-screen flex flex-col">
+      <Masthead showTicker={false} showNav />
+
+      <main className="flex-1 p-4 md:p-6 grid grid-cols-1 md:grid-cols-[300px_1fr] grid-rows-[1fr_auto] md:grid-rows-[1fr_200px] gap-4 min-h-0">
+        {/* Speaker Panel - top left on desktop, hidden on mobile */}
+        <div className="hidden md:block row-span-1">
+          <SpeakerPanel speakers={speakers} />
+        </div>
+
+        {/* Transcript Panel - right side, spans 2 rows on desktop */}
+        <div className="md:row-span-2 min-h-[300px] md:min-h-0">
+          <TranscriptPanel transcripts={ws.transcripts} />
+        </div>
+
+        {/* Control Panel - bottom left on desktop */}
+        <div className="md:col-start-1 md:row-start-2">
+          <ControlPanel
+            isListening={isListening}
+            isConnected={ws.isConnected}
+            micStatus={mic.status}
+            micVolume={mic.volume}
+            onToggle={handleToggle}
+            onReset={handleReset}
+            error={error}
+          />
+        </div>
+      </main>
+    </div>
+  );
+}