diff options
| author | soryu <soryu@soryu.co> | 2025-12-22 04:50:25 +0000 |
|---|---|---|
| committer | soryu <soryu@soryu.co> | 2025-12-23 14:47:18 +0000 |
| commit | 0741a8b8e9a2099c82bff6d6b9ebbce9c07cad53 (patch) | |
| tree | 88cbd5fecb9ca72a04aa07f1a6db4e1a751b1fd7 /makima/frontend/src/routes | |
| parent | aee2e4e784afd6d115fb5f7b40284c4efd2da966 (diff) | |
| download | soryu-0741a8b8e9a2099c82bff6d6b9ebbce9c07cad53.tar.gz soryu-0741a8b8e9a2099c82bff6d6b9ebbce9c07cad53.zip | |
Update makima FE to add initial listening system
Diffstat (limited to 'makima/frontend/src/routes')
| -rw-r--r-- | makima/frontend/src/routes/_index.tsx | 113 | ||||
| -rw-r--r-- | makima/frontend/src/routes/listen.tsx | 158 |
2 files changed, 271 insertions, 0 deletions
diff --git a/makima/frontend/src/routes/_index.tsx b/makima/frontend/src/routes/_index.tsx new file mode 100644 index 0000000..568e300 --- /dev/null +++ b/makima/frontend/src/routes/_index.tsx @@ -0,0 +1,113 @@ +import { Masthead } from "../components/Masthead"; +import { Logo } from "../components/Logo"; +import { RewriteLink } from "../components/RewriteLink"; + +export default function HomePage() { + return ( + <div className="relative z-10 min-h-screen flex flex-col"> + <Masthead showTicker showNav /> + + <main className="flex-1 p-6 md:p-8"> + <section className="grid grid-cols-1 md:grid-cols-[1.1fr_0.9fr] gap-6 items-center mb-8"> + <div className="order-2 md:order-1"> + <span className="inline-block px-2 py-1 border border-[#3f6fb3] bg-[#0f1c2f] text-[#9bc3ff] font-mono text-xs tracking-wide uppercase mb-3"> + Real-time STT Platform + </span> + <h2 className="m-0 mb-3 text-xl text-[#f0f5ff] tracking-wide"> + リアルタイム音声認識 + </h2> + <p className="my-2 text-[#e4edff]"> + Makima provides real-time speech-to-text transcription with speaker + diarization. Stream audio from your microphone or upload files for + instant transcription. + </p> + <p className="my-2 text-[#e4edff]"> + 高精度なリアルタイム音声認識エンジン。WebSocket + 経由でストリーミング処理を行い、話者分離にも対応。 + </p> + + <div className="flex flex-wrap gap-3 mt-4"> + <RewriteLink to="/listen" className="!py-2 !px-3 text-sm"> + Start Listening + </RewriteLink> + <RewriteLink href="https://github.com/soryu-co" external className="!py-2 !px-3 text-sm"> + GitHub + </RewriteLink> + <RewriteLink href="https://soryu.co" external className="!py-2 !px-3 text-sm"> + soryu.co + </RewriteLink> + </div> + </div> + + <div className="order-1 md:order-2 flex justify-center"> + <Logo size={160} /> + </div> + </section> + + <div className="panel p-4 md:p-5 relative"> + <span className="absolute -top-2.5 left-3 px-1.5 py-0.5 bg-[#0f1c2f] border border-[#3f6fb3] font-mono text-xs tracking-wide"> + FEATURES//MAKIMA + </span> + <h3 className="mt-2 mb-3 text-lg text-[#eef5ff]">Platform Features</h3> + <div className="grid grid-cols-1 md:grid-cols-2 gap-3 mt-3 text-sm text-[#e4edff]"> + <div className="border border-[#3f6fb3] bg-[#0d1b2d] p-3 font-mono tracking-tight"> + <strong className="text-[#9bc3ff]">Real-time Streaming</strong> + <br /> + WebSocket-based audio streaming with low latency transcription + </div> + <div className="border border-[#3f6fb3] bg-[#0d1b2d] p-3 font-mono tracking-tight"> + <strong className="text-[#9bc3ff]">Speaker Diarization</strong> + <br /> + Automatic speaker identification and labeling + </div> + <div className="border border-[#3f6fb3] bg-[#0d1b2d] p-3 font-mono tracking-tight"> + <strong className="text-[#9bc3ff]">End-of-Utterance</strong> + <br /> + Smart detection of speech boundaries + </div> + <div className="border border-[#3f6fb3] bg-[#0d1b2d] p-3 font-mono tracking-tight"> + <strong className="text-[#9bc3ff]">Multi-format</strong> + <br /> + Support for PCM32F and PCM16 audio encoding + </div> + </div> + </div> + + <div className="mt-6 border border-[#0f3c78] bg-[#0f3c78] text-[#dbe7ff] shadow-[inset_0_0_0_1px_rgba(255,255,255,0.1)]"> + <header className="border-b border-[rgba(197,219,255,0.4)] px-3 py-2.5 font-mono tracking-wide text-xs flex justify-between items-center"> + <strong>API // makima.jp</strong> + <span className="text-[#92b8ff]">endpoint: wss://api.makima.jp</span> + </header> + <pre className="m-0 px-3 py-4 font-mono text-xs leading-relaxed bg-[linear-gradient(0deg,rgba(255,255,255,0.05)_1px,rgba(15,60,120,0.25)_1px)]"> +{`[001] endpoint ............... /api/v1/listen +[002] protocol ............... WebSocket +[003] encoding ............... pcm32f / pcm16 +[004] sample_rate ............ 16000 Hz (default) +[005] features ............... transcription, diarization, eou`} + </pre> + </div> + </main> + + <footer className="px-4 py-3 border-t-4 border-double border-[#23477a] bg-gradient-to-b from-[rgba(11,18,32,0.95)] to-[rgba(14,26,45,0.7)] text-xs text-[#e4edff] text-center"> + <p className="m-1">© makima.jp</p> + <p className="m-1"> + <a + href="https://makima.jp" + className="text-[#dbe7ff] underline" + > + https://makima.jp + </a> + {" / "} + <a + href="https://soryu.co" + target="_blank" + rel="noopener noreferrer" + className="text-[#dbe7ff] underline" + > + https://soryu.co + </a> + </p> + </footer> + </div> + ); +} diff --git a/makima/frontend/src/routes/listen.tsx b/makima/frontend/src/routes/listen.tsx new file mode 100644 index 0000000..06769fd --- /dev/null +++ b/makima/frontend/src/routes/listen.tsx @@ -0,0 +1,158 @@ +import { useState, useCallback, useMemo, useEffect, useRef } from "react"; +import { Masthead } from "../components/Masthead"; +import { SpeakerPanel } from "../components/listen/SpeakerPanel"; +import { TranscriptPanel } from "../components/listen/TranscriptPanel"; +import { ControlPanel } from "../components/listen/ControlPanel"; +import { useMicrophone } from "../hooks/useMicrophone"; +import { useWebSocket } from "../hooks/useWebSocket"; + +export default function ListenPage() { + const [isListening, setIsListening] = useState(false); + const [activeSpeaker, setActiveSpeaker] = useState<string | null>(null); + const [permissionRequested, setPermissionRequested] = useState(false); + const isListeningRef = useRef(false); + + // Keep ref in sync with state for use in callbacks + useEffect(() => { + isListeningRef.current = isListening; + }, [isListening]); + + const ws = useWebSocket({ + onTranscript: (transcript) => { + // Track active speaker + if (!transcript.isFinal) { + setActiveSpeaker(transcript.speaker); + } + }, + onStopped: () => { + setIsListening(false); + setActiveSpeaker(null); + }, + }); + + const wsRef = useRef(ws); + useEffect(() => { + wsRef.current = ws; + }, [ws]); + + const handleAudioData = useCallback((samples: Float32Array) => { + if (wsRef.current.isConnected && isListeningRef.current) { + wsRef.current.sendAudio(samples); + } + }, []); + + const mic = useMicrophone({ + sampleRate: 16000, + onAudioData: handleAudioData, + }); + + // Request microphone permission on page load + useEffect(() => { + if (!permissionRequested) { + setPermissionRequested(true); + mic.requestPermission(); + } + }, [permissionRequested, mic.requestPermission]); + + // Derive unique speakers from transcripts + const speakers = useMemo(() => { + const speakerSet = new Set<string>(); + ws.transcripts.forEach((t) => speakerSet.add(t.speaker)); + + return Array.from(speakerSet).map((speaker) => ({ + id: speaker, + label: speaker, + isActive: speaker === activeSpeaker, + })); + }, [ws.transcripts, activeSpeaker]); + + // Clear active speaker after a delay + useEffect(() => { + if (activeSpeaker) { + const timer = setTimeout(() => setActiveSpeaker(null), 1500); + return () => clearTimeout(timer); + } + }, [activeSpeaker]); + + const handleToggle = useCallback(async () => { + if (isListening) { + // Stop listening + mic.stop(); + ws.stopSession("user_stopped"); + setIsListening(false); + setActiveSpeaker(null); + return; + } + + // If permission was denied or errored, try requesting again + if (mic.status === "denied" || mic.status === "error") { + const permitted = await mic.requestPermission(); + if (!permitted) { + return; + } + } + + // Start listening - start the microphone + const micStarted = await mic.start(); + if (!micStarted) { + // Microphone permission denied or error + return; + } + + // Microphone started, now connect to WebSocket + const connected = await ws.connect(); + if (!connected) { + // Connection failed - stop the microphone + mic.stop(); + return; + } + + // Both microphone and WebSocket are ready - start the session + ws.startSession(mic.sampleRate, mic.channels); + setIsListening(true); + }, [isListening, mic, ws]); + + const handleReset = useCallback(() => { + mic.stop(); + if (ws.isConnected) { + ws.stopSession("reset"); + } + ws.clearTranscripts(); + ws.disconnect(); + setIsListening(false); + setActiveSpeaker(null); + }, [mic, ws]); + + const error = ws.error || mic.error; + + return ( + <div className="relative z-10 min-h-screen flex flex-col"> + <Masthead showTicker={false} showNav /> + + <main className="flex-1 p-4 md:p-6 grid grid-cols-1 md:grid-cols-[300px_1fr] grid-rows-[1fr_auto] md:grid-rows-[1fr_200px] gap-4 min-h-0"> + {/* Speaker Panel - top left on desktop, hidden on mobile */} + <div className="hidden md:block row-span-1"> + <SpeakerPanel speakers={speakers} /> + </div> + + {/* Transcript Panel - right side, spans 2 rows on desktop */} + <div className="md:row-span-2 min-h-[300px] md:min-h-0"> + <TranscriptPanel transcripts={ws.transcripts} /> + </div> + + {/* Control Panel - bottom left on desktop */} + <div className="md:col-start-1 md:row-start-2"> + <ControlPanel + isListening={isListening} + isConnected={ws.isConnected} + micStatus={mic.status} + micVolume={mic.volume} + onToggle={handleToggle} + onReset={handleReset} + error={error} + /> + </div> + </main> + </div> + ); +} |
