diff options
Diffstat (limited to 'makima/frontend')
| -rw-r--r-- | makima/frontend/src/components/listen/ControlPanel.tsx | 36 | ||||
| -rw-r--r-- | makima/frontend/src/hooks/useSpeakWebSocket.ts | 329 | ||||
| -rw-r--r-- | makima/frontend/src/index.css | 6 | ||||
| -rw-r--r-- | makima/frontend/src/lib/api.ts | 1 | ||||
| -rw-r--r-- | makima/frontend/src/main.tsx | 9 | ||||
| -rw-r--r-- | makima/frontend/src/routes/listen.tsx | 1 | ||||
| -rw-r--r-- | makima/frontend/src/routes/speak.tsx | 159 |
7 files changed, 534 insertions, 7 deletions
diff --git a/makima/frontend/src/components/listen/ControlPanel.tsx b/makima/frontend/src/components/listen/ControlPanel.tsx index f0e5702..f482ec4 100644 --- a/makima/frontend/src/components/listen/ControlPanel.tsx +++ b/makima/frontend/src/components/listen/ControlPanel.tsx @@ -1,6 +1,7 @@ import { useState } from "react"; import { Logo } from "../Logo"; import type { MicrophoneStatus } from "../../hooks/useMicrophone"; +import type { ConnectionStatus } from "../../hooks/useWebSocket"; import { ContractPickerModal } from "./ContractPickerModal"; export interface ContractOption { @@ -22,6 +23,8 @@ interface ControlPanelProps { selectedContractId: string | null; onContractChange: (contractId: string | null) => void; contractsLoading?: boolean; + // Connection status for loading state + connectionStatus?: ConnectionStatus; } function getStatusText(isListening: boolean, micStatus: MicrophoneStatus): string { @@ -54,6 +57,7 @@ export function ControlPanel({ selectedContractId, onContractChange, contractsLoading, + connectionStatus, }: ControlPanelProps) { const [isModalOpen, setIsModalOpen] = useState(false); const statusText = getStatusText(isListening, micStatus); @@ -121,18 +125,36 @@ export function ControlPanel({ {/* Connection status */} <div - className={`inline-flex items-center gap-1.5 px-2 py-1 border ${ + className={`inline-flex flex-col gap-1 px-2 py-1 border ${ isConnected ? "border-[#3f6fb3] text-[#75aafc]" + : connectionStatus === "connecting" + ? "border-[#3f6fb3] text-[#9bc3ff]" : "border-[rgba(117,170,252,0.25)] text-[#9bc3ff]" }`} > - <span - className={`w-1.5 h-1.5 rounded-full ${ - isConnected ? "bg-[#75aafc]" : "bg-[#3f6fb3]" - }`} - /> - {isConnected ? "CONNECTED" : "DISCONNECTED"} + <div className="inline-flex items-center gap-1.5"> + <span + className={`w-1.5 h-1.5 rounded-full ${ + isConnected ? "bg-[#75aafc]" : "bg-[#3f6fb3]" + }`} + /> + {isConnected + ? "CONNECTED" + : connectionStatus === "connecting" + ? "LOADING MODELS..." + : "DISCONNECTED"} + </div> + {connectionStatus === "connecting" && ( + <div className="w-full h-1.5 bg-[#0f1c2f] overflow-hidden"> + <div + className="h-full w-1/3 bg-[#75aafc]" + style={{ + animation: "loading-slide 1.5s ease-in-out infinite", + }} + /> + </div> + )} </div> </div> diff --git a/makima/frontend/src/hooks/useSpeakWebSocket.ts b/makima/frontend/src/hooks/useSpeakWebSocket.ts new file mode 100644 index 0000000..3ef8851 --- /dev/null +++ b/makima/frontend/src/hooks/useSpeakWebSocket.ts @@ -0,0 +1,329 @@ +import { useState, useCallback, useRef, useEffect } from "react"; +import { SPEAK_ENDPOINT } from "../lib/api"; + +export type SpeakStatus = + | "disconnected" + | "connecting" + | "connected" + | "loading_model" + | "speaking" + | "error"; + +export interface SpeakWebSocketState { + status: SpeakStatus; + error: string | null; +} + +export function useSpeakWebSocket() { + const [state, setState] = useState<SpeakWebSocketState>({ + status: "disconnected", + error: null, + }); + + const wsRef = useRef<WebSocket | null>(null); + const audioContextRef = useRef<AudioContext | null>(null); + const audioQueueRef = useRef<Float32Array[]>([]); + const isPlayingRef = useRef(false); + const modelLoadingTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null); + const nextPlayTimeRef = useRef(0); + + // Clean up on unmount + useEffect(() => { + return () => { + if (wsRef.current) { + wsRef.current.close(); + wsRef.current = null; + } + if (audioContextRef.current) { + audioContextRef.current.close(); + audioContextRef.current = null; + } + if (modelLoadingTimerRef.current) { + clearTimeout(modelLoadingTimerRef.current); + modelLoadingTimerRef.current = null; + } + }; + }, []); + + const getAudioContext = useCallback((): AudioContext => { + if (!audioContextRef.current || audioContextRef.current.state === "closed") { + audioContextRef.current = new AudioContext({ sampleRate: 24000 }); + } + return audioContextRef.current; + }, []); + + const playAudioQueue = useCallback(() => { + if (isPlayingRef.current) return; + isPlayingRef.current = true; + + const ctx = getAudioContext(); + + function scheduleNext() { + const chunk = audioQueueRef.current.shift(); + if (!chunk) { + isPlayingRef.current = false; + return; + } + + const buffer = ctx.createBuffer(1, chunk.length, 24000); + buffer.copyToChannel(chunk, 0); + + const source = ctx.createBufferSource(); + source.buffer = buffer; + source.connect(ctx.destination); + + // Schedule playback at the right time to avoid gaps + const now = ctx.currentTime; + const startTime = Math.max(now, nextPlayTimeRef.current); + source.start(startTime); + nextPlayTimeRef.current = startTime + buffer.duration; + + source.onended = () => { + if (audioQueueRef.current.length > 0) { + scheduleNext(); + } else { + isPlayingRef.current = false; + } + }; + } + + scheduleNext(); + }, [getAudioContext]); + + const connect = useCallback((): Promise<boolean> => { + return new Promise((resolve) => { + if (wsRef.current?.readyState === WebSocket.OPEN) { + resolve(true); + return; + } + + if (wsRef.current) { + wsRef.current.close(); + wsRef.current = null; + } + + setState({ status: "connecting", error: null }); + + try { + const ws = new WebSocket(SPEAK_ENDPOINT); + ws.binaryType = "arraybuffer"; + wsRef.current = ws; + + ws.onopen = () => { + setState({ status: "connected", error: null }); + resolve(true); + }; + + ws.onmessage = (event) => { + // Binary data = PCM audio chunk + if (event.data instanceof ArrayBuffer) { + // Clear model loading timer on first audio data + if (modelLoadingTimerRef.current) { + clearTimeout(modelLoadingTimerRef.current); + modelLoadingTimerRef.current = null; + } + + // Update status to speaking if not already + setState((s) => { + if (s.status === "loading_model" || s.status === "connected") { + return { ...s, status: "speaking" }; + } + return s; + }); + + // Convert PCM16 LE to Float32 + const pcm16 = new Int16Array(event.data); + const float32 = new Float32Array(pcm16.length); + for (let i = 0; i < pcm16.length; i++) { + float32[i] = pcm16[i] / 32768; + } + + audioQueueRef.current.push(float32); + playAudioQueue(); + return; + } + + // Text data = JSON message + try { + const message = JSON.parse(event.data); + + switch (message.type) { + case "audio_end": + // Clear model loading timer + if (modelLoadingTimerRef.current) { + clearTimeout(modelLoadingTimerRef.current); + modelLoadingTimerRef.current = null; + } + // Wait for audio queue to drain, then go back to connected + // Use a short delay to let buffered audio finish + { + const checkDone = () => { + if (audioQueueRef.current.length === 0 && !isPlayingRef.current) { + setState((s) => { + if (s.status === "speaking" || s.status === "loading_model") { + return { ...s, status: "connected" }; + } + return s; + }); + } else { + setTimeout(checkDone, 100); + } + }; + checkDone(); + } + break; + + case "error": + if (modelLoadingTimerRef.current) { + clearTimeout(modelLoadingTimerRef.current); + modelLoadingTimerRef.current = null; + } + setState({ + status: "error", + error: message.message || `Error: ${message.code}`, + }); + break; + } + } catch { + console.error("Failed to parse speak WebSocket message:", event.data); + } + }; + + ws.onerror = () => { + setState({ + status: "error", + error: "Failed to connect to speak server", + }); + resolve(false); + }; + + ws.onclose = (event) => { + if (modelLoadingTimerRef.current) { + clearTimeout(modelLoadingTimerRef.current); + modelLoadingTimerRef.current = null; + } + + let errorMessage: string | null = null; + if (event.code === 1006) { + errorMessage = "Connection failed - server may be unavailable"; + } else if (event.code !== 1000 && event.code !== 1001) { + errorMessage = `Connection closed unexpectedly (code: ${event.code})`; + } + + setState((s) => ({ + status: "disconnected", + error: errorMessage || s.error, + })); + wsRef.current = null; + }; + } catch (err) { + const message = + err instanceof Error ? err.message : "Failed to create WebSocket connection"; + setState({ status: "error", error: message }); + resolve(false); + } + }); + }, [playAudioQueue]); + + const speak = useCallback( + async (text: string) => { + if (!text.trim()) return; + + // Connect if not connected + if (!wsRef.current || wsRef.current.readyState !== WebSocket.OPEN) { + const connected = await connect(); + if (!connected) return; + } + + // Reset audio state + audioQueueRef.current = []; + isPlayingRef.current = false; + nextPlayTimeRef.current = 0; + + // Resume audio context if suspended (browser autoplay policy) + const ctx = getAudioContext(); + if (ctx.state === "suspended") { + await ctx.resume(); + } + + // Start loading timer - if no audio arrives in 2 seconds, show loading state + modelLoadingTimerRef.current = setTimeout(() => { + setState((s) => { + if (s.status === "connected" || s.status === "connecting") { + return { ...s, status: "loading_model" }; + } + return s; + }); + modelLoadingTimerRef.current = null; + }, 2000); + + // Send speak request + wsRef.current?.send( + JSON.stringify({ type: "speak", text }) + ); + + setState((s) => ({ ...s, error: null })); + }, + [connect, getAudioContext] + ); + + const cancel = useCallback(() => { + // Clear audio queue + audioQueueRef.current = []; + isPlayingRef.current = false; + nextPlayTimeRef.current = 0; + + // Clear model loading timer + if (modelLoadingTimerRef.current) { + clearTimeout(modelLoadingTimerRef.current); + modelLoadingTimerRef.current = null; + } + + // Send cancel message + if (wsRef.current?.readyState === WebSocket.OPEN) { + wsRef.current.send(JSON.stringify({ type: "cancel" })); + } + + setState((s) => ({ + ...s, + status: wsRef.current?.readyState === WebSocket.OPEN ? "connected" : "disconnected", + })); + }, []); + + const disconnect = useCallback(() => { + // Clear audio queue + audioQueueRef.current = []; + isPlayingRef.current = false; + nextPlayTimeRef.current = 0; + + if (modelLoadingTimerRef.current) { + clearTimeout(modelLoadingTimerRef.current); + modelLoadingTimerRef.current = null; + } + + if (wsRef.current) { + // Send stop message before closing + if (wsRef.current.readyState === WebSocket.OPEN) { + wsRef.current.send(JSON.stringify({ type: "stop" })); + } + wsRef.current.close(1000, "User disconnected"); + wsRef.current = null; + } + + setState({ status: "disconnected", error: null }); + }, []); + + return { + ...state, + isConnected: + state.status === "connected" || + state.status === "speaking" || + state.status === "loading_model", + isSpeaking: state.status === "speaking", + isModelLoading: state.status === "loading_model", + speak, + cancel, + connect, + disconnect, + }; +} diff --git a/makima/frontend/src/index.css b/makima/frontend/src/index.css index 5c08006..f29873b 100644 --- a/makima/frontend/src/index.css +++ b/makima/frontend/src/index.css @@ -64,6 +64,12 @@ body { background: rgba(117, 170, 252, 0.35); } +/* Loading bar animation for indeterminate progress */ +@keyframes loading-slide { + 0% { transform: translateX(-100%); } + 100% { transform: translateX(300%); } +} + /* Grid overlay */ .grid-overlay { position: fixed; diff --git a/makima/frontend/src/lib/api.ts b/makima/frontend/src/lib/api.ts index 4390b20..ca04ce7 100644 --- a/makima/frontend/src/lib/api.ts +++ b/makima/frontend/src/lib/api.ts @@ -99,6 +99,7 @@ async function authFetch(url: string, options: RequestInit = {}): Promise<Respon }); } export const LISTEN_ENDPOINT = `${WS_BASE}/api/v1/listen`; +export const SPEAK_ENDPOINT = `${WS_BASE}/api/v1/speak`; export const FILE_SUBSCRIBE_ENDPOINT = `${WS_BASE}/api/v1/files/subscribe`; export const TASK_SUBSCRIBE_ENDPOINT = `${WS_BASE}/api/v1/mesh/tasks/subscribe`; diff --git a/makima/frontend/src/main.tsx b/makima/frontend/src/main.tsx index 383b732..ef1ba5c 100644 --- a/makima/frontend/src/main.tsx +++ b/makima/frontend/src/main.tsx @@ -19,6 +19,7 @@ import LoginPage from "./routes/login"; import SettingsPage from "./routes/settings"; import ContractFilePage from "./routes/contract-file"; import TemplatesPage from "./routes/templates"; +import SpeakPage from "./routes/speak"; createRoot(document.getElementById("root")!).render( <StrictMode> @@ -135,6 +136,14 @@ createRoot(document.getElementById("root")!).render( </ProtectedRoute> } /> + <Route + path="/speak" + element={ + <ProtectedRoute> + <SpeakPage /> + </ProtectedRoute> + } + /> </Routes> </BrowserRouter> </SupervisorQuestionsProvider> diff --git a/makima/frontend/src/routes/listen.tsx b/makima/frontend/src/routes/listen.tsx index 55cf7e6..8af538e 100644 --- a/makima/frontend/src/routes/listen.tsx +++ b/makima/frontend/src/routes/listen.tsx @@ -207,6 +207,7 @@ export default function ListenPage() { selectedContractId={selectedContractId} onContractChange={setSelectedContractId} contractsLoading={contractsLoading} + connectionStatus={ws.status} /> </div> </main> diff --git a/makima/frontend/src/routes/speak.tsx b/makima/frontend/src/routes/speak.tsx new file mode 100644 index 0000000..c4692ff --- /dev/null +++ b/makima/frontend/src/routes/speak.tsx @@ -0,0 +1,159 @@ +import { useState, useCallback } from "react"; +import { Masthead } from "../components/Masthead"; +import { useSpeakWebSocket } from "../hooks/useSpeakWebSocket"; + +export default function SpeakPage() { + const [text, setText] = useState(""); + const tts = useSpeakWebSocket(); + + const handleSpeak = useCallback(() => { + if (!text.trim()) return; + tts.speak(text); + }, [text, tts]); + + const handleCancel = useCallback(() => { + tts.cancel(); + }, [tts]); + + const handleKeyDown = useCallback( + (e: React.KeyboardEvent<HTMLTextAreaElement>) => { + // Ctrl/Cmd + Enter to speak + if ((e.ctrlKey || e.metaKey) && e.key === "Enter") { + e.preventDefault(); + handleSpeak(); + } + }, + [handleSpeak] + ); + + const statusLabel = (() => { + switch (tts.status) { + case "disconnected": + return "DISCONNECTED"; + case "connecting": + return "CONNECTING..."; + case "connected": + return "CONNECTED"; + case "loading_model": + return "LOADING TTS MODEL..."; + case "speaking": + return "SPEAKING"; + case "error": + return "ERROR"; + default: + return "IDLE"; + } + })(); + + const statusColor = (() => { + switch (tts.status) { + case "connected": + case "speaking": + return "border-[#3f6fb3] text-[#75aafc]"; + case "error": + return "border-red-400/50 text-red-400"; + default: + return "border-[rgba(117,170,252,0.25)] text-[#9bc3ff]"; + } + })(); + + const dotColor = (() => { + switch (tts.status) { + case "connected": + case "speaking": + return "bg-[#75aafc]"; + case "error": + return "bg-red-400"; + default: + return "bg-[#3f6fb3]"; + } + })(); + + return ( + <div className="relative z-10 h-screen flex flex-col overflow-hidden"> + <Masthead showTicker={false} showNav /> + + <main className="flex-1 flex flex-col items-center justify-center p-4 md:p-8 gap-6 min-h-0 overflow-auto"> + {/* Text input area */} + <div className="w-full max-w-2xl"> + <textarea + value={text} + onChange={(e) => setText(e.target.value)} + onKeyDown={handleKeyDown} + placeholder="Enter text to speak..." + disabled={tts.isSpeaking || tts.isModelLoading} + className="w-full h-48 p-4 font-mono text-sm text-[#dbe7ff] bg-[#0d1b2d] border border-[#0f3c78] focus:border-[#3f6fb3] focus:outline-none placeholder-[#3f6fb3] resize-none transition-colors disabled:opacity-50" + /> + <div className="mt-1 text-right font-mono text-xs text-[#3f6fb3]"> + Ctrl+Enter to speak + </div> + </div> + + {/* Controls row */} + <div className="w-full max-w-2xl flex items-center gap-4"> + {/* Speak / Cancel button */} + {tts.isSpeaking || tts.isModelLoading ? ( + <button + onClick={handleCancel} + className="px-6 py-2 font-mono text-sm text-red-400 bg-[#0d1b2d] border border-red-400/50 hover:border-red-400 transition-colors uppercase tracking-wide" + > + Cancel + </button> + ) : ( + <button + onClick={handleSpeak} + disabled={!text.trim()} + className="px-6 py-2 font-mono text-sm text-[#dbe7ff] bg-[#0d1b2d] border border-[#0f3c78] hover:border-[#3f6fb3] transition-colors uppercase tracking-wide disabled:opacity-50 disabled:cursor-not-allowed" + > + Speak + </button> + )} + + {/* Status indicator */} + <div + className={`inline-flex items-center gap-1.5 px-2 py-1 border font-mono text-xs tracking-wide uppercase ${statusColor}`} + > + <span className={`w-1.5 h-1.5 rounded-full ${dotColor}`} /> + {statusLabel} + </div> + </div> + + {/* Loading bar (indeterminate) */} + {tts.isModelLoading && ( + <div className="w-full max-w-2xl"> + <div className="w-full h-1.5 bg-[#0f1c2f] overflow-hidden"> + <div + className="h-full w-1/3 bg-[#75aafc]" + style={{ + animation: "loading-slide 1.5s ease-in-out infinite", + }} + /> + </div> + <div className="mt-2 font-mono text-xs text-[#9bc3ff] text-center tracking-wide uppercase"> + Loading TTS model... This may take a moment on first use. + </div> + </div> + )} + + {/* Speaking animation bar */} + {tts.isSpeaking && ( + <div className="w-full max-w-2xl"> + <div className="w-full h-1.5 bg-[#0f1c2f] overflow-hidden"> + <div + className="h-full w-full bg-[#75aafc] animate-pulse" + /> + </div> + </div> + )} + + {/* Error display */} + {tts.error && ( + <div className="w-full max-w-2xl font-mono text-xs text-red-400 text-center px-4 py-2 border border-red-400/50 bg-red-400/10"> + {tts.error} + </div> + )} + </main> + + </div> + ); +} |
