diff options
| author | soryu <soryu@soryu.co> | 2025-12-22 04:50:25 +0000 |
|---|---|---|
| committer | soryu <soryu@soryu.co> | 2025-12-23 14:47:18 +0000 |
| commit | 0741a8b8e9a2099c82bff6d6b9ebbce9c07cad53 (patch) | |
| tree | 88cbd5fecb9ca72a04aa07f1a6db4e1a751b1fd7 /makima/frontend/src | |
| parent | aee2e4e784afd6d115fb5f7b40284c4efd2da966 (diff) | |
| download | soryu-0741a8b8e9a2099c82bff6d6b9ebbce9c07cad53.tar.gz soryu-0741a8b8e9a2099c82bff6d6b9ebbce9c07cad53.zip | |
Update makima FE to add initial listening system
Diffstat (limited to 'makima/frontend/src')
| -rw-r--r-- | makima/frontend/src/components/GridOverlay.tsx | 3 | ||||
| -rw-r--r-- | makima/frontend/src/components/Logo.tsx | 130 | ||||
| -rw-r--r-- | makima/frontend/src/components/Masthead.tsx | 44 | ||||
| -rw-r--r-- | makima/frontend/src/components/NavStrip.tsx | 40 | ||||
| -rw-r--r-- | makima/frontend/src/components/RewriteLink.tsx | 63 | ||||
| -rw-r--r-- | makima/frontend/src/components/listen/ControlPanel.tsx | 143 | ||||
| -rw-r--r-- | makima/frontend/src/components/listen/SpeakerPanel.tsx | 61 | ||||
| -rw-r--r-- | makima/frontend/src/components/listen/TranscriptPanel.tsx | 85 | ||||
| -rw-r--r-- | makima/frontend/src/hooks/useMicrophone.ts | 248 | ||||
| -rw-r--r-- | makima/frontend/src/hooks/useTextScramble.ts | 52 | ||||
| -rw-r--r-- | makima/frontend/src/hooks/useWebSocket.ts | 244 | ||||
| -rw-r--r-- | makima/frontend/src/index.css | 390 | ||||
| -rw-r--r-- | makima/frontend/src/lib/api.ts | 40 | ||||
| -rw-r--r-- | makima/frontend/src/main.tsx | 19 | ||||
| -rw-r--r-- | makima/frontend/src/routes/_index.tsx | 113 | ||||
| -rw-r--r-- | makima/frontend/src/routes/listen.tsx | 158 | ||||
| -rw-r--r-- | makima/frontend/src/types/messages.ts | 56 | ||||
| -rw-r--r-- | makima/frontend/src/vite-env.d.ts | 14 |
18 files changed, 1903 insertions, 0 deletions
diff --git a/makima/frontend/src/components/GridOverlay.tsx b/makima/frontend/src/components/GridOverlay.tsx new file mode 100644 index 0000000..6728149 --- /dev/null +++ b/makima/frontend/src/components/GridOverlay.tsx @@ -0,0 +1,3 @@ +export function GridOverlay() { + return <div className="grid-overlay" aria-hidden="true" />; +} diff --git a/makima/frontend/src/components/Logo.tsx b/makima/frontend/src/components/Logo.tsx new file mode 100644 index 0000000..5cbde9f --- /dev/null +++ b/makima/frontend/src/components/Logo.tsx @@ -0,0 +1,130 @@ +interface LogoProps { + size?: number; + listening?: boolean; + onClick?: () => void; + className?: string; + noHoverAnimation?: boolean; +} + +export function Logo({ + size = 160, + listening = false, + onClick, + className = "", + noHoverAnimation = false, +}: LogoProps) { + const shellSize = size * 1.4375; // 230/160 ratio + const haloSize = size * 1.3125; // 210/160 ratio + + return ( + <div + className={`relative grid place-items-center ${className}`} + style={{ + width: shellSize, + height: shellSize, + filter: "drop-shadow(0 10px 26px rgba(12, 35, 67, 0.32))", + }} + > + <div + className={`logo-shell ${listening ? "listening" : ""} ${noHoverAnimation ? "no-hover-animation" : ""} ${onClick ? "cursor-pointer" : ""}`} + style={{ width: shellSize, height: shellSize }} + onClick={onClick} + role={onClick ? "button" : undefined} + tabIndex={onClick ? 0 : undefined} + onKeyDown={ + onClick + ? (e) => { + if (e.key === "Enter" || e.key === " ") { + e.preventDefault(); + onClick(); + } + } + : undefined + } + > + <span className="scan-sweep" /> + <span className="scan-sweep sweep-2" /> + <svg + className="logo-svg" + viewBox="0 0 120 120" + xmlns="http://www.w3.org/2000/svg" + style={{ width: size, height: size }} + role="img" + aria-label="Makima logo" + > + <circle + className="ring ring-outer" + cx="60" + cy="60" + r="52" + strokeWidth="4" + /> + <circle + className="ring ring-middle" + cx="60" + cy="60" + r="36" + strokeWidth="3" + /> + <circle + className="ring ring-inner" + cx="60" + cy="60" + r="22" + strokeWidth="3" + /> + <circle className="core" cx="60" cy="60" r="8" /> + </svg> + </div> + <div + className="halo" + aria-hidden="true" + style={{ width: haloSize, height: haloSize }} + /> + </div> + ); +} + +// Small logo for header +export function LogoMark({ size = 32 }: { size?: number }) { + return ( + <span + className="inline-flex items-center justify-center" + style={{ width: size, height: size }} + aria-hidden="true" + > + <svg + width={size} + height={size} + viewBox="0 0 24 24" + xmlns="http://www.w3.org/2000/svg" + > + <circle + cx="12" + cy="12" + r="10" + fill="none" + stroke="#0f3c78" + strokeWidth="2" + /> + <circle + cx="12" + cy="12" + r="7" + fill="none" + stroke="#0f3c78" + strokeWidth="1.6" + /> + <circle + cx="12" + cy="12" + r="4" + fill="none" + stroke="#0f3c78" + strokeWidth="1.6" + /> + <circle cx="12" cy="12" r="1.6" fill="#0f3c78" /> + </svg> + </span> + ); +} diff --git a/makima/frontend/src/components/Masthead.tsx b/makima/frontend/src/components/Masthead.tsx new file mode 100644 index 0000000..a89977f --- /dev/null +++ b/makima/frontend/src/components/Masthead.tsx @@ -0,0 +1,44 @@ +import { Link } from "react-router"; +import { LogoMark } from "./Logo"; +import { NavStrip } from "./NavStrip"; + +interface MastheadProps { + showTicker?: boolean; + showNav?: boolean; +} + +export function Masthead({ showTicker = false, showNav = true }: MastheadProps) { + return ( + <header className="border-b-4 border-double border-[#050d1f] bg-[#08162e]"> + <div className="flex items-center gap-3 px-4 py-3"> + <Link to="/" className="flex items-center gap-3 no-underline"> + <LogoMark size={32} /> + <div> + <h1 className="m-0 text-xl text-white tracking-widest font-normal"> + makima.jp + </h1> + <small className="block text-[#dbe7ff] text-xs tracking-wide"> + Real-time Speech Recognition + </small> + </div> + </Link> + </div> + + {showTicker && ( + <div className="relative overflow-hidden border border-[#153667] bg-[#0a2242] text-[#b9d4ff] font-mono text-xs px-2.5 py-2 mx-4 mb-3"> + <div className="absolute inset-y-0 left-0 w-3 bg-gradient-to-b from-[rgba(231,237,247,0.5)] to-transparent" /> + <div className="absolute inset-y-0 right-0 w-3 bg-gradient-to-b from-[rgba(231,237,247,0.5)] to-transparent rotate-180" /> + <span className="ticker-content"> + /// MAKIMA INFORMATION SERVICE // REAL-TIME STT PLATFORM /// + TRANSPORT: WEBSOCKET /// ENCODING: PCM32F /// STATUS: ONLINE /// + MAKIMA.JP /// MAKIMA INFORMATION SERVICE // REAL-TIME STT PLATFORM + /// TRANSPORT: WEBSOCKET /// ENCODING: PCM32F /// STATUS: ONLINE /// + MAKIMA.JP /// + </span> + </div> + )} + + {showNav && <NavStrip />} + </header> + ); +} diff --git a/makima/frontend/src/components/NavStrip.tsx b/makima/frontend/src/components/NavStrip.tsx new file mode 100644 index 0000000..875af5a --- /dev/null +++ b/makima/frontend/src/components/NavStrip.tsx @@ -0,0 +1,40 @@ +import { RewriteLink } from "./RewriteLink"; + +interface NavLink { + label: string; + href: string; + disabled?: boolean; + external?: boolean; +} + +const NAV_LINKS: NavLink[] = [ + { label: "Listen", href: "/listen" }, + { label: "Mesh", href: "/mesh", disabled: true }, + { label: "Register", href: "/register", disabled: true }, + { label: "Login", href: "/login", disabled: true }, +]; + +export function NavStrip() { + return ( + <nav + className="flex items-center gap-2.5 px-3 py-2.5 border-t border-b border-dashed border-[rgba(117,170,252,0.35)] bg-[#0c1729] font-mono uppercase tracking-wide text-[11px]" + aria-label="Main navigation" + > + <span className="text-[#9bc3ff] pr-2.5 border-r border-[rgba(117,170,252,0.35)]"> + NAV// + </span> + <div className="flex flex-wrap gap-2 items-center"> + {NAV_LINKS.map((link) => ( + <RewriteLink + key={link.label} + to={link.href} + disabled={link.disabled} + external={link.external} + > + {link.label} + </RewriteLink> + ))} + </div> + </nav> + ); +} diff --git a/makima/frontend/src/components/RewriteLink.tsx b/makima/frontend/src/components/RewriteLink.tsx new file mode 100644 index 0000000..6e591a1 --- /dev/null +++ b/makima/frontend/src/components/RewriteLink.tsx @@ -0,0 +1,63 @@ +import { Link } from "react-router"; +import { useTextScramble } from "../hooks/useTextScramble"; + +interface RewriteLinkProps { + to?: string; + href?: string; + children: string; + disabled?: boolean; + external?: boolean; + className?: string; +} + +export function RewriteLink({ + to, + href, + children, + disabled = false, + external = false, + className = "", +}: RewriteLinkProps) { + const { displayText, scramble, reset } = useTextScramble(children); + + const baseClass = `rewrite-link ${className}`; + + if (disabled) { + return ( + <span + className={baseClass} + aria-disabled="true" + onMouseEnter={scramble} + onMouseLeave={reset} + > + {displayText} + </span> + ); + } + + if (external || href) { + return ( + <a + href={href || to} + target="_blank" + rel="noopener noreferrer" + className={baseClass} + onMouseEnter={scramble} + onMouseLeave={reset} + > + {displayText} + </a> + ); + } + + return ( + <Link + to={to || "/"} + className={baseClass} + onMouseEnter={scramble} + onMouseLeave={reset} + > + {displayText} + </Link> + ); +} diff --git a/makima/frontend/src/components/listen/ControlPanel.tsx b/makima/frontend/src/components/listen/ControlPanel.tsx new file mode 100644 index 0000000..4d86850 --- /dev/null +++ b/makima/frontend/src/components/listen/ControlPanel.tsx @@ -0,0 +1,143 @@ +import { Logo } from "../Logo"; +import type { MicrophoneStatus } from "../../hooks/useMicrophone"; + +interface ControlPanelProps { + isListening: boolean; + isConnected: boolean; + micStatus: MicrophoneStatus; + micVolume: number; + onToggle: () => void; + onReset: () => void; + error?: string | null; +} + +function getStatusText(isListening: boolean, micStatus: MicrophoneStatus): string { + if (isListening) return "Listening..."; + + switch (micStatus) { + case "requesting": + return "Requesting permission..."; + case "ready": + return "Click to start"; + case "denied": + return "Permission denied - click to retry"; + case "error": + return "Error - click to retry"; + default: + return "Click to start"; + } +} + +export function ControlPanel({ + isListening, + isConnected, + micStatus, + micVolume, + onToggle, + onReset, + error, +}: ControlPanelProps) { + const statusText = getStatusText(isListening, micStatus); + const isRequesting = micStatus === "requesting"; + + return ( + <div className="panel h-full p-4 flex flex-col items-center justify-center gap-4"> + {/* Logo button */} + <div className="flex flex-col items-center gap-2"> + <Logo + size={100} + listening={isListening || isRequesting} + onClick={isRequesting ? undefined : onToggle} + className={isRequesting ? "opacity-50" : "cursor-pointer"} + noHoverAnimation + /> + <span className="font-mono text-xs text-[#9bc3ff] tracking-wide uppercase text-center"> + {statusText} + </span> + </div> + + {/* Status indicators */} + <div className="font-mono text-xs text-center flex flex-col gap-1"> + {/* Microphone status */} + <div + className={`inline-flex flex-col gap-1 px-2 py-1 border ${ + micStatus === "ready" || micStatus === "recording" + ? "border-[#3f6fb3] text-[#75aafc]" + : micStatus === "denied" || micStatus === "error" + ? "border-red-400/50 text-red-400" + : "border-[rgba(117,170,252,0.25)] text-[#9bc3ff]" + }`} + > + <div className="inline-flex items-center gap-1.5"> + <span + className={`w-1.5 h-1.5 rounded-full ${ + micStatus === "ready" || micStatus === "recording" + ? "bg-[#75aafc]" + : micStatus === "denied" || micStatus === "error" + ? "bg-red-400" + : "bg-[#3f6fb3]" + }`} + /> + {micStatus === "ready" || micStatus === "recording" + ? "MIC READY" + : micStatus === "requesting" + ? "REQUESTING..." + : micStatus === "denied" + ? "MIC DENIED" + : micStatus === "error" + ? "MIC ERROR" + : "MIC IDLE"} + </div> + {isListening && ( + <div className="w-full h-1.5 bg-[#0f1c2f] overflow-hidden"> + <div + className="h-full bg-[#75aafc] transition-all duration-75" + style={{ width: `${micVolume * 100}%` }} + /> + </div> + )} + </div> + + {/* Connection status */} + <div + className={`inline-flex items-center gap-1.5 px-2 py-1 border ${ + isConnected + ? "border-[#3f6fb3] text-[#75aafc]" + : "border-[rgba(117,170,252,0.25)] text-[#9bc3ff]" + }`} + > + <span + className={`w-1.5 h-1.5 rounded-full ${ + isConnected ? "bg-[#75aafc]" : "bg-[#3f6fb3]" + }`} + /> + {isConnected ? "CONNECTED" : "DISCONNECTED"} + </div> + </div> + + {/* Error display */} + {error && ( + <div className="font-mono text-xs text-red-400 text-center px-2 py-1 border border-red-400/50 bg-red-400/10 max-w-[250px]"> + {error} + </div> + )} + + {/* Buttons */} + <div className="flex gap-2 mt-2"> + <button + onClick={onReset} + className="px-3 py-1.5 font-mono text-xs text-[#dbe7ff] bg-[#0d1b2d] border border-[#0f3c78] hover:border-[#3f6fb3] transition-colors uppercase tracking-wide" + > + Reset + </button> + <button + disabled + className="px-3 py-1.5 font-mono text-xs text-[#9aa9c6] bg-[#0b1423] border border-[rgba(117,170,252,0.25)] cursor-not-allowed uppercase tracking-wide opacity-50" + title="File upload coming soon" + > + Upload + </button> + </div> + </div> + ); +} diff --git a/makima/frontend/src/components/listen/SpeakerPanel.tsx b/makima/frontend/src/components/listen/SpeakerPanel.tsx new file mode 100644 index 0000000..cb43992 --- /dev/null +++ b/makima/frontend/src/components/listen/SpeakerPanel.tsx @@ -0,0 +1,61 @@ +interface Speaker { + id: string; + label: string; + isActive: boolean; +} + +interface SpeakerPanelProps { + speakers: Speaker[]; +} + +const SPEAKER_SYMBOLS = ["///", ":::", "***", "###", "+++", "---", "===", "%%%"]; + +export function SpeakerPanel({ speakers }: SpeakerPanelProps) { + return ( + <div className="panel h-full p-4 flex flex-col"> + <div className="font-mono text-xs text-[#9bc3ff] tracking-wide uppercase mb-3 pb-2 border-b border-dashed border-[rgba(117,170,252,0.35)]"> + SPEAKERS// + </div> + + {speakers.length === 0 ? ( + <div className="flex-1 flex items-center justify-center text-[#9bc3ff] text-sm font-mono opacity-60"> + <span>Waiting for speech...</span> + </div> + ) : ( + <div className="flex-1 flex flex-col gap-3"> + {speakers.map((speaker, index) => ( + <div + key={speaker.id} + className={`flex items-center gap-3 p-3 border ${ + speaker.isActive + ? "border-[#3f6fb3] bg-[#0f1c2f]" + : "border-[rgba(117,170,252,0.25)] bg-[#0b1423]" + } transition-colors`} + > + <span + className={`font-mono text-2xl tracking-tighter ${ + speaker.isActive + ? "text-[#75aafc] animate-pulse" + : "text-[#3f6fb3]" + }`} + > + {SPEAKER_SYMBOLS[index % SPEAKER_SYMBOLS.length]} + </span> + <div className="flex-1"> + <div className="font-mono text-sm text-[#dbe7ff]"> + {speaker.label} + </div> + <div className="font-mono text-xs text-[#9bc3ff]"> + {speaker.isActive ? "speaking" : "idle"} + </div> + </div> + {speaker.isActive && ( + <div className="w-2 h-2 rounded-full bg-[#75aafc] animate-pulse" /> + )} + </div> + ))} + </div> + )} + </div> + ); +} diff --git a/makima/frontend/src/components/listen/TranscriptPanel.tsx b/makima/frontend/src/components/listen/TranscriptPanel.tsx new file mode 100644 index 0000000..662c94f --- /dev/null +++ b/makima/frontend/src/components/listen/TranscriptPanel.tsx @@ -0,0 +1,85 @@ +import { useRef, useEffect, useState, useCallback } from "react"; +import type { TranscriptEntry } from "../../types/messages"; + +interface TranscriptPanelProps { + transcripts: TranscriptEntry[]; +} + +export function TranscriptPanel({ transcripts }: TranscriptPanelProps) { + const containerRef = useRef<HTMLDivElement>(null); + const [autoScroll, setAutoScroll] = useState(true); + + // Auto-scroll when new transcripts arrive + useEffect(() => { + if (autoScroll && containerRef.current) { + containerRef.current.scrollTop = containerRef.current.scrollHeight; + } + }, [transcripts, autoScroll]); + + // Detect manual scroll + const handleScroll = useCallback(() => { + if (!containerRef.current) return; + + const { scrollTop, scrollHeight, clientHeight } = containerRef.current; + const isAtBottom = scrollHeight - scrollTop - clientHeight < 50; + + setAutoScroll(isAtBottom); + }, []); + + const scrollToBottom = useCallback(() => { + if (containerRef.current) { + containerRef.current.scrollTop = containerRef.current.scrollHeight; + setAutoScroll(true); + } + }, []); + + return ( + <div className="panel h-full flex flex-col"> + <div className="font-mono text-xs text-[#9bc3ff] tracking-wide uppercase p-4 pb-2 border-b border-dashed border-[rgba(117,170,252,0.35)] flex justify-between items-center"> + <span>TRANSCRIPT//</span> + {!autoScroll && ( + <button + onClick={scrollToBottom} + className="px-2 py-1 text-[10px] bg-[#0f1c2f] border border-[#3f6fb3] hover:bg-[#153667] transition-colors" + > + Scroll to bottom + </button> + )} + </div> + + <div + ref={containerRef} + onScroll={handleScroll} + className="flex-1 overflow-y-auto p-4 space-y-3" + > + {transcripts.length === 0 ? ( + <div className="text-center text-[#9bc3ff] text-sm font-mono opacity-60 py-8"> + Transcriptions will appear here... + </div> + ) : ( + transcripts.map((entry) => ( + <div + key={entry.id} + className={`font-mono text-sm ${ + entry.isFinal ? "opacity-100" : "opacity-70" + }`} + > + <div className="flex items-baseline gap-2 mb-1"> + <span className="text-[#75aafc] text-xs"> + [{entry.start.toFixed(2)}s - {entry.end.toFixed(2)}s] + </span> + <span className="text-[#9bc3ff] text-xs font-bold"> + {entry.speaker} + </span> + {entry.isFinal && ( + <span className="text-[#3f6fb3] text-[10px]">[FINAL]</span> + )} + </div> + <p className="m-0 text-[#dbe7ff] leading-relaxed">{entry.text}</p> + </div> + )) + )} + </div> + </div> + ); +} diff --git a/makima/frontend/src/hooks/useMicrophone.ts b/makima/frontend/src/hooks/useMicrophone.ts new file mode 100644 index 0000000..307904b --- /dev/null +++ b/makima/frontend/src/hooks/useMicrophone.ts @@ -0,0 +1,248 @@ +import { useState, useCallback, useRef, useEffect } from "react"; + +export type MicrophoneStatus = + | "idle" + | "requesting" + | "ready" + | "recording" + | "denied" + | "error"; + +export interface MicrophoneState { + status: MicrophoneStatus; + error: string | null; + sampleRate: number; + channels: number; + volume: number; +} + +interface UseMicrophoneOptions { + sampleRate?: number; + onAudioData?: (samples: Float32Array) => void; +} + +function getErrorMessage(err: unknown): { message: string; status: MicrophoneStatus } { + if (err instanceof DOMException) { + switch (err.name) { + case "NotAllowedError": + case "PermissionDeniedError": + return { message: "Microphone permission denied", status: "denied" }; + case "NotFoundError": + return { message: "No microphone found", status: "error" }; + case "NotReadableError": + case "TrackStartError": + return { message: "Microphone is in use by another application", status: "error" }; + case "OverconstrainedError": + return { message: "Microphone does not support requested settings", status: "error" }; + case "AbortError": + return { message: "Microphone access was aborted", status: "error" }; + case "SecurityError": + return { message: "Microphone access blocked (requires HTTPS)", status: "error" }; + default: + return { message: `Microphone error: ${err.name} - ${err.message}`, status: "error" }; + } + } + + if (err instanceof Error) { + return { message: err.message, status: "error" }; + } + + return { message: "Failed to access microphone", status: "error" }; +} + +export function useMicrophone(options: UseMicrophoneOptions = {}) { + const { onAudioData } = options; + + const [state, setState] = useState<MicrophoneState>({ + status: "idle", + error: null, + sampleRate: 48000, + channels: 1, + volume: 0, + }); + + const streamRef = useRef<MediaStream | null>(null); + const audioContextRef = useRef<AudioContext | null>(null); + const processorRef = useRef<ScriptProcessorNode | null>(null); + const sourceRef = useRef<MediaStreamAudioSourceNode | null>(null); + const onAudioDataRef = useRef(onAudioData); + + // Keep callback ref updated + useEffect(() => { + onAudioDataRef.current = onAudioData; + }, [onAudioData]); + + // Check if microphone permission is already granted + const checkPermission = useCallback(async (): Promise<boolean> => { + try { + const result = await navigator.permissions.query({ + name: "microphone" as PermissionName, + }); + return result.state === "granted"; + } catch { + return false; + } + }, []); + + // Request microphone permission without starting recording + const requestPermission = useCallback(async (): Promise<boolean> => { + setState((s) => ({ ...s, status: "requesting", error: null })); + + // Check for secure context + if (typeof window !== "undefined" && !window.isSecureContext) { + setState((s) => ({ ...s, status: "error", error: "Microphone requires HTTPS (or localhost)" })); + return false; + } + + try { + const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); + // Permission granted - stop the stream immediately + stream.getTracks().forEach((track) => track.stop()); + setState((s) => ({ ...s, status: "ready", error: null })); + return true; + } catch (err) { + const { message, status } = getErrorMessage(err); + setState((s) => ({ ...s, status, error: message })); + return false; + } + }, []); + + const start = useCallback(async (): Promise<boolean> => { + if (state.status === "recording") return true; + + setState((s) => ({ ...s, status: "requesting", error: null })); + + // Check for secure context + if (typeof window !== "undefined" && !window.isSecureContext) { + setState((s) => ({ ...s, status: "error", error: "Microphone requires HTTPS (or localhost)" })); + return false; + } + + let stream: MediaStream; + + try { + stream = await navigator.mediaDevices.getUserMedia({ audio: true }); + } catch (err) { + const { message, status } = getErrorMessage(err); + setState((s) => ({ ...s, status, error: message })); + return false; + } + + try { + streamRef.current = stream; + + // Create audio context + const AudioContextClass = window.AudioContext || (window as unknown as { webkitAudioContext: typeof AudioContext }).webkitAudioContext; + const audioContext = new AudioContextClass(); + audioContextRef.current = audioContext; + + // Resume audio context if it's suspended + if (audioContext.state === "suspended") { + await audioContext.resume(); + } + + // Create source from microphone + const source = audioContext.createMediaStreamSource(stream); + sourceRef.current = source; + + // Use ScriptProcessor for audio processing + const bufferSize = 4096; + const processor = audioContext.createScriptProcessor(bufferSize, 1, 1); + processorRef.current = processor; + + processor.onaudioprocess = (event) => { + const inputData = event.inputBuffer.getChannelData(0); + const samples = new Float32Array(inputData.length); + samples.set(inputData); + + // Calculate RMS volume (0-1 range) + let sum = 0; + for (let i = 0; i < samples.length; i++) { + sum += samples[i] * samples[i]; + } + const rms = Math.sqrt(sum / samples.length); + // Normalize and clamp to 0-1 range (typical speech is around 0.1-0.3 RMS) + const normalizedVolume = Math.min(1, rms * 3); + setState((s) => ({ ...s, volume: normalizedVolume })); + + if (onAudioDataRef.current) { + onAudioDataRef.current(samples); + } + }; + + source.connect(processor); + processor.connect(audioContext.destination); + + setState((s) => ({ + ...s, + status: "recording", + sampleRate: audioContext.sampleRate, + error: null, + })); + + return true; + } catch (err) { + stream.getTracks().forEach((track) => track.stop()); + streamRef.current = null; + + const { message, status } = getErrorMessage(err); + setState((s) => ({ ...s, status, error: message })); + return false; + } + }, [state.status]); + + const stop = useCallback(() => { + if (processorRef.current && sourceRef.current) { + try { + sourceRef.current.disconnect(processorRef.current); + processorRef.current.disconnect(); + } catch { + // Already disconnected + } + processorRef.current = null; + sourceRef.current = null; + } + + if (streamRef.current) { + streamRef.current.getTracks().forEach((track) => track.stop()); + streamRef.current = null; + } + + if (audioContextRef.current) { + audioContextRef.current.close(); + audioContextRef.current = null; + } + + setState((s) => ({ ...s, status: "idle", error: null, volume: 0 })); + }, []); + + // Cleanup on unmount + useEffect(() => { + return () => { + if (processorRef.current && sourceRef.current) { + try { + sourceRef.current.disconnect(processorRef.current); + processorRef.current.disconnect(); + } catch { + // Already disconnected + } + } + if (streamRef.current) { + streamRef.current.getTracks().forEach((track) => track.stop()); + } + if (audioContextRef.current) { + audioContextRef.current.close(); + } + }; + }, []); + + return { + ...state, + start, + stop, + checkPermission, + requestPermission, + isRecording: state.status === "recording", + isDenied: state.status === "denied", + }; +} diff --git a/makima/frontend/src/hooks/useTextScramble.ts b/makima/frontend/src/hooks/useTextScramble.ts new file mode 100644 index 0000000..bb3f365 --- /dev/null +++ b/makima/frontend/src/hooks/useTextScramble.ts @@ -0,0 +1,52 @@ +import { useState, useCallback, useRef } from "react"; + +const GLYPHS = "▒▓░█#@*+:-/[]{}<>_"; + +export function useTextScramble(originalText: string) { + const [displayText, setDisplayText] = useState(originalText); + const timerRef = useRef<ReturnType<typeof setInterval> | null>(null); + const iterationRef = useRef(0); + + const scramble = useCallback(() => { + // Clear any existing animation + if (timerRef.current) { + clearInterval(timerRef.current); + } + + iterationRef.current = 0; + + timerRef.current = setInterval(() => { + const text = originalText; + const iteration = iterationRef.current; + + const display = text + .split("") + .map((char, index) => { + if (index < iteration) return char; + return GLYPHS.charAt(Math.floor(Math.random() * GLYPHS.length)); + }) + .join(""); + + setDisplayText(display); + iterationRef.current += 1; + + if (iteration > text.length + 2) { + if (timerRef.current) { + clearInterval(timerRef.current); + timerRef.current = null; + } + setDisplayText(originalText); + } + }, 26); + }, [originalText]); + + const reset = useCallback(() => { + if (timerRef.current) { + clearInterval(timerRef.current); + timerRef.current = null; + } + setDisplayText(originalText); + }, [originalText]); + + return { displayText, scramble, reset }; +} diff --git a/makima/frontend/src/hooks/useWebSocket.ts b/makima/frontend/src/hooks/useWebSocket.ts new file mode 100644 index 0000000..de6c1a6 --- /dev/null +++ b/makima/frontend/src/hooks/useWebSocket.ts @@ -0,0 +1,244 @@ +import { useState, useCallback, useRef, useEffect } from "react"; +import { LISTEN_ENDPOINT } from "../lib/api"; +import type { + ClientMessage, + ServerMessage, + TranscriptEntry, +} from "../types/messages"; + +export type ConnectionStatus = + | "disconnected" + | "connecting" + | "connected" + | "error"; + +export interface WebSocketState { + status: ConnectionStatus; + sessionId: string | null; + error: string | null; + transcripts: TranscriptEntry[]; +} + +interface UseWebSocketOptions { + onReady?: (sessionId: string) => void; + onTranscript?: (transcript: TranscriptEntry) => void; + onError?: (code: string, message: string) => void; + onStopped?: (reason: string) => void; +} + +export function useWebSocket(options: UseWebSocketOptions = {}) { + const { onReady, onTranscript, onError, onStopped } = options; + + const [state, setState] = useState<WebSocketState>({ + status: "disconnected", + sessionId: null, + error: null, + transcripts: [], + }); + + const wsRef = useRef<WebSocket | null>(null); + const transcriptIdRef = useRef(0); + + // Store callbacks in refs to avoid recreating handlers + const callbacksRef = useRef({ onReady, onTranscript, onError, onStopped }); + useEffect(() => { + callbacksRef.current = { onReady, onTranscript, onError, onStopped }; + }, [onReady, onTranscript, onError, onStopped]); + + const connect = useCallback((): Promise<boolean> => { + return new Promise((resolve) => { + if (wsRef.current?.readyState === WebSocket.OPEN) { + resolve(true); + return; + } + + // Close any existing connection + if (wsRef.current) { + wsRef.current.close(); + wsRef.current = null; + } + + setState((s) => ({ ...s, status: "connecting", error: null })); + + try { + const ws = new WebSocket(LISTEN_ENDPOINT); + wsRef.current = ws; + + ws.onopen = () => { + setState((s) => ({ ...s, status: "connected", error: null })); + resolve(true); + }; + + ws.onmessage = (event) => { + try { + const message: ServerMessage = JSON.parse(event.data); + + switch (message.type) { + case "ready": + setState((s) => ({ ...s, sessionId: message.sessionId })); + callbacksRef.current.onReady?.(message.sessionId); + break; + + case "transcript": { + const entry: TranscriptEntry = { + id: `t-${++transcriptIdRef.current}`, + speaker: message.speaker, + start: message.start, + end: message.end, + text: message.text, + isFinal: message.isFinal, + }; + + setState((s) => { + if (message.isFinal) { + // Final transcript replaces all previous transcripts from this speaker + const filtered = s.transcripts.filter( + (t) => t.speaker !== message.speaker + ); + return { ...s, transcripts: [...filtered, entry] }; + } else { + // Non-final: replace if same speaker and overlapping time, otherwise append + const existingIdx = s.transcripts.findIndex( + (t) => + !t.isFinal && + t.speaker === message.speaker && + Math.abs(t.start - message.start) < 0.1 + ); + if (existingIdx >= 0) { + const newTranscripts = [...s.transcripts]; + newTranscripts[existingIdx] = entry; + return { ...s, transcripts: newTranscripts }; + } + return { ...s, transcripts: [...s.transcripts, entry] }; + } + }); + + callbacksRef.current.onTranscript?.(entry); + break; + } + + case "error": + setState((s) => ({ ...s, error: message.message })); + callbacksRef.current.onError?.(message.code, message.message); + break; + + case "stopped": + setState((s) => ({ ...s, status: "disconnected" })); + callbacksRef.current.onStopped?.(message.reason); + break; + } + } catch { + console.error("Failed to parse WebSocket message:", event.data); + } + }; + + ws.onerror = () => { + setState((s) => ({ + ...s, + status: "error", + error: "Failed to connect to server", + })); + resolve(false); + }; + + ws.onclose = (event) => { + // Check for specific close codes + let errorMessage: string | null = null; + if (event.code === 1006) { + errorMessage = "Connection failed - server may be unavailable"; + } else if (event.code !== 1000 && event.code !== 1001) { + errorMessage = `Connection closed unexpectedly (code: ${event.code})`; + } + + setState((s) => ({ + ...s, + status: "disconnected", + sessionId: null, + error: errorMessage || s.error, + })); + wsRef.current = null; + }; + } catch (err) { + const message = err instanceof Error ? err.message : "Failed to create WebSocket connection"; + setState((s) => ({ + ...s, + status: "error", + error: message, + })); + resolve(false); + } + }); + }, []); + + const disconnect = useCallback(() => { + if (wsRef.current) { + wsRef.current.close(1000, "User disconnected"); + wsRef.current = null; + } + setState((s) => ({ ...s, status: "disconnected", sessionId: null })); + }, []); + + const sendMessage = useCallback((message: ClientMessage) => { + if (wsRef.current?.readyState === WebSocket.OPEN) { + wsRef.current.send(JSON.stringify(message)); + } + }, []); + + const sendAudio = useCallback((samples: Float32Array) => { + if (wsRef.current?.readyState === WebSocket.OPEN) { + // Convert Float32Array to bytes (little-endian) + const bytes = new Uint8Array(samples.length * 4); + const view = new DataView(bytes.buffer); + for (let i = 0; i < samples.length; i++) { + view.setFloat32(i * 4, samples[i], true); + } + wsRef.current.send(bytes); + } + }, []); + + const startSession = useCallback( + (sampleRate: number, channels: number = 1) => { + sendMessage({ + type: "start", + sampleRate, + channels, + encoding: "pcm32f", + }); + }, + [sendMessage] + ); + + const stopSession = useCallback( + (reason?: string) => { + sendMessage({ + type: "stop", + reason, + }); + }, + [sendMessage] + ); + + const clearTranscripts = useCallback(() => { + setState((s) => ({ ...s, transcripts: [], error: null })); + }, []); + + // Cleanup on unmount + useEffect(() => { + return () => { + if (wsRef.current) { + wsRef.current.close(); + } + }; + }, []); + + return { + ...state, + connect, + disconnect, + sendAudio, + startSession, + stopSession, + clearTranscripts, + isConnected: state.status === "connected", + }; +} diff --git a/makima/frontend/src/index.css b/makima/frontend/src/index.css new file mode 100644 index 0000000..178fb9b --- /dev/null +++ b/makima/frontend/src/index.css @@ -0,0 +1,390 @@ +@import "tailwindcss"; + +@theme { + --color-bg-primary: #02040a; + --color-bg-panel: rgba(9, 13, 20, 0.92); + --color-bg-masthead: #08162e; + --color-bg-nav: #0c1729; + --color-bg-input: #0f1c2f; + --color-bg-button: #0d1b2d; + --color-bg-terminal: #0f3c78; + + --color-blue-primary: #0f3c78; + --color-blue-accent: #3f6fb3; + --color-blue-glow: #75aafc; + + --color-text-primary: #dbe7ff; + --color-text-secondary: #9bc3ff; + --color-text-muted: #e4edff; + + --color-border-primary: #2f476d; + --color-border-accent: #3f6fb3; + + --font-body: "Yu Mincho", "Hiragino Mincho ProN", "MS PMincho", "MS Mincho", "Noto Serif JP", serif; + --font-mono: "MS Gothic", "Osaka-mono", Consolas, "Courier New", monospace; +} + +html { + background-color: var(--color-bg-primary); +} + +body { + margin: 0; + padding: 0; + color: var(--color-text-primary); + min-height: 100vh; + background-color: var(--color-bg-primary); + background-image: linear-gradient( + rgba(8, 22, 46, 0.65), + rgba(8, 22, 46, 0.65) + ), + url("/02cdkj.jpeg"); + background-size: cover; + background-repeat: no-repeat; + background-position: center; + background-attachment: fixed; + font-family: var(--font-body); + line-height: 1.7; + letter-spacing: 0.03em; + text-rendering: optimizeLegibility; + position: relative; + overflow-x: hidden; +} + +::selection { + background: rgba(117, 170, 252, 0.35); +} + +/* Grid overlay */ +.grid-overlay { + position: fixed; + inset: 0; + pointer-events: none; + z-index: 1; +} + +.grid-overlay::before { + content: ""; + position: absolute; + inset: 0; + background: linear-gradient( + 90deg, + rgba(117, 170, 252, 0.22) 1px, + rgba(255, 255, 255, 0.08) 1px + ), + linear-gradient( + 0deg, + rgba(117, 170, 252, 0.22) 1px, + rgba(255, 255, 255, 0.08) 1px + ); + background-size: 28px 28px; + mix-blend-mode: screen; + opacity: 0.8; +} + +.grid-overlay::after { + content: ""; + position: absolute; + inset: 0; + background: linear-gradient( + 180deg, + rgba(3, 17, 40, 0.8) 0%, + rgba(7, 15, 29, 0.5) 45%, + rgba(3, 17, 40, 0.8) 100% + ); +} + +/* Logo animations */ +@keyframes sweep-out { + 0% { + opacity: 0.45; + transform: scale(0.55); + } + 40% { + opacity: 0.7; + transform: scale(1); + } + 100% { + opacity: 0; + transform: scale(1.35); + } +} + +@keyframes ring-scan { + 0% { + stroke: #0f3c78; + opacity: 0.9; + stroke-dasharray: 400 0; + stroke-linecap: butt; + } + 25% { + stroke: #6fa0ff; + opacity: 1; + stroke-dasharray: 18 14; + stroke-linecap: round; + } + 55% { + stroke: #1e4c94; + opacity: 0.88; + stroke-dasharray: 10 18; + } + 70% { + stroke: #0f3c78; + opacity: 0.9; + stroke-dasharray: 6 22; + } + 90% { + stroke: #aac6ff; + opacity: 0.82; + stroke-dasharray: 4 26; + } + 100% { + stroke: #0f3c78; + opacity: 0.9; + stroke-dasharray: 400 0; + stroke-linecap: butt; + } +} + +@keyframes idle-rotate { + from { + transform: rotate(0deg); + } + to { + transform: rotate(360deg); + } +} + +@keyframes glow-pulse { + 0% { + opacity: 0.16; + transform: scale(1.02); + } + 50% { + opacity: 0.28; + transform: scale(1.12); + } + 100% { + opacity: 0.16; + transform: scale(1.02); + } +} + +@keyframes pulse { + 0% { + transform: scale(1); + opacity: 0.6; + } + 50% { + transform: scale(1.1); + opacity: 1; + } + 100% { + transform: scale(1); + opacity: 0.6; + } +} + +@keyframes ticker { + 0% { + transform: translateX(0); + } + 100% { + transform: translateX(-50%); + } +} + +/* Logo shell */ +.logo-shell { + border-radius: 50%; + border: 1px solid rgba(63, 111, 179, 0.6); + background: #070f1d; + display: grid; + place-items: center; + position: relative; + overflow: hidden; +} + +.logo-shell::before { + content: ""; + position: absolute; + inset: 10px; + border-radius: 50%; + border: 1px solid rgba(15, 60, 120, 0.35); + box-shadow: 0 0 0 rgba(15, 60, 120, 0.1); + opacity: 0; + transform: scale(0.9); + transition: opacity 0.45s ease, transform 0.6s ease; +} + +.logo-shell::after { + content: ""; + position: absolute; + inset: -1px; + background: repeating-conic-gradient( + from 0deg, + rgba(15, 60, 120, 0.08) 0deg 6deg, + rgba(117, 170, 252, 0.1) 6deg 12deg + ); + mask-image: radial-gradient(circle, rgba(0, 0, 0, 0.25) 40%, transparent 80%); + opacity: 0.7; +} + +.logo-shell.listening .ring-outer, +.logo-shell:not(.no-hover-animation):hover .ring-outer { + animation: ring-scan 2.4s cubic-bezier(0.45, 0.05, 0.25, 1) infinite; + animation-delay: 0.7s; +} + +.logo-shell.listening .ring-middle, +.logo-shell:not(.no-hover-animation):hover .ring-middle { + animation: ring-scan 2.4s cubic-bezier(0.45, 0.05, 0.25, 1) infinite; + animation-delay: 0.35s; +} + +.logo-shell.listening .ring-inner, +.logo-shell:not(.no-hover-animation):hover .ring-inner { + animation: ring-scan 2.4s cubic-bezier(0.45, 0.05, 0.25, 1) infinite; + animation-delay: 0s; +} + +.logo-shell.listening::before, +.logo-shell:not(.no-hover-animation):hover::before { + opacity: 1; + transform: scale(1.08); + animation: glow-pulse 1.6s ease-in-out infinite; +} + +.logo-shell.listening .scan-sweep, +.logo-shell:not(.no-hover-animation):hover .scan-sweep { + opacity: 0.7; + animation: sweep-out 2.4s ease-out infinite; +} + +.logo-shell.listening .logo-svg, +.logo-shell:not(.no-hover-animation):hover .logo-svg { + filter: drop-shadow(0 0 10px rgba(15, 60, 120, 0.35)); +} + +.scan-sweep { + position: absolute; + inset: 10%; + border-radius: 50%; + background: radial-gradient( + circle, + rgba(117, 170, 252, 0.25) 0%, + rgba(117, 170, 252, 0.12) 38%, + rgba(15, 60, 120, 0.06) 64%, + transparent 85% + ); + opacity: 0; + mix-blend-mode: screen; + filter: blur(0.8px); + z-index: 1; + transform: scale(0.65); +} + +.scan-sweep.sweep-2 { + animation-delay: 1.2s; +} + +.logo-svg { + z-index: 2; + transition: filter 0.4s ease; +} + +.logo-svg .ring { + fill: none; + stroke: #0f3c78; + stroke-dasharray: 400 0; + stroke-linecap: butt; + transition: stroke 0.35s ease, opacity 0.35s ease; +} + +.logo-svg .core { + fill: #0f3c78; +} + +.halo { + position: absolute; + border-radius: 50%; + border: 1px dashed rgba(15, 60, 120, 0.5); + animation: idle-rotate 28s linear infinite; +} + +.logo-shell.listening + .halo, +.logo-shell:not(.no-hover-animation):hover + .halo { + animation: pulse 1.5s ease-in-out infinite; +} + +/* Ticker */ +.ticker-content { + display: inline-block; + padding-left: 2px; + animation: ticker 22s linear infinite; + white-space: nowrap; +} + +/* Rewrite link scramble effect */ +.rewrite-link { + position: relative; + display: inline-block; + padding: 4px 8px; + border: 1px dashed rgba(117, 170, 252, 0.5); + background: #0f1c2f; + text-decoration: none; + color: #eef5ff; + font-family: var(--font-mono); + overflow: hidden; + transition: border-color 0.2s; +} + +.rewrite-link:hover { + border-color: rgba(117, 170, 252, 0.8); +} + +.rewrite-link[aria-disabled="true"] { + color: #9aa9c6; + border-color: rgba(117, 170, 252, 0.25); + cursor: not-allowed; + pointer-events: none; + background: #0b1423; +} + +/* Panel styles */ +.panel { + background: var(--color-bg-panel); + border: 1px solid var(--color-border-primary); + position: relative; +} + +.panel::after { + content: ""; + position: absolute; + inset: 0; + border: 1px dashed rgba(89, 122, 170, 0.25); + pointer-events: none; +} + +/* Double border top decoration */ +.border-top-double { + border-top: 4px double #050d1f; +} + +/* Stripe decoration for frame top */ +.stripe-top::before { + content: ""; + position: absolute; + top: 0; + left: 0; + width: 100%; + height: 6px; + background: repeating-linear-gradient( + 90deg, + #0f3c78 0px, + #0f3c78 40px, + #3f6fb3 40px, + #3f6fb3 44px + ); + opacity: 0.7; +} diff --git a/makima/frontend/src/lib/api.ts b/makima/frontend/src/lib/api.ts new file mode 100644 index 0000000..a6f6c3e --- /dev/null +++ b/makima/frontend/src/lib/api.ts @@ -0,0 +1,40 @@ +const API_CONFIG = { + local: { + http: "http://localhost:8080", + ws: "ws://localhost:8080", + }, + production: { + http: "https://api.makima.jp", + ws: "wss://api.makima.jp", + }, +} as const; + +type Environment = "local" | "production"; + +function detectEnvironment(): Environment { + // Check if explicitly set via env var + const envOverride = import.meta.env.VITE_API_ENV as Environment | undefined; + if (envOverride && (envOverride === "local" || envOverride === "production")) { + return envOverride; + } + + // Auto-detect based on hostname + if (typeof window !== "undefined") { + const hostname = window.location.hostname; + if (hostname === "localhost" || hostname === "127.0.0.1") { + return "local"; + } + } + + return "production"; +} + +const env = detectEnvironment(); + +export const API_BASE = API_CONFIG[env].http; +export const WS_BASE = API_CONFIG[env].ws; +export const LISTEN_ENDPOINT = `${WS_BASE}/api/v1/listen`; + +export function getEnvironment(): Environment { + return env; +} diff --git a/makima/frontend/src/main.tsx b/makima/frontend/src/main.tsx new file mode 100644 index 0000000..fe5be21 --- /dev/null +++ b/makima/frontend/src/main.tsx @@ -0,0 +1,19 @@ +import { StrictMode } from "react"; +import { createRoot } from "react-dom/client"; +import { BrowserRouter, Routes, Route } from "react-router"; +import "./index.css"; +import { GridOverlay } from "./components/GridOverlay"; +import HomePage from "./routes/_index"; +import ListenPage from "./routes/listen"; + +createRoot(document.getElementById("root")!).render( + <StrictMode> + <BrowserRouter> + <GridOverlay /> + <Routes> + <Route path="/" element={<HomePage />} /> + <Route path="/listen" element={<ListenPage />} /> + </Routes> + </BrowserRouter> + </StrictMode> +); diff --git a/makima/frontend/src/routes/_index.tsx b/makima/frontend/src/routes/_index.tsx new file mode 100644 index 0000000..568e300 --- /dev/null +++ b/makima/frontend/src/routes/_index.tsx @@ -0,0 +1,113 @@ +import { Masthead } from "../components/Masthead"; +import { Logo } from "../components/Logo"; +import { RewriteLink } from "../components/RewriteLink"; + +export default function HomePage() { + return ( + <div className="relative z-10 min-h-screen flex flex-col"> + <Masthead showTicker showNav /> + + <main className="flex-1 p-6 md:p-8"> + <section className="grid grid-cols-1 md:grid-cols-[1.1fr_0.9fr] gap-6 items-center mb-8"> + <div className="order-2 md:order-1"> + <span className="inline-block px-2 py-1 border border-[#3f6fb3] bg-[#0f1c2f] text-[#9bc3ff] font-mono text-xs tracking-wide uppercase mb-3"> + Real-time STT Platform + </span> + <h2 className="m-0 mb-3 text-xl text-[#f0f5ff] tracking-wide"> + リアルタイム音声認識 + </h2> + <p className="my-2 text-[#e4edff]"> + Makima provides real-time speech-to-text transcription with speaker + diarization. Stream audio from your microphone or upload files for + instant transcription. + </p> + <p className="my-2 text-[#e4edff]"> + 高精度なリアルタイム音声認識エンジン。WebSocket + 経由でストリーミング処理を行い、話者分離にも対応。 + </p> + + <div className="flex flex-wrap gap-3 mt-4"> + <RewriteLink to="/listen" className="!py-2 !px-3 text-sm"> + Start Listening + </RewriteLink> + <RewriteLink href="https://github.com/soryu-co" external className="!py-2 !px-3 text-sm"> + GitHub + </RewriteLink> + <RewriteLink href="https://soryu.co" external className="!py-2 !px-3 text-sm"> + soryu.co + </RewriteLink> + </div> + </div> + + <div className="order-1 md:order-2 flex justify-center"> + <Logo size={160} /> + </div> + </section> + + <div className="panel p-4 md:p-5 relative"> + <span className="absolute -top-2.5 left-3 px-1.5 py-0.5 bg-[#0f1c2f] border border-[#3f6fb3] font-mono text-xs tracking-wide"> + FEATURES//MAKIMA + </span> + <h3 className="mt-2 mb-3 text-lg text-[#eef5ff]">Platform Features</h3> + <div className="grid grid-cols-1 md:grid-cols-2 gap-3 mt-3 text-sm text-[#e4edff]"> + <div className="border border-[#3f6fb3] bg-[#0d1b2d] p-3 font-mono tracking-tight"> + <strong className="text-[#9bc3ff]">Real-time Streaming</strong> + <br /> + WebSocket-based audio streaming with low latency transcription + </div> + <div className="border border-[#3f6fb3] bg-[#0d1b2d] p-3 font-mono tracking-tight"> + <strong className="text-[#9bc3ff]">Speaker Diarization</strong> + <br /> + Automatic speaker identification and labeling + </div> + <div className="border border-[#3f6fb3] bg-[#0d1b2d] p-3 font-mono tracking-tight"> + <strong className="text-[#9bc3ff]">End-of-Utterance</strong> + <br /> + Smart detection of speech boundaries + </div> + <div className="border border-[#3f6fb3] bg-[#0d1b2d] p-3 font-mono tracking-tight"> + <strong className="text-[#9bc3ff]">Multi-format</strong> + <br /> + Support for PCM32F and PCM16 audio encoding + </div> + </div> + </div> + + <div className="mt-6 border border-[#0f3c78] bg-[#0f3c78] text-[#dbe7ff] shadow-[inset_0_0_0_1px_rgba(255,255,255,0.1)]"> + <header className="border-b border-[rgba(197,219,255,0.4)] px-3 py-2.5 font-mono tracking-wide text-xs flex justify-between items-center"> + <strong>API // makima.jp</strong> + <span className="text-[#92b8ff]">endpoint: wss://api.makima.jp</span> + </header> + <pre className="m-0 px-3 py-4 font-mono text-xs leading-relaxed bg-[linear-gradient(0deg,rgba(255,255,255,0.05)_1px,rgba(15,60,120,0.25)_1px)]"> +{`[001] endpoint ............... /api/v1/listen +[002] protocol ............... WebSocket +[003] encoding ............... pcm32f / pcm16 +[004] sample_rate ............ 16000 Hz (default) +[005] features ............... transcription, diarization, eou`} + </pre> + </div> + </main> + + <footer className="px-4 py-3 border-t-4 border-double border-[#23477a] bg-gradient-to-b from-[rgba(11,18,32,0.95)] to-[rgba(14,26,45,0.7)] text-xs text-[#e4edff] text-center"> + <p className="m-1">© makima.jp</p> + <p className="m-1"> + <a + href="https://makima.jp" + className="text-[#dbe7ff] underline" + > + https://makima.jp + </a> + {" / "} + <a + href="https://soryu.co" + target="_blank" + rel="noopener noreferrer" + className="text-[#dbe7ff] underline" + > + https://soryu.co + </a> + </p> + </footer> + </div> + ); +} diff --git a/makima/frontend/src/routes/listen.tsx b/makima/frontend/src/routes/listen.tsx new file mode 100644 index 0000000..06769fd --- /dev/null +++ b/makima/frontend/src/routes/listen.tsx @@ -0,0 +1,158 @@ +import { useState, useCallback, useMemo, useEffect, useRef } from "react"; +import { Masthead } from "../components/Masthead"; +import { SpeakerPanel } from "../components/listen/SpeakerPanel"; +import { TranscriptPanel } from "../components/listen/TranscriptPanel"; +import { ControlPanel } from "../components/listen/ControlPanel"; +import { useMicrophone } from "../hooks/useMicrophone"; +import { useWebSocket } from "../hooks/useWebSocket"; + +export default function ListenPage() { + const [isListening, setIsListening] = useState(false); + const [activeSpeaker, setActiveSpeaker] = useState<string | null>(null); + const [permissionRequested, setPermissionRequested] = useState(false); + const isListeningRef = useRef(false); + + // Keep ref in sync with state for use in callbacks + useEffect(() => { + isListeningRef.current = isListening; + }, [isListening]); + + const ws = useWebSocket({ + onTranscript: (transcript) => { + // Track active speaker + if (!transcript.isFinal) { + setActiveSpeaker(transcript.speaker); + } + }, + onStopped: () => { + setIsListening(false); + setActiveSpeaker(null); + }, + }); + + const wsRef = useRef(ws); + useEffect(() => { + wsRef.current = ws; + }, [ws]); + + const handleAudioData = useCallback((samples: Float32Array) => { + if (wsRef.current.isConnected && isListeningRef.current) { + wsRef.current.sendAudio(samples); + } + }, []); + + const mic = useMicrophone({ + sampleRate: 16000, + onAudioData: handleAudioData, + }); + + // Request microphone permission on page load + useEffect(() => { + if (!permissionRequested) { + setPermissionRequested(true); + mic.requestPermission(); + } + }, [permissionRequested, mic.requestPermission]); + + // Derive unique speakers from transcripts + const speakers = useMemo(() => { + const speakerSet = new Set<string>(); + ws.transcripts.forEach((t) => speakerSet.add(t.speaker)); + + return Array.from(speakerSet).map((speaker) => ({ + id: speaker, + label: speaker, + isActive: speaker === activeSpeaker, + })); + }, [ws.transcripts, activeSpeaker]); + + // Clear active speaker after a delay + useEffect(() => { + if (activeSpeaker) { + const timer = setTimeout(() => setActiveSpeaker(null), 1500); + return () => clearTimeout(timer); + } + }, [activeSpeaker]); + + const handleToggle = useCallback(async () => { + if (isListening) { + // Stop listening + mic.stop(); + ws.stopSession("user_stopped"); + setIsListening(false); + setActiveSpeaker(null); + return; + } + + // If permission was denied or errored, try requesting again + if (mic.status === "denied" || mic.status === "error") { + const permitted = await mic.requestPermission(); + if (!permitted) { + return; + } + } + + // Start listening - start the microphone + const micStarted = await mic.start(); + if (!micStarted) { + // Microphone permission denied or error + return; + } + + // Microphone started, now connect to WebSocket + const connected = await ws.connect(); + if (!connected) { + // Connection failed - stop the microphone + mic.stop(); + return; + } + + // Both microphone and WebSocket are ready - start the session + ws.startSession(mic.sampleRate, mic.channels); + setIsListening(true); + }, [isListening, mic, ws]); + + const handleReset = useCallback(() => { + mic.stop(); + if (ws.isConnected) { + ws.stopSession("reset"); + } + ws.clearTranscripts(); + ws.disconnect(); + setIsListening(false); + setActiveSpeaker(null); + }, [mic, ws]); + + const error = ws.error || mic.error; + + return ( + <div className="relative z-10 min-h-screen flex flex-col"> + <Masthead showTicker={false} showNav /> + + <main className="flex-1 p-4 md:p-6 grid grid-cols-1 md:grid-cols-[300px_1fr] grid-rows-[1fr_auto] md:grid-rows-[1fr_200px] gap-4 min-h-0"> + {/* Speaker Panel - top left on desktop, hidden on mobile */} + <div className="hidden md:block row-span-1"> + <SpeakerPanel speakers={speakers} /> + </div> + + {/* Transcript Panel - right side, spans 2 rows on desktop */} + <div className="md:row-span-2 min-h-[300px] md:min-h-0"> + <TranscriptPanel transcripts={ws.transcripts} /> + </div> + + {/* Control Panel - bottom left on desktop */} + <div className="md:col-start-1 md:row-start-2"> + <ControlPanel + isListening={isListening} + isConnected={ws.isConnected} + micStatus={mic.status} + micVolume={mic.volume} + onToggle={handleToggle} + onReset={handleReset} + error={error} + /> + </div> + </main> + </div> + ); +} diff --git a/makima/frontend/src/types/messages.ts b/makima/frontend/src/types/messages.ts new file mode 100644 index 0000000..070cdfb --- /dev/null +++ b/makima/frontend/src/types/messages.ts @@ -0,0 +1,56 @@ +// Client -> Server messages +export type StartMessage = { + type: "start"; + sampleRate: number; + channels: number; + encoding: "pcm32f" | "pcm16"; +}; + +export type StopMessage = { + type: "stop"; + reason?: string; +}; + +export type ClientMessage = StartMessage | StopMessage; + +// Server -> Client messages +export type ReadyMessage = { + type: "ready"; + sessionId: string; +}; + +export type TranscriptMessage = { + type: "transcript"; + speaker: string; + start: number; + end: number; + text: string; + isFinal: boolean; +}; + +export type ErrorMessage = { + type: "error"; + code: string; + message: string; +}; + +export type StoppedMessage = { + type: "stopped"; + reason: string; +}; + +export type ServerMessage = + | ReadyMessage + | TranscriptMessage + | ErrorMessage + | StoppedMessage; + +// Transcript entry for display +export interface TranscriptEntry { + id: string; + speaker: string; + start: number; + end: number; + text: string; + isFinal: boolean; +} diff --git a/makima/frontend/src/vite-env.d.ts b/makima/frontend/src/vite-env.d.ts new file mode 100644 index 0000000..aa6d893 --- /dev/null +++ b/makima/frontend/src/vite-env.d.ts @@ -0,0 +1,14 @@ +/// <reference types="vite/client" /> + +interface ImportMetaEnv { + readonly VITE_API_ENV?: "local" | "production"; +} + +interface ImportMeta { + readonly env: ImportMetaEnv; +} + +declare module "*.css" { + const content: string; + export default content; +} |
