summary | refs | log | tree | commit | diff
path: root/makima/frontend/src/hooks/useMicrophone.ts
diff options
context:
space:
mode:
author soryu <soryu@soryu.co> 2025-12-22 04:50:25 +0000
committer soryu <soryu@soryu.co> 2025-12-23 14:47:18 +0000
commit 0741a8b8e9a2099c82bff6d6b9ebbce9c07cad53 (patch)
tree 88cbd5fecb9ca72a04aa07f1a6db4e1a751b1fd7 /makima/frontend/src/hooks/useMicrophone.ts
parent aee2e4e784afd6d115fb5f7b40284c4efd2da966 (diff)
download soryu-0741a8b8e9a2099c82bff6d6b9ebbce9c07cad53.tar.gz
soryu-0741a8b8e9a2099c82bff6d6b9ebbce9c07cad53.zip
Update makima FE to add initial listening system
Diffstat (limited to 'makima/frontend/src/hooks/useMicrophone.ts')
-rw-r--r-- makima/frontend/src/hooks/useMicrophone.ts 248
1 files changed, 248 insertions, 0 deletions
diff --git a/makima/frontend/src/hooks/useMicrophone.ts b/makima/frontend/src/hooks/useMicrophone.ts
new file mode 100644
index 0000000..307904b
--- /dev/null
+++ b/makima/frontend/src/hooks/useMicrophone.ts
@@ -0,0 +1,248 @@
+import { useState, useCallback, useRef, useEffect } from "react";
+
+/**
+ * Lifecycle status reported by the `useMicrophone` hook.
+ *
+ * - `"idle"`: initial status, and the status set by `stop()`.
+ * - `"requesting"`: set at the beginning of `start()` and `requestPermission()`.
+ * - `"ready"`: set after `requestPermission()` obtained (and immediately released) a stream.
+ * - `"recording"`: set once `start()` has connected the audio processing graph.
+ * - `"denied"`: produced by `getErrorMessage` for `NotAllowedError` / `PermissionDeniedError`.
+ * - `"error"`: every other failure, including a non-secure browsing context.
+ */
+export type MicrophoneStatus =
+ | "idle"
+ | "requesting"
+ | "ready"
+ | "recording"
+ | "denied"
+ | "error";
+
+/** State object held by `useMicrophone` and spread into the hook's return value. */
+export interface MicrophoneState {
+ /** Current lifecycle status of the hook. */
+ status: MicrophoneStatus;
+ /** Human-readable failure message, or `null` when there is no error. */
+ error: string | null;
+ /** Initialised to 48000; replaced with the AudioContext's sample rate once recording starts. */
+ sampleRate: number;
+ /** Initialised to 1; not updated by the hook code in this file. */
+ channels: number;
+ /** RMS of the latest input buffer multiplied by 3 and capped at 1; reset to 0 by `stop()`. */
+ volume: number;
+}
+
+/** Options accepted by `useMicrophone`. */
+interface UseMicrophoneOptions {
+ // NOTE(review): declared here, but `useMicrophone` only destructures `onAudioData`,
+ // so this value is never read in this file. Confirm whether it should be applied.
+ sampleRate?: number;
+ /** Called from the processor's `onaudioprocess` handler with a copy of channel 0 of each input buffer. */
+ onAudioData?: (samples: Float32Array) => void;
+}
+
+/**
+ * Known `DOMException` names that microphone access can fail with, mapped to the
+ * message and hook status reported for each of them.
+ */
+const KNOWN_MICROPHONE_FAILURES = new Map<string, { message: string; status: MicrophoneStatus }>([
+  ["NotAllowedError", { message: "Microphone permission denied", status: "denied" }],
+  ["PermissionDeniedError", { message: "Microphone permission denied", status: "denied" }],
+  ["NotFoundError", { message: "No microphone found", status: "error" }],
+  ["NotReadableError", { message: "Microphone is in use by another application", status: "error" }],
+  ["TrackStartError", { message: "Microphone is in use by another application", status: "error" }],
+  ["OverconstrainedError", { message: "Microphone does not support requested settings", status: "error" }],
+  ["AbortError", { message: "Microphone access was aborted", status: "error" }],
+  ["SecurityError", { message: "Microphone access blocked (requires HTTPS)", status: "error" }],
+]);
+
+/**
+ * Translates a value caught while accessing the microphone into a display
+ * message plus the status the hook should switch to.
+ *
+ * @param err - Whatever was thrown or rejected; may be of any type.
+ * @returns The message and status. Only the permission-related `DOMException`
+ *   names yield `"denied"`; everything else yields `"error"`.
+ */
+function getErrorMessage(err: unknown): { message: string; status: MicrophoneStatus } {
+  // Anything that is not a DOMException: use the Error's own message when
+  // there is one, otherwise a generic fallback.
+  if (!(err instanceof DOMException)) {
+    const fallback = err instanceof Error ? err.message : "Failed to access microphone";
+    return { message: fallback, status: "error" };
+  }
+
+  const known = KNOWN_MICROPHONE_FAILURES.get(err.name);
+  if (known !== undefined) {
+    // Hand out a copy so callers never share the table's entries.
+    return { ...known };
+  }
+
+  return { message: `Microphone error: ${err.name} - ${err.message}`, status: "error" };
+}
+
+/** Number of sample frames requested per `onaudioprocess` callback. */
+const MIC_BUFFER_SIZE = 4096;
+
+/** Gain applied to the raw RMS before capping at 1 (typical speech is around 0.1-0.3 RMS). */
+const MIC_VOLUME_GAIN = 3;
+
+/** Error text reported when the page is not running in a secure context. */
+const MIC_INSECURE_CONTEXT_MESSAGE = "Microphone requires HTTPS (or localhost)";
+
+/** True when a `window` exists and it is not a secure context. */
+function isInsecureMicrophoneContext(): boolean {
+  return typeof window !== "undefined" && !window.isSecureContext;
+}
+
+/** Stops every track of `stream`, releasing the capture device. */
+function stopStreamTracks(stream: MediaStream): void {
+  stream.getTracks().forEach((track) => track.stop());
+}
+
+/** Closes `context` unless it is already closed; a failing close is ignored (best-effort teardown). */
+function closeAudioContext(context: AudioContext): void {
+  if (context.state === "closed") return;
+  void context.close().catch(() => {
+    // Nothing useful can be done if the context refuses to close.
+  });
+}
+
+/** Returns the RMS of `samples` scaled by `MIC_VOLUME_GAIN` and capped at 1; 0 for an empty buffer. */
+function computeMicrophoneVolume(samples: Float32Array): number {
+  if (samples.length === 0) return 0;
+  let sumOfSquares = 0;
+  for (const sample of samples) {
+    sumOfSquares += sample * sample;
+  }
+  return Math.min(1, Math.sqrt(sumOfSquares / samples.length) * MIC_VOLUME_GAIN);
+}
+
+/**
+ * React hook that captures microphone audio through the Web Audio API.
+ *
+ * While recording, each input buffer (channel 0, `MIC_BUFFER_SIZE` frames) is
+ * copied and passed to `options.onAudioData`, and a normalised volume is
+ * written to the returned state.
+ *
+ * @param options - Optional settings. Only `onAudioData` is read; the latest
+ *   callback is always used, even if it changes between renders.
+ * @returns The current `MicrophoneState` fields plus:
+ *   - `start()`: resolves `true` once recording, `false` on failure or when
+ *     the attempt was cancelled by `stop()` / unmount. Overlapping calls share
+ *     one attempt.
+ *   - `stop()`: releases every resource and returns the status to `"idle"`.
+ *   - `checkPermission()`: resolves `true` only if the Permissions API
+ *     reports the microphone as already granted.
+ *   - `requestPermission()`: prompts for access without recording.
+ *   - `isRecording` / `isDenied`: convenience flags derived from `status`.
+ */
+export function useMicrophone(options: UseMicrophoneOptions = {}) {
+  const { onAudioData } = options;
+
+  const [state, setState] = useState<MicrophoneState>({
+    status: "idle",
+    error: null,
+    sampleRate: 48000,
+    channels: 1,
+    volume: 0,
+  });
+
+  const streamRef = useRef<MediaStream | null>(null);
+  const audioContextRef = useRef<AudioContext | null>(null);
+  const processorRef = useRef<ScriptProcessorNode | null>(null);
+  const sourceRef = useRef<MediaStreamAudioSourceNode | null>(null);
+  const onAudioDataRef = useRef(onAudioData);
+  // Bumped by every teardown; a start() attempt that sees a different value
+  // after an await knows it has been cancelled.
+  const sessionRef = useRef(0);
+  // The start() attempt currently in flight, shared by overlapping callers.
+  const pendingStartRef = useRef<Promise<boolean> | null>(null);
+
+  // Keep callback ref updated
+  useEffect(() => {
+    onAudioDataRef.current = onAudioData;
+  }, [onAudioData]);
+
+  // Cancels any in-flight start() and releases the graph, stream and context.
+  // Shared by stop() and the unmount cleanup so the two cannot drift apart.
+  const teardown = useCallback(() => {
+    sessionRef.current += 1;
+    pendingStartRef.current = null;
+
+    const processor = processorRef.current;
+    if (processor) {
+      // Detach first so a late callback cannot write volume after the reset.
+      processor.onaudioprocess = null;
+      try {
+        processor.disconnect();
+      } catch {
+        // Already disconnected
+      }
+    }
+    const source = sourceRef.current;
+    if (source) {
+      try {
+        source.disconnect();
+      } catch {
+        // Already disconnected
+      }
+    }
+    processorRef.current = null;
+    sourceRef.current = null;
+
+    if (streamRef.current) {
+      stopStreamTracks(streamRef.current);
+      streamRef.current = null;
+    }
+
+    if (audioContextRef.current) {
+      closeAudioContext(audioContextRef.current);
+      audioContextRef.current = null;
+    }
+  }, []);
+
+  // Check if microphone permission is already granted
+  const checkPermission = useCallback(async (): Promise<boolean> => {
+    try {
+      const result = await navigator.permissions.query({
+        name: "microphone" as PermissionName,
+      });
+      return result.state === "granted";
+    } catch {
+      // Permissions API missing or "microphone" not a supported name.
+      return false;
+    }
+  }, []);
+
+  // Request microphone permission without starting recording
+  const requestPermission = useCallback(async (): Promise<boolean> => {
+    // A live capture graph proves permission was granted; leave the
+    // "recording" status untouched instead of overwriting it.
+    if (processorRef.current) return true;
+
+    setState((s) => ({ ...s, status: "requesting", error: null }));
+
+    if (isInsecureMicrophoneContext()) {
+      setState((s) => ({ ...s, status: "error", error: MIC_INSECURE_CONTEXT_MESSAGE }));
+      return false;
+    }
+
+    try {
+      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+      // Permission granted - stop the stream immediately
+      stopStreamTracks(stream);
+      setState((s) => ({ ...s, status: "ready", error: null }));
+      return true;
+    } catch (err) {
+      const { message, status } = getErrorMessage(err);
+      setState((s) => ({ ...s, status, error: message }));
+      return false;
+    }
+  }, []);
+
+  const start = useCallback((): Promise<boolean> => {
+    // Decide from refs rather than rendered state so a stale closure cannot
+    // start a second capture.
+    if (processorRef.current) return Promise.resolve(true);
+    if (pendingStartRef.current) return pendingStartRef.current;
+
+    const session = sessionRef.current;
+    const isCancelled = () => sessionRef.current !== session;
+
+    const run = async (): Promise<boolean> => {
+      setState((s) => ({ ...s, status: "requesting", error: null }));
+
+      if (isInsecureMicrophoneContext()) {
+        setState((s) => ({ ...s, status: "error", error: MIC_INSECURE_CONTEXT_MESSAGE }));
+        return false;
+      }
+
+      let stream: MediaStream;
+      try {
+        stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+      } catch (err) {
+        if (!isCancelled()) {
+          const { message, status } = getErrorMessage(err);
+          setState((s) => ({ ...s, status, error: message }));
+        }
+        return false;
+      }
+
+      // stop() or unmount happened while the prompt was open: release the
+      // device and leave the state as the teardown left it.
+      if (isCancelled()) {
+        stopStreamTracks(stream);
+        return false;
+      }
+
+      let audioContext: AudioContext | null = null;
+      try {
+        const AudioContextClass =
+          window.AudioContext ||
+          (window as unknown as { webkitAudioContext: typeof AudioContext }).webkitAudioContext;
+        audioContext = new AudioContextClass();
+
+        // Resume audio context if it's suspended
+        if (audioContext.state === "suspended") {
+          await audioContext.resume();
+        }
+
+        if (isCancelled()) {
+          stopStreamTracks(stream);
+          closeAudioContext(audioContext);
+          return false;
+        }
+
+        const source = audioContext.createMediaStreamSource(stream);
+        // ScriptProcessorNode is deprecated in favour of AudioWorklet; kept
+        // here to preserve the existing behaviour.
+        const processor = audioContext.createScriptProcessor(MIC_BUFFER_SIZE, 1, 1);
+
+        processor.onaudioprocess = (event) => {
+          // Copy: the engine may reuse the underlying buffer after the callback.
+          const samples = new Float32Array(event.inputBuffer.getChannelData(0));
+          const volume = computeMicrophoneVolume(samples);
+          setState((s) => ({ ...s, volume }));
+
+          if (onAudioDataRef.current) {
+            onAudioDataRef.current(samples);
+          }
+        };
+
+        source.connect(processor);
+        // The processor is routed to the destination as in the original graph.
+        // NOTE(review): presumably required for `onaudioprocess` to fire in some
+        // browsers - confirm before removing.
+        processor.connect(audioContext.destination);
+
+        // Publish to the refs only once the whole graph is built, so a failure
+        // above never leaves half-initialised refs behind.
+        streamRef.current = stream;
+        audioContextRef.current = audioContext;
+        sourceRef.current = source;
+        processorRef.current = processor;
+
+        const { sampleRate } = audioContext;
+        setState((s) => ({ ...s, status: "recording", sampleRate, error: null }));
+        return true;
+      } catch (err) {
+        // Release everything this attempt created, including the context.
+        stopStreamTracks(stream);
+        if (audioContext) closeAudioContext(audioContext);
+
+        if (!isCancelled()) {
+          const { message, status } = getErrorMessage(err);
+          setState((s) => ({ ...s, status, error: message }));
+        }
+        return false;
+      }
+    };
+
+    const pending: Promise<boolean> = run().finally(() => {
+      // Only clear the slot if a teardown/newer start has not replaced it.
+      if (pendingStartRef.current === pending) {
+        pendingStartRef.current = null;
+      }
+    });
+    pendingStartRef.current = pending;
+    return pending;
+  }, []);
+
+  const stop = useCallback(() => {
+    teardown();
+    setState((s) => ({ ...s, status: "idle", error: null, volume: 0 }));
+  }, [teardown]);
+
+  // Cleanup on unmount
+  useEffect(() => teardown, [teardown]);
+
+  return {
+    ...state,
+    start,
+    stop,
+    checkPermission,
+    requestPermission,
+    isRecording: state.status === "recording",
+    isDenied: state.status === "denied",
+  };
+}