summaryrefslogblamecommitdiff
path: root/makima/frontend/src/hooks/useMicrophone.ts
blob: 307904be06af13e67153ca34761b22c24599c57a (plain) (tree)























































































































































































































































                                                                                                                                             
import { useState, useCallback, useRef, useEffect } from "react";

/**
 * Lifecycle status reported by the `useMicrophone` hook.
 *
 * "denied" is kept separate from "error" so that callers can tell a refused
 * permission apart from every other failure.
 */
export type MicrophoneStatus =
  | "idle" // Initial status, and the status after stop() has run.
  | "requesting" // Set at the beginning of requestPermission() and start().
  | "ready" // requestPermission() succeeded; nothing is being captured.
  | "recording" // start() succeeded and audio is being processed.
  | "denied" // getUserMedia failed with NotAllowedError / PermissionDeniedError.
  | "error"; // Any other failure; details are in MicrophoneState.error.

/** Snapshot of the microphone exposed by the `useMicrophone` hook. */
export interface MicrophoneState {
  /** Current lifecycle status. */
  status: MicrophoneStatus;
  /** Human-readable description of the last failure, or null when there is none. */
  error: string | null;
  /** Starts at 48000 and is replaced by the AudioContext's sample rate once recording starts. */
  sampleRate: number;
  /** Initialised to 1; the hook in this file never updates it. */
  channels: number;
  /** RMS of the latest audio buffer multiplied by 3 and clamped to at most 1; reset to 0 by stop(). */
  volume: number;
}

/** Options accepted by the `useMicrophone` hook. */
interface UseMicrophoneOptions {
  /**
   * NOTE(review): declared but never read by the hook in this file, so passing
   * it currently has no effect — confirm whether it is meant to be honoured.
   */
  sampleRate?: number;
  /** Called for every processed audio buffer with a copy of the channel 0 samples. */
  onAudioData?: (samples: Float32Array) => void;
}

/** User-facing message plus the hook status a microphone failure maps to. */
type MicrophoneFailure = { message: string; status: MicrophoneStatus };

/**
 * Failures keyed by `DOMException.name`. Legacy aliases (PermissionDeniedError,
 * TrackStartError) share the text of their modern equivalents. A Map is used so
 * that names such as "constructor" cannot hit inherited object properties.
 */
const DOM_EXCEPTION_FAILURES: ReadonlyMap<string, MicrophoneFailure> = new Map<string, MicrophoneFailure>([
  ["NotAllowedError", { message: "Microphone permission denied", status: "denied" }],
  ["PermissionDeniedError", { message: "Microphone permission denied", status: "denied" }],
  ["NotFoundError", { message: "No microphone found", status: "error" }],
  ["NotReadableError", { message: "Microphone is in use by another application", status: "error" }],
  ["TrackStartError", { message: "Microphone is in use by another application", status: "error" }],
  ["OverconstrainedError", { message: "Microphone does not support requested settings", status: "error" }],
  ["AbortError", { message: "Microphone access was aborted", status: "error" }],
  ["SecurityError", { message: "Microphone access blocked (requires HTTPS)", status: "error" }],
]);

/**
 * Translates whatever was thrown while accessing the microphone into a
 * user-facing message and the matching hook status.
 *
 * @param err - The caught value; may be a DOMException, an Error or anything else.
 * @returns The message to display and the status to store. Only permission
 *   refusals yield "denied"; every other failure yields "error".
 */
function getErrorMessage(err: unknown): { message: string; status: MicrophoneStatus } {
  // DOMException is tested first: it is the type getUserMedia rejects with, and
  // its `name` is more informative than the generic Error message.
  if (err instanceof DOMException) {
    const known = DOM_EXCEPTION_FAILURES.get(err.name);
    if (known !== undefined) {
      // Hand out a copy so callers can never mutate the shared table.
      return { ...known };
    }
    return { message: `Microphone error: ${err.name} - ${err.message}`, status: "error" };
  }

  const message = err instanceof Error ? err.message : "Failed to access microphone";
  return { message, status: "error" };
}

/** Number of sample frames delivered to each ScriptProcessorNode callback. */
const PROCESSOR_BUFFER_SIZE = 4096;

/** Stops every track of `stream`, which releases the capture device. */
function stopStreamTracks(stream: MediaStream): void {
  stream.getTracks().forEach((track) => track.stop());
}

/** Closes `audioContext` unless it is already closed; teardown failures are ignored. */
function closeAudioContext(audioContext: AudioContext): void {
  if (audioContext.state === "closed") return;
  try {
    // close() is asynchronous; swallow a rejection instead of leaving it unhandled.
    void Promise.resolve(audioContext.close()).catch(() => {
      // Nothing useful can be done if the context refuses to close.
    });
  } catch {
    // Synchronous failure while closing: equally unrecoverable during teardown.
  }
}

/** Computes the RMS level of `samples`, scaled by 3 and clamped to at most 1. */
function computeNormalizedVolume(samples: Float32Array): number {
  // An empty buffer would otherwise produce 0 / 0 = NaN.
  if (samples.length === 0) return 0;

  let sumOfSquares = 0;
  for (let i = 0; i < samples.length; i++) {
    sumOfSquares += samples[i] * samples[i];
  }
  const rms = Math.sqrt(sumOfSquares / samples.length);
  // Typical speech is around 0.1-0.3 RMS, hence the x3 gain before clamping.
  return Math.min(1, rms * 3);
}

/** Returns why the microphone cannot be requested at all, or null when it can. */
function getMicrophoneUnavailableReason(): string | null {
  if (typeof window !== "undefined" && !window.isSecureContext) {
    return "Microphone requires HTTPS (or localhost)";
  }
  if (
    typeof navigator === "undefined" ||
    !navigator.mediaDevices ||
    typeof navigator.mediaDevices.getUserMedia !== "function"
  ) {
    return "Microphone access is not supported in this browser";
  }
  return null;
}

/**
 * React hook that captures microphone audio and exposes its lifecycle.
 *
 * While recording, every processed buffer is copied and handed to
 * `options.onAudioData`, and `volume` is refreshed from the buffer's RMS level.
 * All capture resources are released by `stop()` and when the component
 * unmounts, including when either happens while `start()` is still pending.
 *
 * NOTE: capture relies on ScriptProcessorNode, which is deprecated in favour of
 * AudioWorklet.
 *
 * @param options - Optional settings. Only `onAudioData` is read; the latest
 *   callback is always the one invoked, without restarting the capture.
 * @returns The current {@link MicrophoneState} fields plus:
 *   - `start()`: begins recording; resolves to true on success (or if already
 *     recording) and false on failure or cancellation. Concurrent calls share
 *     one attempt.
 *   - `stop()`: releases every resource and returns to "idle".
 *   - `checkPermission()`: resolves to true only if permission is already granted.
 *   - `requestPermission()`: prompts for permission without recording.
 *   - `isRecording` / `isDenied`: convenience flags derived from `status`.
 */
export function useMicrophone(options: UseMicrophoneOptions = {}) {
  const { onAudioData } = options;

  const [state, setState] = useState<MicrophoneState>({
    status: "idle",
    error: null,
    sampleRate: 48000,
    channels: 1,
    volume: 0,
  });

  // Live capture resources. They are populated together once the audio graph is
  // fully wired, so a non-null processorRef means "currently recording".
  const streamRef = useRef<MediaStream | null>(null);
  const audioContextRef = useRef<AudioContext | null>(null);
  const processorRef = useRef<ScriptProcessorNode | null>(null);
  const sourceRef = useRef<MediaStreamAudioSourceNode | null>(null);
  const onAudioDataRef = useRef(onAudioData);
  // Incremented by stop() and on unmount so a pending start() can detect that
  // it was cancelled while awaiting the browser.
  const sessionRef = useRef(0);
  // The start() attempt currently in flight, shared by concurrent callers.
  const pendingStartRef = useRef<Promise<boolean> | null>(null);

  // Keep callback ref updated so the audio handler always calls the latest one.
  useEffect(() => {
    onAudioDataRef.current = onAudioData;
  }, [onAudioData]);

  // Tears down whatever is currently held in the refs. Safe to call repeatedly.
  const releaseResources = useCallback((): void => {
    const processor = processorRef.current;
    const source = sourceRef.current;
    const stream = streamRef.current;
    const audioContext = audioContextRef.current;

    processorRef.current = null;
    sourceRef.current = null;
    streamRef.current = null;
    audioContextRef.current = null;

    if (processor) {
      // Detach first so an already queued event cannot update state afterwards.
      processor.onaudioprocess = null;
    }
    if (source) {
      try {
        source.disconnect();
      } catch {
        // Already disconnected
      }
    }
    if (processor) {
      try {
        processor.disconnect();
      } catch {
        // Already disconnected
      }
    }
    if (stream) {
      stopStreamTracks(stream);
    }
    if (audioContext) {
      closeAudioContext(audioContext);
    }
  }, []);

  // Invalidates any pending start() and releases the live resources.
  const cancelSession = useCallback((): void => {
    sessionRef.current += 1;
    pendingStartRef.current = null;
    releaseResources();
  }, [releaseResources]);

  // Check if microphone permission is already granted
  const checkPermission = useCallback(async (): Promise<boolean> => {
    try {
      const result = await navigator.permissions.query({
        name: "microphone" as PermissionName,
      });
      return result.state === "granted";
    } catch {
      // Permissions API missing or "microphone" unsupported: treat as not granted.
      return false;
    }
  }, []);

  // Request microphone permission without starting recording
  const requestPermission = useCallback(async (): Promise<boolean> => {
    // Already capturing means permission is granted; do not clobber "recording".
    if (processorRef.current) return true;

    setState((s) => ({ ...s, status: "requesting", error: null }));

    const unavailableReason = getMicrophoneUnavailableReason();
    if (unavailableReason !== null) {
      setState((s) => ({ ...s, status: "error", error: unavailableReason }));
      return false;
    }

    try {
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
      // Permission granted - stop the stream immediately
      stopStreamTracks(stream);
      setState((s) => ({ ...s, status: "ready", error: null }));
      return true;
    } catch (err) {
      const { message, status } = getErrorMessage(err);
      setState((s) => ({ ...s, status, error: message }));
      return false;
    }
  }, []);

  // Performs one start attempt. Never rejects; resolves to false on failure or
  // when stop()/unmount cancelled it, in which case state is left untouched.
  const beginRecording = useCallback(async (): Promise<boolean> => {
    const session = sessionRef.current;
    const isCancelled = (): boolean => sessionRef.current !== session;

    setState((s) => ({ ...s, status: "requesting", error: null }));

    const unavailableReason = getMicrophoneUnavailableReason();
    if (unavailableReason !== null) {
      setState((s) => ({ ...s, status: "error", error: unavailableReason }));
      return false;
    }

    let stream: MediaStream;
    try {
      stream = await navigator.mediaDevices.getUserMedia({ audio: true });
    } catch (err) {
      if (!isCancelled()) {
        const { message, status } = getErrorMessage(err);
        setState((s) => ({ ...s, status, error: message }));
      }
      return false;
    }

    if (isCancelled()) {
      // stop() or unmount happened while the prompt was open: release the device.
      stopStreamTracks(stream);
      return false;
    }

    // Everything stays local until the graph is wired, so that a failure or a
    // cancellation only ever tears down what this attempt created.
    let audioContext: AudioContext | null = null;
    try {
      const AudioContextClass =
        window.AudioContext ||
        (window as unknown as { webkitAudioContext: typeof AudioContext }).webkitAudioContext;
      audioContext = new AudioContextClass();

      // Resume audio context if it's suspended
      if (audioContext.state === "suspended") {
        await audioContext.resume();
      }

      if (isCancelled()) {
        stopStreamTracks(stream);
        closeAudioContext(audioContext);
        return false;
      }

      const source = audioContext.createMediaStreamSource(stream);
      const processor = audioContext.createScriptProcessor(PROCESSOR_BUFFER_SIZE, 1, 1);

      processor.onaudioprocess = (event) => {
        // Copy the samples: the caller must not hold on to the event's buffer.
        const samples = new Float32Array(event.inputBuffer.getChannelData(0));
        const volume = computeNormalizedVolume(samples);
        setState((s) => ({ ...s, volume }));

        if (onAudioDataRef.current) {
          onAudioDataRef.current(samples);
        }
      };

      source.connect(processor);
      // The processor is connected to the destination so that it keeps running.
      processor.connect(audioContext.destination);

      // No await below this point: commit all resources in one go.
      streamRef.current = stream;
      audioContextRef.current = audioContext;
      sourceRef.current = source;
      processorRef.current = processor;

      const { sampleRate } = audioContext;
      setState((s) => ({
        ...s,
        status: "recording",
        sampleRate,
        error: null,
      }));

      return true;
    } catch (err) {
      // Release both the device and the audio context, not just the tracks.
      stopStreamTracks(stream);
      if (audioContext) {
        closeAudioContext(audioContext);
      }

      if (!isCancelled()) {
        const { message, status } = getErrorMessage(err);
        setState((s) => ({ ...s, status, error: message }));
      }
      return false;
    }
  }, []);

  const start = useCallback(async (): Promise<boolean> => {
    if (processorRef.current) return true;

    // Share an attempt that is already in flight instead of opening a second
    // stream whose resources would be orphaned.
    const inFlight = pendingStartRef.current;
    if (inFlight) return inFlight;

    const attempt = beginRecording();
    pendingStartRef.current = attempt;
    try {
      return await attempt;
    } finally {
      // stop() may already have cleared or replaced the pending attempt.
      if (pendingStartRef.current === attempt) {
        pendingStartRef.current = null;
      }
    }
  }, [beginRecording]);

  const stop = useCallback(() => {
    cancelSession();
    setState((s) => ({ ...s, status: "idle", error: null, volume: 0 }));
  }, [cancelSession]);

  // Cleanup on unmount
  useEffect(() => {
    return () => {
      cancelSession();
    };
  }, [cancelSession]);

  return {
    ...state,
    start,
    stop,
    checkPermission,
    requestPermission,
    isRecording: state.status === "recording",
    isDenied: state.status === "denied",
  };
}