added echo cancellation

2026-03-30 21:18:07 +01:00
parent dc14d00cc8
commit cac01c0590
3 changed files with 137 additions and 112 deletions
--- a/src/hooks/useLiveVoice.ts
+++ b/src/hooks/useLiveVoice.ts
@@ -1,88 +1,114 @@
 // src/hooks/useLiveVoice.ts
-"use client";
+import { useEffect, useRef, useCallback, useState } from "react";
-import { useRef, useState, useCallback, useEffect } from "react";
+import { onTTSSpeakingChange } from "@/lib/tts";
/** Callbacks accepted by `useLiveVoice`. */
interface UseLiveVoiceOptions {
  /** Receives the trimmed text of each newly finalized utterance. */
  onUtterance: (text: string) => void;
  /** Invoked when the recognizer reports that speech has started. */
  onSpeechStart?: () => void;
}

/** Delay (ms) before re-arming the recognizer after it ends on its own. */
const RECOGNITION_RESTART_DELAY_MS = 200;

/** Grace period (ms) after TTS ends so the tail of the audio is not transcribed. */
const TTS_RESUME_DELAY_MS = 600;

/**
 * Continuous speech recognition that pauses itself while TTS is speaking.
 *
 * The recognizer is stopped as soon as `onTTSSpeakingChange` reports that TTS
 * started, and is resumed after a short grace period once TTS stops, but only
 * if the mic was live (or `start()` was requested) before or during the
 * speech. A `stop()` from the caller always wins over any queued restart.
 *
 * @param options.onUtterance   Called with each newly finalized transcript.
 * @param options.onSpeechStart Called when the recognizer detects speech.
 * @returns `isListening` / `isSpeaking` state flags plus `start` / `stop` controls.
 */
export function useLiveVoice({ onUtterance, onSpeechStart }: UseLiveVoiceOptions) {
  const recognitionRef    = useRef<SpeechRecognition | null>(null);
  const stoppedManually   = useRef(false); // true only after the caller's stop() or unmount
  const ttsActiveRef      = useRef(false); // mirrors the TTS speaking state
  const pendingRestartRef = useRef(false); // resume the mic once TTS finishes?

  const [isListening, setIsListening] = useState(false);
  const [isSpeaking, setIsSpeaking] = useState(false);

  // Keep the latest callbacks in refs so a live recognizer never calls a stale
  // closure and the helpers below keep a stable identity across renders.
  const onUtteranceRef = useRef(onUtterance);
  const onSpeechStartRef = useRef(onSpeechStart);
  useEffect(() => {
    onUtteranceRef.current = onUtterance;
    onSpeechStartRef.current = onSpeechStart;
  }, [onUtterance, onSpeechStart]);

  // ── Internal start/stop helpers ────────────────────────────────────────────

  const startRecognition = useCallback(() => {
    if (ttsActiveRef.current) {
      // TTS is playing — queue a restart for when it finishes
      pendingRestartRef.current = true;
      return;
    }
    // A recognizer is already live; creating another would duplicate results.
    if (recognitionRef.current) return;

    // The `any` cast reaches the vendor-prefixed constructor.
    const SpeechRecognition =
      window.SpeechRecognition || (window as any).webkitSpeechRecognition;
    if (!SpeechRecognition) return;

    const r = new SpeechRecognition();
    r.continuous      = true;
    r.interimResults  = false;
    r.lang            = "en-US";

    r.onstart       = () => setIsListening(true);
    r.onspeechstart = () => { setIsSpeaking(true); onSpeechStartRef.current?.(); };
    r.onspeechend   = () => setIsSpeaking(false);

    r.onresult = (e: SpeechRecognitionEvent) => {
      // Drop any result that came in while TTS was active
      if (ttsActiveRef.current) return;
      // In continuous mode `results` accumulates for the whole session, so
      // read only the entries from `resultIndex` onward; otherwise every
      // earlier utterance would be emitted again.
      const finals: string[] = [];
      for (let i = e.resultIndex; i < e.results.length; i += 1) {
        const result = e.results[i];
        if (!result?.isFinal) continue;
        const best = result[0];
        if (best) finals.push(best.transcript);
      }
      const transcript = finals.join(" ").trim();
      if (transcript) onUtteranceRef.current(transcript);
    };

    r.onend = () => {
      const current = recognitionRef.current;
      // A newer recognizer has taken over; this late event must not touch its state.
      if (current !== null && current !== r) return;
      recognitionRef.current = null;
      setIsListening(false);
      setIsSpeaking(false);
      // Auto-restart unless the user stopped manually or TTS is active
      if (!stoppedManually.current && !ttsActiveRef.current) {
        setTimeout(() => {
          // Re-check: stop() may have been called during the delay.
          if (!stoppedManually.current) startRecognition();
        }, RECOGNITION_RESTART_DELAY_MS);
      }
    };

    r.onerror = (e: SpeechRecognitionErrorEvent) => {
      if (e.error !== "no-speech" && e.error !== "aborted") {
        console.warn("SpeechRecognition error:", e.error);
      }
    };

    recognitionRef.current = r;
    r.start();
  }, []);

  // Stops the live recognizer without recording it as a user-initiated stop,
  // so the TTS pause below can resume listening afterwards.
  const stopRecognition = useCallback(() => {
    const active = recognitionRef.current;
    recognitionRef.current = null;
    active?.stop();
    setIsListening(false);
    setIsSpeaking(false);
  }, []);

  // ── TTS listener — pause mic while bot speaks ──────────────────────────────
  useEffect(() => {
    let resumeTimer: ReturnType<typeof setTimeout> | undefined;

    const unsub = onTTSSpeakingChange((speaking) => {
      ttsActiveRef.current = speaking;
      // Any earlier resume is superseded by this state change.
      clearTimeout(resumeTimer);
      resumeTimer = undefined;

      if (speaking) {
        // Bot started talking — stop the mic immediately, remembering whether
        // it was live so it is only resumed if it had been running.
        if (recognitionRef.current) pendingRestartRef.current = true;
        stopRecognition();
        return;
      }

      // Bot finished talking — restart mic after a short silence gap
      // so the tail of the TTS audio doesn't get transcribed
      resumeTimer = setTimeout(() => {
        resumeTimer = undefined;
        if (ttsActiveRef.current || stoppedManually.current) return;
        if (!pendingRestartRef.current) return;
        pendingRestartRef.current = false;
        startRecognition();
      }, TTS_RESUME_DELAY_MS);
    });

    return () => {
      clearTimeout(resumeTimer);
      unsub();
    };
  }, [startRecognition, stopRecognition]);

  // Release the microphone when the component using this hook unmounts.
  useEffect(() => {
    return () => {
      stoppedManually.current = true; // blocks any restart still scheduled
      pendingRestartRef.current = false;
      // Reading the ref at cleanup time is intentional: the latest recognizer is the one to stop.
      recognitionRef.current?.stop();
      recognitionRef.current = null;
    };
  }, []);

  // ── Public API ─────────────────────────────────────────────────────────────
  const start = useCallback(() => {
    stoppedManually.current = false;
    startRecognition();
  }, [startRecognition]);

  const stop = useCallback(() => {
    stoppedManually.current = true;
    pendingRestartRef.current = false; // drop any restart queued behind TTS
    stopRecognition();
  }, [stopRecognition]);

  return { isListening, isSpeaking, start, stop };
}
--- a/src/hooks/useVoiceRecorder.ts
+++ b/src/hooks/useVoiceRecorder.ts
@@ -1,46 +1,47 @@
 // src/hooks/useVoiceRecorder.ts
-"use client";
+import { useRef, useState } from "react";
 import { useRef, useState, useCallback } from "react";
 /**
  * Records microphone audio and returns it as mono PCM samples.
  *
  * `startRecording` opens the microphone with echo cancellation, noise
  * suppression and auto gain requested from the browser. `stopRecording`
  * ends the capture, decodes it through a 16 kHz `AudioContext`, and resolves
  * with channel 0 as a `Float32Array`. It resolves with an empty array when
  * nothing was recorded or the audio cannot be decoded; it does not reject.
  *
  * @returns `isRecording` state plus the `startRecording` / `stopRecording` controls.
  */
 export function useVoiceRecorder() {
  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
  const chunksRef        = useRef<Blob[]>([]);
  const [isRecording, setIsRecording] = useState(false);

  const startRecording = async () => {
    // A capture is already running; starting another would leak its stream.
    const existing = mediaRecorderRef.current;
    if (existing && existing.state !== "inactive") return;

    // These constraints ask the browser's audio engine to suppress echo from
    // the speakers before the mic data ever reaches JS.
    const stream = await navigator.mediaDevices.getUserMedia({
      audio: {
        echoCancellation:   true,   // removes speaker echo
        noiseSuppression:   true,   // removes background noise
        autoGainControl:    true,
        channelCount:       1,
        sampleRate:         16000,
      },
    });

    let recorder: MediaRecorder;
    try {
      recorder = new MediaRecorder(stream);
    } catch (err) {
      // Don't leave the microphone open if the recorder can't be created.
      stream.getTracks().forEach((t) => t.stop());
      throw err;
    }

    chunksRef.current = [];
    recorder.ondataavailable = (e) => {
      if (e.data.size > 0) chunksRef.current.push(e.data);
    };
    mediaRecorderRef.current = recorder;
    recorder.start();
    setIsRecording(true);
  };

  const stopRecording = (): Promise<Float32Array> => {
    return new Promise((resolve) => {
      const recorder = mediaRecorderRef.current;
      // Nothing to stop; calling stop() on an inactive recorder would throw.
      if (!recorder || recorder.state === "inactive") {
        resolve(new Float32Array(0));
        return;
      }

      recorder.onstop = async () => {
        // Release the microphone and reset state before the async decode.
        mediaRecorderRef.current = null;
        recorder.stream.getTracks().forEach((t) => t.stop());
        setIsRecording(false);

        const blob = new Blob(chunksRef.current, { type: recorder.mimeType });
        chunksRef.current = [];
        if (blob.size === 0) {
          resolve(new Float32Array(0));
          return;
        }

        const ctx = new AudioContext({ sampleRate: 16000 });
        try {
          const decoded = await ctx.decodeAudioData(await blob.arrayBuffer());
          resolve(decoded.getChannelData(0));
        } catch (err) {
          // Settle the promise on failure instead of leaving it pending forever.
          console.error("Failed to decode recorded audio:", err);
          resolve(new Float32Array(0));
        } finally {
          // Each stop used to leave an AudioContext open; close it once decoding is done.
          void ctx.close();
        }
      };

      recorder.stop();
    });
  };

  return { isRecording, startRecording, stopRecording };
 }
--- a/src/lib/tts.ts
+++ b/src/lib/tts.ts
@@ -1,33 +1,31 @@
 // src/lib/tts.ts
-export function speak(text: string, onEnd?: () => void): void {
+
-  if (typeof window === "undefined" || !window.speechSynthesis) return;
+type TTSListener = (speaking: boolean) => void;
 const listeners = new Set<TTSListener>();
 /**
  * Registers `fn` to be called whenever the TTS speaking state is announced.
  *
  * @param fn Listener invoked with `true` or `false` for the speaking state.
  * @returns A function that removes `fn` from the listener set; it returns
  *          the result of `Set.delete`.
  */
 export function onTTSSpeakingChange(fn: TTSListener) {
  listeners.add(fn);
  return function unsubscribe() {
    return listeners.delete(fn);
  };
 }
 /** Calls every registered listener with the given speaking state. */
 function notifyListeners(speaking: boolean) {
  for (const listener of listeners) {
    listener(speaking);
  }
 }
 /** Utterance most recently handed to the speech engine by `speak`, while it is still pending. */
 let activeUtterance: SpeechSynthesisUtterance | null = null;

 /**
  * Speaks `text` through the browser's speech synthesis, replacing any speech
  * already in progress, and notifies listeners when speaking starts and ends.
  *
  * Does nothing outside the browser or where `speechSynthesis` is unavailable.
  *
  * @param text  Text to speak.
  * @param rate  Utterance rate (defaults to 1).
  * @param pitch Utterance pitch (defaults to 1).
  */
 export function speak(text: string, rate = 1, pitch = 1) {
  if (typeof window === "undefined" || !window.speechSynthesis) return;
  stopSpeaking();

  const utter = new SpeechSynthesisUtterance(text);
  utter.rate  = rate;
  utter.pitch = pitch;

  // A cancelled utterance can still deliver its end/error event after the
  // next utterance has started. Only the current utterance may report state,
  // so a late event from an old one cannot announce "not speaking" mid-speech.
  const finish = () => {
    if (activeUtterance !== utter) return;
    activeUtterance = null;
    notifyListeners(false);
  };
  utter.onstart = () => {
    if (activeUtterance === utter) notifyListeners(true);
  };
  utter.onend   = finish;
  utter.onerror = finish;

  activeUtterance = utter;
  window.speechSynthesis.speak(utter);
 }
 /**
  * Cancels any queued or in-progress speech and tells listeners that TTS is
  * no longer speaking.
  *
  * Does nothing outside the browser or where `speechSynthesis` is unavailable.
  */
 export function stopSpeaking(): void {
  if (typeof window === "undefined" || !window.speechSynthesis) return;
  window.speechSynthesis.cancel();
  notifyListeners(false);
 }