added echo cancellation

2026-03-30 21:18:07 +01:00
parent dc14d00cc8
commit cac01c0590
3 changed files with 137 additions and 112 deletions
--- a/src/hooks/useLiveVoice.ts
+++ b/src/hooks/useLiveVoice.ts
@@ -1,88 +1,114 @@
 // src/hooks/useLiveVoice.ts
-"use client";
-import { useRef, useState, useCallback, useEffect } from "react";
+import { useEffect, useRef, useCallback, useState } from "react";
+import { onTTSSpeakingChange } from "@/lib/tts";

-type LiveVoiceOptions = {
+interface UseLiveVoiceOptions {
  onUtterance: (text: string) => void;
  onSpeechStart?: () => void;
-};
+}
+
+export function useLiveVoice({ onUtterance, onSpeechStart }: UseLiveVoiceOptions) {
+  const recognitionRef    = useRef<SpeechRecognition | null>(null);
+  const stoppedManually   = useRef(false);
+  const ttsActiveRef      = useRef(false);       // ← tracks TTS state
+  const pendingRestartRef = useRef(false);        // ← restart queued?

-export function useLiveVoice({ onUtterance, onSpeechStart }: LiveVoiceOptions) {
  const [isListening, setIsListening] = useState(false);
-  const [isSpeaking, setIsSpeaking] = useState(false);
-  const recognitionRef = useRef<SpeechRecognition | null>(null);
-  const stoppedManually = useRef(false);
+  const [isSpeaking,  setIsSpeaking]  = useState(false);

-  const start = useCallback(() => {
-    const SpeechRecognition =
-      window.SpeechRecognition || (window as any).webkitSpeechRecognition;
+  // ── Internal start/stop helpers ────────────────────────────────────────────

-    if (!SpeechRecognition) {
-      alert("Your browser doesn't support SpeechRecognition. Try Chrome.");
+  const startRecognition = useCallback(() => {
+    if (ttsActiveRef.current) {
+      // TTS is playing — queue a restart for when it finishes
+      pendingRestartRef.current = true;
      return;
    }
+    const SpeechRecognition =
+      window.SpeechRecognition || (window as any).webkitSpeechRecognition;
+    if (!SpeechRecognition) return;

-    const recognition = new SpeechRecognition();
-    recognition.continuous = true;       // keep listening between utterances
-    recognition.interimResults = false;  // only fire when utterance is complete
-    recognition.lang = "en-GB";
+    const r = new SpeechRecognition();
+    r.continuous      = true;
+    r.interimResults  = false;
+    r.lang            = "en-US";

-    recognition.onstart = () => {
-      setIsListening(true);
+    r.onstart       = () => setIsListening(true);
+    r.onspeechstart = () => { setIsSpeaking(true); onSpeechStart?.(); };
+    r.onspeechend   = () => setIsSpeaking(false);
+
+    r.onresult = (e: SpeechRecognitionEvent) => {
+      // Drop any result that came in while TTS was active
+      if (ttsActiveRef.current) return;
+      const transcript = Array.from(e.results)
+        .filter((r) => r.isFinal)
+        .map((r) => r[0].transcript)
+        .join(" ")
+        .trim();
+      if (transcript) onUtterance(transcript);
    };

-    recognition.onspeechstart = () => {
-      setIsSpeaking(true);
-      onSpeechStart?.();
-    };
-
-    recognition.onspeechend = () => {
+    r.onend = () => {
+      setIsListening(false);
      setIsSpeaking(false);
-    };
-
-    recognition.onresult = (event: SpeechRecognitionEvent) => {
-      const last = event.results[event.results.length - 1];
-      if (last.isFinal) {
-        const text = last[0].transcript.trim();
-        if (text) onUtterance(text);
+      // Auto-restart unless the user stopped manually or TTS is active
+      if (!stoppedManually.current && !ttsActiveRef.current) {
+        setTimeout(() => startRecognition(), 200);
      }
    };

-    recognition.onerror = (e: SpeechRecognitionErrorEvent) => {
-      // 'no-speech' is normal background silence — just ignore it
-      if (e.error === "no-speech") return;
-      console.error("SpeechRecognition error:", e.error);
-    };
-
-    recognition.onend = () => {
-      // Auto-restart unless we stopped it manually
-      if (!stoppedManually.current) {
-        recognition.start();
-      } else {
-        setIsListening(false);
-        setIsSpeaking(false);
+    r.onerror = (e: SpeechRecognitionErrorEvent) => {
+      if (e.error !== "no-speech" && e.error !== "aborted") {
+        console.warn("SpeechRecognition error:", e.error);
      }
    };

-    stoppedManually.current = false;
-    recognition.start();
-    recognitionRef.current = recognition;
+    recognitionRef.current = r;
+    r.start();
  }, [onUtterance, onSpeechStart]);

-  const stop = useCallback(() => {
-    stoppedManually.current = true;
+  const stopRecognition = useCallback(() => {
    recognitionRef.current?.stop();
    recognitionRef.current = null;
    setIsListening(false);
    setIsSpeaking(false);
  }, []);

+  // ── TTS listener — pause mic while bot speaks ──────────────────────────────
+
  useEffect(() => {
-    return () => {
-      stoppedManually.current = true;
-      recognitionRef.current?.stop();
-    };
-  }, []);
+    const unsub = onTTSSpeakingChange((speaking) => {
+      ttsActiveRef.current = speaking;
+
+      if (speaking) {
+        // Bot started talking — stop the mic immediately
+        pendingRestartRef.current = false;
+        stopRecognition();
+      } else {
+        // Bot finished talking — restart mic after a short silence gap
+        // so the tail of the TTS audio doesn't get transcribed
+        setTimeout(() => {
+          if (!stoppedManually.current) {
+            pendingRestartRef.current = false;
+            startRecognition();
+          }
+        }, 600); // 600ms grace period after TTS ends
+      }
+    });
+    return unsub;
+  }, [startRecognition, stopRecognition]);
+
+  // ── Public API ─────────────────────────────────────────────────────────────
+
+  const start = useCallback(() => {
+    stoppedManually.current = false;
+    startRecognition();
+  }, [startRecognition]);
+
+  const stop = useCallback(() => {
+    stoppedManually.current = true;
+    stopRecognition();
+  }, [stopRecognition]);

  return { isListening, isSpeaking, start, stop };
 }
--- a/src/hooks/useVoiceRecorder.ts
+++ b/src/hooks/useVoiceRecorder.ts
@@ -1,46 +1,47 @@
 // src/hooks/useVoiceRecorder.ts
-"use client";
-import { useRef, useState, useCallback } from "react";
+import { useRef, useState } from "react";

 export function useVoiceRecorder() {
-  const [isRecording, setIsRecording] = useState(false);
  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
-  const chunksRef = useRef<Blob[]>([]);
+  const chunksRef        = useRef<Blob[]>([]);
+  const [isRecording, setIsRecording] = useState(false);

-  const startRecording = useCallback(async () => {
-    const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
-    const recorder = new MediaRecorder(stream, { mimeType: "audio/webm" });
+  const startRecording = async () => {
+    // ↓ These constraints tell the browser's audio engine to suppress
+    //   echo from the speakers before the mic data ever reaches JS
+    const stream = await navigator.mediaDevices.getUserMedia({
+      audio: {
+        echoCancellation:   true,   // ← removes speaker echo
+        noiseSuppression:   true,   // ← removes background noise
+        autoGainControl:    true,
+        channelCount:       1,
+        sampleRate:         16000,
+      },
+    });
    chunksRef.current = [];
-    recorder.ondataavailable = (e) => {
-      if (e.data.size > 0) chunksRef.current.push(e.data);
-    };
+    const recorder    = new MediaRecorder(stream);
+    recorder.ondataavailable = (e) => { if (e.data.size > 0) chunksRef.current.push(e.data); };
    mediaRecorderRef.current = recorder;
-    recorder.start(250);
+    recorder.start();
    setIsRecording(true);
-  }, []);
+  };

-  // Returns a Float32Array that Transformers.js natively accepts
-  const stopRecording = useCallback((): Promise<Float32Array> => {
-    return new Promise((resolve, reject) => {
+  const stopRecording = (): Promise<Float32Array> => {
+    return new Promise((resolve) => {
      const recorder = mediaRecorderRef.current;
-      if (!recorder) return reject("No recorder active");
-
+      if (!recorder) return resolve(new Float32Array(0));
      recorder.onstop = async () => {
+        const blob    = new Blob(chunksRef.current, { type: "audio/webm" });
+        const arrBuf  = await blob.arrayBuffer();
+        const ctx     = new AudioContext({ sampleRate: 16000 });
+        const decoded = await ctx.decodeAudioData(arrBuf);
+        resolve(decoded.getChannelData(0));
        recorder.stream.getTracks().forEach((t) => t.stop());
        setIsRecording(false);
-
-        const blob = new Blob(chunksRef.current, { type: "audio/webm" });
-        const arrayBuffer = await blob.arrayBuffer();
-        const audioCtx = new AudioContext({ sampleRate: 16000 });
-        const decoded = await audioCtx.decodeAudioData(arrayBuffer);
-
-        // Whisper expects mono 16kHz Float32Array
-        resolve(decoded.getChannelData(0));
      };
-
      recorder.stop();
    });
-  }, []);
+  };

  return { isRecording, startRecording, stopRecording };
 }
--- a/src/lib/tts.ts
+++ b/src/lib/tts.ts
@@ -1,33 +1,31 @@
 // src/lib/tts.ts
-export function speak(text: string, onEnd?: () => void): void {
-  if (typeof window === "undefined" || !window.speechSynthesis) return;
+
+type TTSListener = (speaking: boolean) => void;
+const listeners = new Set<TTSListener>();
+
+export function onTTSSpeakingChange(fn: TTSListener) {
+  listeners.add(fn);
+  return () => listeners.delete(fn);
+}
+
+function notifyListeners(speaking: boolean) {
+  listeners.forEach((fn) => fn(speaking));
+}
+
+export function speak(text: string, rate = 1, pitch = 1) {
+  if (typeof window === "undefined") return;
+  stopSpeaking();
+  const utter = new SpeechSynthesisUtterance(text);
+  utter.rate  = rate;
+  utter.pitch = pitch;
+  utter.onstart = () => notifyListeners(true);
+  utter.onend   = () => notifyListeners(false);
+  utter.onerror = () => notifyListeners(false);
+  window.speechSynthesis.speak(utter);
+}
+
+export function stopSpeaking() {
+  if (typeof window === "undefined") return;
  window.speechSynthesis.cancel();
-
-  const utterance = new SpeechSynthesisUtterance(text);
-  utterance.rate = 1.05;
-  utterance.pitch = 1.0;
-
-  // Wait for voices to load (Safari needs this)
-  const trySpeak = () => {
-    const voices = window.speechSynthesis.getVoices();
-    const preferred = voices.find(
-      (v) =>
-        v.name.includes("Samantha") ||
-        v.name.includes("Google UK English Female") ||
-        v.name.includes("Google US English")
-    );
-    if (preferred) utterance.voice = preferred;
-    if (onEnd) utterance.onend = onEnd;
-    window.speechSynthesis.speak(utterance);
-  };
-
-  if (window.speechSynthesis.getVoices().length > 0) {
-    trySpeak();
-  } else {
-    window.speechSynthesis.onvoiceschanged = trySpeak;
-  }
-}
-
-export function stopSpeaking(): void {
-  window.speechSynthesis?.cancel();
+  notifyListeners(false);
 }