From cac01c05909cec4f37a2d6a17a7bd2eba902f035 Mon Sep 17 00:00:00 2001 From: Will Date: Mon, 30 Mar 2026 21:18:07 +0100 Subject: [PATCH] added echo cancelation --- src/hooks/useLiveVoice.ts | 136 ++++++++++++++++++++-------------- src/hooks/useVoiceRecorder.ts | 55 +++++++------- src/lib/tts.ts | 58 +++++++-------- 3 files changed, 137 insertions(+), 112 deletions(-) diff --git a/src/hooks/useLiveVoice.ts b/src/hooks/useLiveVoice.ts index 1973db3..c378790 100644 --- a/src/hooks/useLiveVoice.ts +++ b/src/hooks/useLiveVoice.ts @@ -1,88 +1,114 @@ // src/hooks/useLiveVoice.ts -"use client"; -import { useRef, useState, useCallback, useEffect } from "react"; +import { useEffect, useRef, useCallback, useState } from "react"; +import { onTTSSpeakingChange } from "@/lib/tts"; -type LiveVoiceOptions = { +interface UseLiveVoiceOptions { onUtterance: (text: string) => void; onSpeechStart?: () => void; -}; +} + +export function useLiveVoice({ onUtterance, onSpeechStart }: UseLiveVoiceOptions) { + const recognitionRef = useRef(null); + const stoppedManually = useRef(false); + const ttsActiveRef = useRef(false); // ← tracks TTS state + const pendingRestartRef = useRef(false); // ← restart queued? -export function useLiveVoice({ onUtterance, onSpeechStart }: LiveVoiceOptions) { const [isListening, setIsListening] = useState(false); - const [isSpeaking, setIsSpeaking] = useState(false); - const recognitionRef = useRef(null); - const stoppedManually = useRef(false); + const [isSpeaking, setIsSpeaking] = useState(false); - const start = useCallback(() => { - const SpeechRecognition = - window.SpeechRecognition || (window as any).webkitSpeechRecognition; + // ── Internal start/stop helpers ──────────────────────────────────────────── - if (!SpeechRecognition) { - alert("Your browser doesn't support SpeechRecognition. Try Chrome."); + const startRecognition = useCallback(() => { + if (ttsActiveRef.current) { + // TTS is playing — queue a restart for when it finishes + pendingRestartRef.current = true; return; } + const SpeechRecognition = + window.SpeechRecognition || (window as any).webkitSpeechRecognition; + if (!SpeechRecognition) return; - const recognition = new SpeechRecognition(); - recognition.continuous = true; // keep listening between utterances - recognition.interimResults = false; // only fire when utterance is complete - recognition.lang = "en-GB"; + const r = new SpeechRecognition(); + r.continuous = true; + r.interimResults = false; + r.lang = "en-US"; - recognition.onstart = () => { - setIsListening(true); + r.onstart = () => setIsListening(true); + r.onspeechstart = () => { setIsSpeaking(true); onSpeechStart?.(); }; + r.onspeechend = () => setIsSpeaking(false); + + r.onresult = (e: SpeechRecognitionEvent) => { + // Drop any result that came in while TTS was active + if (ttsActiveRef.current) return; + const transcript = Array.from(e.results) + .filter((r) => r.isFinal) + .map((r) => r[0].transcript) + .join(" ") + .trim(); + if (transcript) onUtterance(transcript); }; - recognition.onspeechstart = () => { - setIsSpeaking(true); - onSpeechStart?.(); - }; - - recognition.onspeechend = () => { + r.onend = () => { + setIsListening(false); setIsSpeaking(false); - }; - - recognition.onresult = (event: SpeechRecognitionEvent) => { - const last = event.results[event.results.length - 1]; - if (last.isFinal) { - const text = last[0].transcript.trim(); - if (text) onUtterance(text); + // Auto-restart unless the user stopped manually or TTS is active + if (!stoppedManually.current && !ttsActiveRef.current) { + setTimeout(() => startRecognition(), 200); } }; - recognition.onerror = (e: SpeechRecognitionErrorEvent) => { - // 'no-speech' is normal background silence — just ignore it - if (e.error === "no-speech") return; - console.error("SpeechRecognition error:", e.error); - }; - - recognition.onend = () => { - // Auto-restart unless we stopped it manually - if (!stoppedManually.current) { - recognition.start(); - } else { - setIsListening(false); - setIsSpeaking(false); + r.onerror = (e: SpeechRecognitionErrorEvent) => { + if (e.error !== "no-speech" && e.error !== "aborted") { + console.warn("SpeechRecognition error:", e.error); } }; - stoppedManually.current = false; - recognition.start(); - recognitionRef.current = recognition; + recognitionRef.current = r; + r.start(); }, [onUtterance, onSpeechStart]); - const stop = useCallback(() => { - stoppedManually.current = true; + const stopRecognition = useCallback(() => { recognitionRef.current?.stop(); recognitionRef.current = null; setIsListening(false); setIsSpeaking(false); }, []); + // ── TTS listener — pause mic while bot speaks ────────────────────────────── + useEffect(() => { - return () => { - stoppedManually.current = true; - recognitionRef.current?.stop(); - }; - }, []); + const unsub = onTTSSpeakingChange((speaking) => { + ttsActiveRef.current = speaking; + + if (speaking) { + // Bot started talking — stop the mic immediately + pendingRestartRef.current = false; + stopRecognition(); + } else { + // Bot finished talking — restart mic after a short silence gap + // so the tail of the TTS audio doesn't get transcribed + setTimeout(() => { + if (!stoppedManually.current) { + pendingRestartRef.current = false; + startRecognition(); + } + }, 600); // 600ms grace period after TTS ends + } + }); + return unsub; + }, [startRecognition, stopRecognition]); + + // ── Public API ───────────────────────────────────────────────────────────── + + const start = useCallback(() => { + stoppedManually.current = false; + startRecognition(); + }, [startRecognition]); + + const stop = useCallback(() => { + stoppedManually.current = true; + stopRecognition(); + }, [stopRecognition]); return { isListening, isSpeaking, start, stop }; } diff --git a/src/hooks/useVoiceRecorder.ts b/src/hooks/useVoiceRecorder.ts index 033e645..18ff3a0 100644 --- a/src/hooks/useVoiceRecorder.ts +++ b/src/hooks/useVoiceRecorder.ts @@ -1,46 +1,47 @@ // src/hooks/useVoiceRecorder.ts -"use client"; -import { useRef, useState, useCallback } from "react"; +import { useRef, useState } from "react"; export function useVoiceRecorder() { - const [isRecording, setIsRecording] = useState(false); const mediaRecorderRef = useRef(null); - const chunksRef = useRef([]); + const chunksRef = useRef([]); + const [isRecording, setIsRecording] = useState(false); - const startRecording = useCallback(async () => { - const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); - const recorder = new MediaRecorder(stream, { mimeType: "audio/webm" }); + const startRecording = async () => { + // ↓ These constraints tell the browser's audio engine to suppress + // echo from the speakers before the mic data ever reaches JS + const stream = await navigator.mediaDevices.getUserMedia({ + audio: { + echoCancellation: true, // ← removes speaker echo + noiseSuppression: true, // ← removes background noise + autoGainControl: true, + channelCount: 1, + sampleRate: 16000, + }, + }); chunksRef.current = []; - recorder.ondataavailable = (e) => { - if (e.data.size > 0) chunksRef.current.push(e.data); - }; + const recorder = new MediaRecorder(stream); + recorder.ondataavailable = (e) => { if (e.data.size > 0) chunksRef.current.push(e.data); }; mediaRecorderRef.current = recorder; - recorder.start(250); + recorder.start(); setIsRecording(true); - }, []); + }; - // Returns a Float32Array that Transformers.js natively accepts - const stopRecording = useCallback((): Promise => { - return new Promise((resolve, reject) => { + const stopRecording = (): Promise => { + return new Promise((resolve) => { const recorder = mediaRecorderRef.current; - if (!recorder) return reject("No recorder active"); - + if (!recorder) return resolve(new Float32Array(0)); recorder.onstop = async () => { + const blob = new Blob(chunksRef.current, { type: "audio/webm" }); + const arrBuf = await blob.arrayBuffer(); + const ctx = new AudioContext({ sampleRate: 16000 }); + const decoded = await ctx.decodeAudioData(arrBuf); + resolve(decoded.getChannelData(0)); recorder.stream.getTracks().forEach((t) => t.stop()); setIsRecording(false); - - const blob = new Blob(chunksRef.current, { type: "audio/webm" }); - const arrayBuffer = await blob.arrayBuffer(); - const audioCtx = new AudioContext({ sampleRate: 16000 }); - const decoded = await audioCtx.decodeAudioData(arrayBuffer); - - // Whisper expects mono 16kHz Float32Array - resolve(decoded.getChannelData(0)); }; - recorder.stop(); }); - }, []); + }; return { isRecording, startRecording, stopRecording }; } diff --git a/src/lib/tts.ts b/src/lib/tts.ts index c492cd2..49e8e64 100644 --- a/src/lib/tts.ts +++ b/src/lib/tts.ts @@ -1,33 +1,31 @@ // src/lib/tts.ts -export function speak(text: string, onEnd?: () => void): void { - if (typeof window === "undefined" || !window.speechSynthesis) return; + +type TTSListener = (speaking: boolean) => void; +const listeners = new Set(); + +export function onTTSSpeakingChange(fn: TTSListener) { + listeners.add(fn); + return () => listeners.delete(fn); +} + +function notifyListeners(speaking: boolean) { + listeners.forEach((fn) => fn(speaking)); +} + +export function speak(text: string, rate = 1, pitch = 1) { + if (typeof window === "undefined") return; + stopSpeaking(); + const utter = new SpeechSynthesisUtterance(text); + utter.rate = rate; + utter.pitch = pitch; + utter.onstart = () => notifyListeners(true); + utter.onend = () => notifyListeners(false); + utter.onerror = () => notifyListeners(false); + window.speechSynthesis.speak(utter); +} + +export function stopSpeaking() { + if (typeof window === "undefined") return; window.speechSynthesis.cancel(); - - const utterance = new SpeechSynthesisUtterance(text); - utterance.rate = 1.05; - utterance.pitch = 1.0; - - // Wait for voices to load (Safari needs this) - const trySpeak = () => { - const voices = window.speechSynthesis.getVoices(); - const preferred = voices.find( - (v) => - v.name.includes("Samantha") || - v.name.includes("Google UK English Female") || - v.name.includes("Google US English") - ); - if (preferred) utterance.voice = preferred; - if (onEnd) utterance.onend = onEnd; - window.speechSynthesis.speak(utterance); - }; - - if (window.speechSynthesis.getVoices().length > 0) { - trySpeak(); - } else { - window.speechSynthesis.onvoiceschanged = trySpeak; - } -} - -export function stopSpeaking(): void { - window.speechSynthesis?.cancel(); + notifyListeners(false); }