Added echo cancellation

This commit is contained in:
Will
2026-03-30 21:18:07 +01:00
parent dc14d00cc8
commit cac01c0590
3 changed files with 137 additions and 112 deletions

View File

@@ -1,88 +1,114 @@
// src/hooks/useLiveVoice.ts // src/hooks/useLiveVoice.ts
"use client"; import { useEffect, useRef, useCallback, useState } from "react";
import { useRef, useState, useCallback, useEffect } from "react"; import { onTTSSpeakingChange } from "@/lib/tts";
/** Options accepted by {@link useLiveVoice}. */
interface UseLiveVoiceOptions {
  /** Called with the trimmed text of each newly finalised utterance. */
  onUtterance: (text: string) => void;
  /** Called when the recognizer reports that speech has started. */
  onSpeechStart?: () => void;
  /** BCP 47 language tag handed to the recognizer. Defaults to "en-US". */
  lang?: string;
}

/** Delay before re-arming the recognizer after it ends on its own. */
const END_RESTART_DELAY_MS = 200;
/** Grace period after TTS ends so the tail of the TTS audio is not transcribed. */
const TTS_GRACE_MS = 600;
/** Recognizer errors that will not go away by retrying. */
const FATAL_RECOGNITION_ERRORS: readonly string[] = [
  "not-allowed",
  "service-not-allowed",
  "audio-capture",
];

/**
 * Continuous speech recognition that pauses itself while text-to-speech is
 * playing, so the bot's own voice is not fed back in as user input.
 *
 * The microphone is only ever opened after the caller has invoked `start()`;
 * it is released on `stop()`, while TTS is speaking, and on unmount.
 *
 * @returns `isListening` / `isSpeaking` state plus `start` and `stop` controls.
 */
export function useLiveVoice({
  onUtterance,
  onSpeechStart,
  lang = "en-US",
}: UseLiveVoiceOptions) {
  const recognitionRef = useRef<SpeechRecognition | null>(null);
  // True between start() and stop(): the caller wants the mic open whenever possible.
  const wantListeningRef = useRef(false);
  // Mirrors the TTS speaking state reported by onTTSSpeakingChange.
  const ttsActiveRef = useRef(false);
  // The single pending restart timer, if any.
  const restartTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);

  // Latest callbacks/options live in refs so the recognizer handlers never go
  // stale and the helpers below keep a stable identity across renders.
  const onUtteranceRef = useRef(onUtterance);
  const onSpeechStartRef = useRef(onSpeechStart);
  const langRef = useRef(lang);
  useEffect(() => {
    onUtteranceRef.current = onUtterance;
    onSpeechStartRef.current = onSpeechStart;
    langRef.current = lang;
  });

  const [isListening, setIsListening] = useState(false);
  const [isSpeaking, setIsSpeaking] = useState(false);

  // ── Internal start/stop helpers ────────────────────────────────────────────

  const clearRestartTimer = useCallback((): void => {
    if (restartTimerRef.current !== null) {
      clearTimeout(restartTimerRef.current);
      restartTimerRef.current = null;
    }
  }, []);

  const startRecognition = useCallback((): void => {
    clearRestartTimer();
    if (!wantListeningRef.current) return;
    // While TTS is playing the TTS listener below re-arms the mic afterwards.
    if (ttsActiveRef.current) return;
    // Never run two recognizers at once.
    if (recognitionRef.current) return;

    const SpeechRecognitionCtor =
      window.SpeechRecognition || (window as any).webkitSpeechRecognition;
    if (!SpeechRecognitionCtor) {
      console.warn("SpeechRecognition is not supported in this browser.");
      wantListeningRef.current = false;
      return;
    }

    const r: SpeechRecognition = new SpeechRecognitionCtor();
    r.continuous = true;
    r.interimResults = false;
    r.lang = langRef.current;

    r.onstart = () => setIsListening(true);
    r.onspeechstart = () => {
      setIsSpeaking(true);
      onSpeechStartRef.current?.();
    };
    r.onspeechend = () => setIsSpeaking(false);

    r.onresult = (e: SpeechRecognitionEvent) => {
      // Drop any result that came in while TTS was active
      if (ttsActiveRef.current) return;
      // In continuous mode e.results accumulates for the whole session; only
      // entries from resultIndex onward are new in this event.
      const parts: string[] = [];
      for (let i = e.resultIndex; i < e.results.length; i++) {
        const result = e.results[i];
        if (result.isFinal) parts.push(result[0].transcript);
      }
      const transcript = parts.join(" ").trim();
      if (transcript) onUtteranceRef.current(transcript);
    };

    r.onend = () => {
      // A recognizer that was stopped on purpose has already been detached
      // (and the state flags reset) by stopRecognition.
      if (recognitionRef.current !== r) return;
      recognitionRef.current = null;
      setIsListening(false);
      setIsSpeaking(false);
      // Auto-restart unless the user stopped manually or TTS is active
      if (wantListeningRef.current && !ttsActiveRef.current) {
        restartTimerRef.current = setTimeout(startRecognition, END_RESTART_DELAY_MS);
      }
    };

    r.onerror = (e: SpeechRecognitionErrorEvent) => {
      if (e.error === "no-speech" || e.error === "aborted") return;
      console.warn("SpeechRecognition error:", e.error);
      // Retrying cannot fix a denied/missing microphone; without this the
      // onend handler would restart the recognizer in an endless loop.
      if (FATAL_RECOGNITION_ERRORS.includes(e.error)) {
        wantListeningRef.current = false;
      }
    };

    recognitionRef.current = r;
    try {
      r.start();
    } catch (err: unknown) {
      recognitionRef.current = null;
      console.warn("SpeechRecognition failed to start:", err);
    }
  }, [clearRestartTimer]);

  const stopRecognition = useCallback((): void => {
    clearRestartTimer();
    const r = recognitionRef.current;
    // Detach first so the recognizer's onend does not schedule a restart.
    recognitionRef.current = null;
    r?.stop();
    setIsListening(false);
    setIsSpeaking(false);
  }, [clearRestartTimer]);

  // ── TTS listener — pause mic while bot speaks ──────────────────────────────

  useEffect(() => {
    const unsub = onTTSSpeakingChange((speaking) => {
      ttsActiveRef.current = speaking;
      if (speaking) {
        // Bot started talking — stop the mic immediately
        stopRecognition();
        return;
      }
      // Bot finished talking — restart the mic after a short silence gap,
      // but only if the caller actually asked to be listening.
      clearRestartTimer();
      if (wantListeningRef.current) {
        restartTimerRef.current = setTimeout(startRecognition, TTS_GRACE_MS);
      }
    });
    return () => {
      unsub();
    };
  }, [startRecognition, stopRecognition, clearRestartTimer]);

  // Release the microphone and cancel pending restarts on unmount.
  useEffect(() => {
    return () => {
      wantListeningRef.current = false;
      clearRestartTimer();
      const r = recognitionRef.current;
      recognitionRef.current = null;
      // abort() rather than stop(): no further results should be delivered.
      r?.abort();
    };
  }, [clearRestartTimer]);

  // ── Public API ─────────────────────────────────────────────────────────────

  const start = useCallback((): void => {
    wantListeningRef.current = true;
    // If TTS is currently speaking this is a no-op; the TTS listener opens the
    // mic once playback finishes.
    startRecognition();
  }, [startRecognition]);

  const stop = useCallback((): void => {
    wantListeningRef.current = false;
    stopRecognition();
  }, [stopRecognition]);

  return { isListening, isSpeaking, start, stop };
}

View File

@@ -1,46 +1,47 @@
// src/hooks/useVoiceRecorder.ts // src/hooks/useVoiceRecorder.ts
"use client"; import { useRef, useState } from "react";
import { useRef, useState, useCallback } from "react";
/**
 * Push-to-talk style microphone recorder.
 *
 * `startRecording` opens the microphone with echo cancellation, noise
 * suppression and auto gain enabled; `stopRecording` stops it and resolves
 * with the recording decoded to 16 kHz samples of channel 0.
 *
 * @returns `isRecording` state plus `startRecording` / `stopRecording`.
 */
export function useVoiceRecorder() {
  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
  const chunksRef = useRef<Blob[]>([]);
  const [isRecording, setIsRecording] = useState(false);

  /**
   * Opens the microphone and begins recording. A call made while a recording
   * is already in progress is ignored.
   *
   * Rejects if microphone access is denied or the recorder cannot be created.
   */
  const startRecording = async (): Promise<void> => {
    if (mediaRecorderRef.current) return;

    // These constraints tell the browser's audio engine to suppress echo from
    // the speakers before the mic data ever reaches JS.
    const stream = await navigator.mediaDevices.getUserMedia({
      audio: {
        echoCancellation: true, // removes speaker echo
        noiseSuppression: true, // removes background noise
        autoGainControl: true,
        channelCount: 1,
        sampleRate: 16000,
      },
    });

    const releaseStream = (): void => stream.getTracks().forEach((t) => t.stop());

    // Another call may have won the race while the permission prompt was open.
    if (mediaRecorderRef.current) {
      releaseStream();
      return;
    }

    try {
      chunksRef.current = [];
      const recorder = new MediaRecorder(stream);
      recorder.ondataavailable = (e) => {
        if (e.data.size > 0) chunksRef.current.push(e.data);
      };
      mediaRecorderRef.current = recorder;
      recorder.start();
      setIsRecording(true);
    } catch (err: unknown) {
      // Do not leave the microphone open if the recorder could not be started.
      mediaRecorderRef.current = null;
      releaseStream();
      throw err;
    }
  };

  /**
   * Stops the active recording and decodes it.
   *
   * @returns Channel 0 of the recording decoded at 16 kHz (the format Whisper
   *   expects), or an empty array when nothing was recorded.
   *   Rejects if the recorded audio cannot be decoded.
   */
  const stopRecording = (): Promise<Float32Array> =>
    new Promise<Float32Array>((resolve, reject) => {
      const recorder = mediaRecorderRef.current;
      if (!recorder) {
        resolve(new Float32Array(0));
        return;
      }
      // Detach immediately so a second stopRecording() cannot stop it twice.
      mediaRecorderRef.current = null;

      const finish = async (): Promise<void> => {
        recorder.stream.getTracks().forEach((t) => t.stop());
        setIsRecording(false);

        const chunks = chunksRef.current;
        chunksRef.current = [];

        let audioCtx: AudioContext | undefined;
        try {
          // Use the container the recorder actually produced; it is chosen by
          // the browser because no mimeType is passed to MediaRecorder.
          const blob = new Blob(chunks, { type: recorder.mimeType || "audio/webm" });
          if (blob.size === 0) {
            resolve(new Float32Array(0));
            return;
          }
          const arrayBuffer = await blob.arrayBuffer();
          audioCtx = new AudioContext({ sampleRate: 16000 });
          const decoded = await audioCtx.decodeAudioData(arrayBuffer);
          resolve(decoded.getChannelData(0));
        } catch (err: unknown) {
          // Settle the promise instead of leaving the caller waiting forever.
          reject(err instanceof Error ? err : new Error(String(err)));
        } finally {
          // Browsers cap the number of live AudioContexts; release this one.
          void audioCtx?.close().catch(() => undefined);
        }
      };

      // stop() throws on a recorder that has already gone inactive (for
      // example because the input track ended); finish up directly then.
      if (recorder.state === "inactive") {
        void finish();
        return;
      }
      recorder.onstop = () => {
        void finish();
      };
      recorder.stop();
    });

  return { isRecording, startRecording, stopRecording };
}

View File

@@ -1,33 +1,31 @@
// src/lib/tts.ts // src/lib/tts.ts
/** Callback invoked whenever text-to-speech starts (`true`) or stops (`false`). */
type TTSListener = (speaking: boolean) => void;

const listeners = new Set<TTSListener>();
/** The utterance whose events are currently trusted; cancelled ones are ignored. */
let activeUtterance: SpeechSynthesisUtterance | null = null;
/** Last state broadcast to listeners, used to suppress duplicate notifications. */
let lastNotifiedSpeaking = false;

/**
 * Subscribes to changes of the TTS speaking state.
 *
 * @param fn Listener called with `true` when speech starts and `false` when it
 *   ends, fails or is cancelled. Only real transitions are reported.
 * @returns A function that removes the listener.
 */
export function onTTSSpeakingChange(fn: TTSListener): () => void {
  listeners.add(fn);
  return () => {
    listeners.delete(fn);
  };
}

/** Broadcasts a state change; repeated notifications of the same state are dropped. */
function notifyListeners(speaking: boolean): void {
  if (speaking === lastNotifiedSpeaking) return;
  lastNotifiedSpeaking = speaking;
  // Iterate over a copy so listeners may unsubscribe from inside the callback.
  for (const fn of [...listeners]) fn(speaking);
}

/** Returns the speech synthesis engine, or null outside a browser that has one. */
function getSynth(): SpeechSynthesis | null {
  if (typeof window === "undefined") return null;
  return window.speechSynthesis ?? null;
}

/**
 * Speaks `text`, replacing anything that is currently being spoken.
 *
 * Replacing an utterance does not report a "stopped" state in between, so
 * listeners that pause the microphone during TTS keep it paused.
 *
 * @param text Text to speak.
 * @param rate Speaking rate passed to the utterance (default 1).
 * @param pitch Pitch passed to the utterance (default 1).
 */
export function speak(text: string, rate = 1, pitch = 1): void {
  const synth = getSynth();
  if (!synth) return;

  // Orphan the previous utterance before cancelling it: its late end/error
  // events must not be mistaken for the new utterance finishing.
  activeUtterance = null;
  synth.cancel();

  const utter = new SpeechSynthesisUtterance(text);
  utter.rate = rate;
  utter.pitch = pitch;
  activeUtterance = utter;

  const report = (speaking: boolean): void => {
    if (activeUtterance !== utter) return; // stale event from a replaced utterance
    if (!speaking) activeUtterance = null;
    notifyListeners(speaking);
  };
  utter.onstart = () => report(true);
  utter.onend = () => report(false);
  utter.onerror = () => report(false);

  synth.speak(utter);
}

/** Cancels any speech in progress and reports the stopped state to listeners. */
export function stopSpeaking(): void {
  const synth = getSynth();
  if (!synth) return;
  activeUtterance = null;
  synth.cancel();
  notifyListeners(false);
}