Added echo cancellation

This commit is contained in:
Will
2026-03-30 21:18:07 +01:00
parent dc14d00cc8
commit cac01c0590
3 changed files with 137 additions and 112 deletions

View File

@@ -1,88 +1,114 @@
// src/hooks/useLiveVoice.ts
"use client";
import { useRef, useState, useCallback, useEffect } from "react";
import { useEffect, useRef, useCallback, useState } from "react";
import { onTTSSpeakingChange } from "@/lib/tts";
type LiveVoiceOptions = {
interface UseLiveVoiceOptions {
onUtterance: (text: string) => void;
onSpeechStart?: () => void;
};
}
export function useLiveVoice({ onUtterance, onSpeechStart }: UseLiveVoiceOptions) {
const recognitionRef = useRef<SpeechRecognition | null>(null);
const stoppedManually = useRef(false);
const ttsActiveRef = useRef(false); // ← tracks TTS state
const pendingRestartRef = useRef(false); // ← restart queued?
export function useLiveVoice({ onUtterance, onSpeechStart }: LiveVoiceOptions) {
const [isListening, setIsListening] = useState(false);
const [isSpeaking, setIsSpeaking] = useState(false);
const recognitionRef = useRef<SpeechRecognition | null>(null);
const stoppedManually = useRef(false);
const [isSpeaking, setIsSpeaking] = useState(false);
const start = useCallback(() => {
const SpeechRecognition =
window.SpeechRecognition || (window as any).webkitSpeechRecognition;
// ── Internal start/stop helpers ────────────────────────────────────────────
if (!SpeechRecognition) {
alert("Your browser doesn't support SpeechRecognition. Try Chrome.");
const startRecognition = useCallback(() => {
if (ttsActiveRef.current) {
// TTS is playing — queue a restart for when it finishes
pendingRestartRef.current = true;
return;
}
const SpeechRecognition =
window.SpeechRecognition || (window as any).webkitSpeechRecognition;
if (!SpeechRecognition) return;
const recognition = new SpeechRecognition();
recognition.continuous = true; // keep listening between utterances
recognition.interimResults = false; // only fire when utterance is complete
recognition.lang = "en-GB";
const r = new SpeechRecognition();
r.continuous = true;
r.interimResults = false;
r.lang = "en-US";
recognition.onstart = () => {
setIsListening(true);
r.onstart = () => setIsListening(true);
r.onspeechstart = () => { setIsSpeaking(true); onSpeechStart?.(); };
r.onspeechend = () => setIsSpeaking(false);
r.onresult = (e: SpeechRecognitionEvent) => {
// Drop any result that came in while TTS was active
if (ttsActiveRef.current) return;
const transcript = Array.from(e.results)
.filter((r) => r.isFinal)
.map((r) => r[0].transcript)
.join(" ")
.trim();
if (transcript) onUtterance(transcript);
};
recognition.onspeechstart = () => {
setIsSpeaking(true);
onSpeechStart?.();
};
recognition.onspeechend = () => {
r.onend = () => {
setIsListening(false);
setIsSpeaking(false);
};
recognition.onresult = (event: SpeechRecognitionEvent) => {
const last = event.results[event.results.length - 1];
if (last.isFinal) {
const text = last[0].transcript.trim();
if (text) onUtterance(text);
// Auto-restart unless the user stopped manually or TTS is active
if (!stoppedManually.current && !ttsActiveRef.current) {
setTimeout(() => startRecognition(), 200);
}
};
recognition.onerror = (e: SpeechRecognitionErrorEvent) => {
// 'no-speech' is normal background silence — just ignore it
if (e.error === "no-speech") return;
console.error("SpeechRecognition error:", e.error);
};
recognition.onend = () => {
// Auto-restart unless we stopped it manually
if (!stoppedManually.current) {
recognition.start();
} else {
setIsListening(false);
setIsSpeaking(false);
r.onerror = (e: SpeechRecognitionErrorEvent) => {
if (e.error !== "no-speech" && e.error !== "aborted") {
console.warn("SpeechRecognition error:", e.error);
}
};
stoppedManually.current = false;
recognition.start();
recognitionRef.current = recognition;
recognitionRef.current = r;
r.start();
}, [onUtterance, onSpeechStart]);
const stop = useCallback(() => {
stoppedManually.current = true;
const stopRecognition = useCallback(() => {
recognitionRef.current?.stop();
recognitionRef.current = null;
setIsListening(false);
setIsSpeaking(false);
}, []);
// ── TTS listener — pause mic while bot speaks ──────────────────────────────
useEffect(() => {
return () => {
stoppedManually.current = true;
recognitionRef.current?.stop();
};
}, []);
const unsub = onTTSSpeakingChange((speaking) => {
ttsActiveRef.current = speaking;
if (speaking) {
// Bot started talking — stop the mic immediately
pendingRestartRef.current = false;
stopRecognition();
} else {
// Bot finished talking — restart mic after a short silence gap
// so the tail of the TTS audio doesn't get transcribed
setTimeout(() => {
if (!stoppedManually.current) {
pendingRestartRef.current = false;
startRecognition();
}
}, 600); // 600ms grace period after TTS ends
}
});
return unsub;
}, [startRecognition, stopRecognition]);
// ── Public API ─────────────────────────────────────────────────────────────
// Public entry point for turning the mic on. Clears the manual-stop flag first so
// the auto-restart paths (recognition `onend`, end-of-TTS timer) are allowed again,
// then delegates to startRecognition, which queues the start instead of running it
// if TTS is currently playing.
const start = useCallback(() => {
stoppedManually.current = false;
startRecognition();
}, [startRecognition]);
// Public entry point for turning the mic off. The manual-stop flag is set BEFORE
// stopping so that the recognition `onend` handler and the end-of-TTS timer both
// see it and do not restart listening behind the user's back.
const stop = useCallback(() => {
stoppedManually.current = true;
stopRecognition();
}, [stopRecognition]);
return { isListening, isSpeaking, start, stop };
}

View File

@@ -1,46 +1,47 @@
// src/hooks/useVoiceRecorder.ts
"use client";
import { useRef, useState, useCallback } from "react";
import { useRef, useState } from "react";
export function useVoiceRecorder() {
const [isRecording, setIsRecording] = useState(false);
const mediaRecorderRef = useRef<MediaRecorder | null>(null);
const chunksRef = useRef<Blob[]>([]);
const chunksRef = useRef<Blob[]>([]);
const [isRecording, setIsRecording] = useState(false);
const startRecording = useCallback(async () => {
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
const recorder = new MediaRecorder(stream, { mimeType: "audio/webm" });
const startRecording = async () => {
// ↓ These constraints tell the browser's audio engine to suppress
// echo from the speakers before the mic data ever reaches JS
const stream = await navigator.mediaDevices.getUserMedia({
audio: {
echoCancellation: true, // ← removes speaker echo
noiseSuppression: true, // ← removes background noise
autoGainControl: true,
channelCount: 1,
sampleRate: 16000,
},
});
chunksRef.current = [];
recorder.ondataavailable = (e) => {
if (e.data.size > 0) chunksRef.current.push(e.data);
};
const recorder = new MediaRecorder(stream);
recorder.ondataavailable = (e) => { if (e.data.size > 0) chunksRef.current.push(e.data); };
mediaRecorderRef.current = recorder;
recorder.start(250);
recorder.start();
setIsRecording(true);
}, []);
};
// Returns a Float32Array that Transformers.js natively accepts
const stopRecording = useCallback((): Promise<Float32Array> => {
return new Promise((resolve, reject) => {
const stopRecording = (): Promise<Float32Array> => {
return new Promise((resolve) => {
const recorder = mediaRecorderRef.current;
if (!recorder) return reject("No recorder active");
if (!recorder) return resolve(new Float32Array(0));
recorder.onstop = async () => {
const blob = new Blob(chunksRef.current, { type: "audio/webm" });
const arrBuf = await blob.arrayBuffer();
const ctx = new AudioContext({ sampleRate: 16000 });
const decoded = await ctx.decodeAudioData(arrBuf);
resolve(decoded.getChannelData(0));
recorder.stream.getTracks().forEach((t) => t.stop());
setIsRecording(false);
const blob = new Blob(chunksRef.current, { type: "audio/webm" });
const arrayBuffer = await blob.arrayBuffer();
const audioCtx = new AudioContext({ sampleRate: 16000 });
const decoded = await audioCtx.decodeAudioData(arrayBuffer);
// Whisper expects mono 16kHz Float32Array
resolve(decoded.getChannelData(0));
};
recorder.stop();
});
}, []);
};
return { isRecording, startRecording, stopRecording };
}

View File

@@ -1,33 +1,31 @@
// src/lib/tts.ts
export function speak(text: string, onEnd?: () => void): void {
if (typeof window === "undefined" || !window.speechSynthesis) return;
/** Callback invoked with `true` when TTS starts speaking and `false` when it stops. */
type TTSListener = (speaking: boolean) => void;

/** Currently subscribed listeners; a Set, so a given callback is registered at most once. */
const listeners = new Set<TTSListener>();

/** Last state that was broadcast, used to drop repeated identical notifications. */
let lastSpeaking = false;

/**
 * Subscribes `fn` to TTS speaking-state changes.
 *
 * @param fn - Called with the new speaking state every time it changes.
 * @returns An unsubscribe function. It deliberately returns nothing (rather than
 *   the boolean produced by `Set.delete`) so it can be handed straight back as a
 *   React effect cleanup.
 */
export function onTTSSpeakingChange(fn: TTSListener): () => void {
  listeners.add(fn);
  return () => {
    listeners.delete(fn);
  };
}

/**
 * Broadcasts a speaking-state transition to every subscriber.
 *
 * Only real transitions are forwarded: cancelling speech and the cancelled
 * utterance's own end/error events can each report "not speaking", and
 * subscribers should react to that once, not once per source.
 *
 * @param speaking - `true` if TTS just started, `false` if it just stopped.
 */
function notifyListeners(speaking: boolean): void {
  if (speaking === lastSpeaking) return;
  lastSpeaking = speaking;
  listeners.forEach((listener) => listener(speaking));
}
/**
 * Speaks `text` through the browser's speech synthesis, replacing anything that
 * is currently being spoken, and reports start/stop to TTS listeners.
 *
 * Does nothing when there is no `window` (server-side rendering) or when the
 * browser has no `speechSynthesis` implementation.
 *
 * @param text - The text to speak.
 * @param rate - Utterance speaking rate (defaults to 1).
 * @param pitch - Utterance pitch (defaults to 1).
 */
export function speak(text: string, rate = 1, pitch = 1): void {
  // Guard both SSR and browsers without speech synthesis; without the second
  // check the calls below would throw on an undefined `speechSynthesis`.
  if (typeof window === "undefined" || !window.speechSynthesis) return;

  // Cancel whatever is playing so utterances never queue up behind each other.
  stopSpeaking();

  const utter = new SpeechSynthesisUtterance(text);
  utter.rate = rate;
  utter.pitch = pitch;

  // Listeners are told "speaking" only once audio actually starts, and
  // "not speaking" on both normal completion and failure.
  utter.onstart = () => notifyListeners(true);
  utter.onend = () => notifyListeners(false);
  utter.onerror = () => notifyListeners(false);

  window.speechSynthesis.speak(utter);
}
export function stopSpeaking() {
if (typeof window === "undefined") return;
window.speechSynthesis.cancel();
const utterance = new SpeechSynthesisUtterance(text);
utterance.rate = 1.05;
utterance.pitch = 1.0;
// Wait for voices to load (Safari needs this)
const trySpeak = () => {
const voices = window.speechSynthesis.getVoices();
const preferred = voices.find(
(v) =>
v.name.includes("Samantha") ||
v.name.includes("Google UK English Female") ||
v.name.includes("Google US English")
);
if (preferred) utterance.voice = preferred;
if (onEnd) utterance.onend = onEnd;
window.speechSynthesis.speak(utterance);
};
if (window.speechSynthesis.getVoices().length > 0) {
trySpeak();
} else {
window.speechSynthesis.onvoiceschanged = trySpeak;
}
}
export function stopSpeaking(): void {
window.speechSynthesis?.cancel();
notifyListeners(false);
}