Added echo cancellation
This commit is contained in:
@@ -1,88 +1,114 @@
|
||||
// src/hooks/useLiveVoice.ts
|
||||
"use client";
|
||||
import { useRef, useState, useCallback, useEffect } from "react";
|
||||
import { useEffect, useRef, useCallback, useState } from "react";
|
||||
import { onTTSSpeakingChange } from "@/lib/tts";
|
||||
|
||||
type LiveVoiceOptions = {
|
||||
interface UseLiveVoiceOptions {
|
||||
onUtterance: (text: string) => void;
|
||||
onSpeechStart?: () => void;
|
||||
};
|
||||
}
|
||||
|
||||
export function useLiveVoice({ onUtterance, onSpeechStart }: UseLiveVoiceOptions) {
|
||||
const recognitionRef = useRef<SpeechRecognition | null>(null);
|
||||
const stoppedManually = useRef(false);
|
||||
const ttsActiveRef = useRef(false); // ← tracks TTS state
|
||||
const pendingRestartRef = useRef(false); // ← restart queued?
|
||||
|
||||
export function useLiveVoice({ onUtterance, onSpeechStart }: LiveVoiceOptions) {
|
||||
const [isListening, setIsListening] = useState(false);
|
||||
const [isSpeaking, setIsSpeaking] = useState(false);
|
||||
const recognitionRef = useRef<SpeechRecognition | null>(null);
|
||||
const stoppedManually = useRef(false);
|
||||
const [isSpeaking, setIsSpeaking] = useState(false);
|
||||
|
||||
const start = useCallback(() => {
|
||||
const SpeechRecognition =
|
||||
window.SpeechRecognition || (window as any).webkitSpeechRecognition;
|
||||
// ── Internal start/stop helpers ────────────────────────────────────────────
|
||||
|
||||
if (!SpeechRecognition) {
|
||||
alert("Your browser doesn't support SpeechRecognition. Try Chrome.");
|
||||
const startRecognition = useCallback(() => {
|
||||
if (ttsActiveRef.current) {
|
||||
// TTS is playing — queue a restart for when it finishes
|
||||
pendingRestartRef.current = true;
|
||||
return;
|
||||
}
|
||||
const SpeechRecognition =
|
||||
window.SpeechRecognition || (window as any).webkitSpeechRecognition;
|
||||
if (!SpeechRecognition) return;
|
||||
|
||||
const recognition = new SpeechRecognition();
|
||||
recognition.continuous = true; // keep listening between utterances
|
||||
recognition.interimResults = false; // only fire when utterance is complete
|
||||
recognition.lang = "en-GB";
|
||||
const r = new SpeechRecognition();
|
||||
r.continuous = true;
|
||||
r.interimResults = false;
|
||||
r.lang = "en-US";
|
||||
|
||||
recognition.onstart = () => {
|
||||
setIsListening(true);
|
||||
r.onstart = () => setIsListening(true);
|
||||
r.onspeechstart = () => { setIsSpeaking(true); onSpeechStart?.(); };
|
||||
r.onspeechend = () => setIsSpeaking(false);
|
||||
|
||||
r.onresult = (e: SpeechRecognitionEvent) => {
|
||||
// Drop any result that came in while TTS was active
|
||||
if (ttsActiveRef.current) return;
|
||||
const transcript = Array.from(e.results)
|
||||
.filter((r) => r.isFinal)
|
||||
.map((r) => r[0].transcript)
|
||||
.join(" ")
|
||||
.trim();
|
||||
if (transcript) onUtterance(transcript);
|
||||
};
|
||||
|
||||
recognition.onspeechstart = () => {
|
||||
setIsSpeaking(true);
|
||||
onSpeechStart?.();
|
||||
};
|
||||
|
||||
recognition.onspeechend = () => {
|
||||
r.onend = () => {
|
||||
setIsListening(false);
|
||||
setIsSpeaking(false);
|
||||
};
|
||||
|
||||
recognition.onresult = (event: SpeechRecognitionEvent) => {
|
||||
const last = event.results[event.results.length - 1];
|
||||
if (last.isFinal) {
|
||||
const text = last[0].transcript.trim();
|
||||
if (text) onUtterance(text);
|
||||
// Auto-restart unless the user stopped manually or TTS is active
|
||||
if (!stoppedManually.current && !ttsActiveRef.current) {
|
||||
setTimeout(() => startRecognition(), 200);
|
||||
}
|
||||
};
|
||||
|
||||
recognition.onerror = (e: SpeechRecognitionErrorEvent) => {
|
||||
// 'no-speech' is normal background silence — just ignore it
|
||||
if (e.error === "no-speech") return;
|
||||
console.error("SpeechRecognition error:", e.error);
|
||||
};
|
||||
|
||||
recognition.onend = () => {
|
||||
// Auto-restart unless we stopped it manually
|
||||
if (!stoppedManually.current) {
|
||||
recognition.start();
|
||||
} else {
|
||||
setIsListening(false);
|
||||
setIsSpeaking(false);
|
||||
r.onerror = (e: SpeechRecognitionErrorEvent) => {
|
||||
if (e.error !== "no-speech" && e.error !== "aborted") {
|
||||
console.warn("SpeechRecognition error:", e.error);
|
||||
}
|
||||
};
|
||||
|
||||
stoppedManually.current = false;
|
||||
recognition.start();
|
||||
recognitionRef.current = recognition;
|
||||
recognitionRef.current = r;
|
||||
r.start();
|
||||
}, [onUtterance, onSpeechStart]);
|
||||
|
||||
const stop = useCallback(() => {
|
||||
stoppedManually.current = true;
|
||||
const stopRecognition = useCallback(() => {
|
||||
recognitionRef.current?.stop();
|
||||
recognitionRef.current = null;
|
||||
setIsListening(false);
|
||||
setIsSpeaking(false);
|
||||
}, []);
|
||||
|
||||
// ── TTS listener — pause mic while bot speaks ──────────────────────────────
|
||||
|
||||
useEffect(() => {
|
||||
return () => {
|
||||
stoppedManually.current = true;
|
||||
recognitionRef.current?.stop();
|
||||
};
|
||||
}, []);
|
||||
const unsub = onTTSSpeakingChange((speaking) => {
|
||||
ttsActiveRef.current = speaking;
|
||||
|
||||
if (speaking) {
|
||||
// Bot started talking — stop the mic immediately
|
||||
pendingRestartRef.current = false;
|
||||
stopRecognition();
|
||||
} else {
|
||||
// Bot finished talking — restart mic after a short silence gap
|
||||
// so the tail of the TTS audio doesn't get transcribed
|
||||
setTimeout(() => {
|
||||
if (!stoppedManually.current) {
|
||||
pendingRestartRef.current = false;
|
||||
startRecognition();
|
||||
}
|
||||
}, 600); // 600ms grace period after TTS ends
|
||||
}
|
||||
});
|
||||
return unsub;
|
||||
}, [startRecognition, stopRecognition]);
|
||||
|
||||
// ── Public API ─────────────────────────────────────────────────────────────
|
||||
|
||||
const start = useCallback(() => {
|
||||
stoppedManually.current = false;
|
||||
startRecognition();
|
||||
}, [startRecognition]);
|
||||
|
||||
const stop = useCallback(() => {
|
||||
stoppedManually.current = true;
|
||||
stopRecognition();
|
||||
}, [stopRecognition]);
|
||||
|
||||
return { isListening, isSpeaking, start, stop };
|
||||
}
|
||||
|
||||
@@ -1,46 +1,47 @@
|
||||
// src/hooks/useVoiceRecorder.ts
|
||||
"use client";
|
||||
import { useRef, useState, useCallback } from "react";
|
||||
import { useRef, useState } from "react";
|
||||
|
||||
export function useVoiceRecorder() {
|
||||
const [isRecording, setIsRecording] = useState(false);
|
||||
const mediaRecorderRef = useRef<MediaRecorder | null>(null);
|
||||
const chunksRef = useRef<Blob[]>([]);
|
||||
const chunksRef = useRef<Blob[]>([]);
|
||||
const [isRecording, setIsRecording] = useState(false);
|
||||
|
||||
const startRecording = useCallback(async () => {
|
||||
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
|
||||
const recorder = new MediaRecorder(stream, { mimeType: "audio/webm" });
|
||||
const startRecording = async () => {
|
||||
// ↓ These constraints tell the browser's audio engine to suppress
|
||||
// echo from the speakers before the mic data ever reaches JS
|
||||
const stream = await navigator.mediaDevices.getUserMedia({
|
||||
audio: {
|
||||
echoCancellation: true, // ← removes speaker echo
|
||||
noiseSuppression: true, // ← removes background noise
|
||||
autoGainControl: true,
|
||||
channelCount: 1,
|
||||
sampleRate: 16000,
|
||||
},
|
||||
});
|
||||
chunksRef.current = [];
|
||||
recorder.ondataavailable = (e) => {
|
||||
if (e.data.size > 0) chunksRef.current.push(e.data);
|
||||
};
|
||||
const recorder = new MediaRecorder(stream);
|
||||
recorder.ondataavailable = (e) => { if (e.data.size > 0) chunksRef.current.push(e.data); };
|
||||
mediaRecorderRef.current = recorder;
|
||||
recorder.start(250);
|
||||
recorder.start();
|
||||
setIsRecording(true);
|
||||
}, []);
|
||||
};
|
||||
|
||||
// Returns a Float32Array that Transformers.js natively accepts
|
||||
const stopRecording = useCallback((): Promise<Float32Array> => {
|
||||
return new Promise((resolve, reject) => {
|
||||
const stopRecording = (): Promise<Float32Array> => {
|
||||
return new Promise((resolve) => {
|
||||
const recorder = mediaRecorderRef.current;
|
||||
if (!recorder) return reject("No recorder active");
|
||||
|
||||
if (!recorder) return resolve(new Float32Array(0));
|
||||
recorder.onstop = async () => {
|
||||
const blob = new Blob(chunksRef.current, { type: "audio/webm" });
|
||||
const arrBuf = await blob.arrayBuffer();
|
||||
const ctx = new AudioContext({ sampleRate: 16000 });
|
||||
const decoded = await ctx.decodeAudioData(arrBuf);
|
||||
resolve(decoded.getChannelData(0));
|
||||
recorder.stream.getTracks().forEach((t) => t.stop());
|
||||
setIsRecording(false);
|
||||
|
||||
const blob = new Blob(chunksRef.current, { type: "audio/webm" });
|
||||
const arrayBuffer = await blob.arrayBuffer();
|
||||
const audioCtx = new AudioContext({ sampleRate: 16000 });
|
||||
const decoded = await audioCtx.decodeAudioData(arrayBuffer);
|
||||
|
||||
// Whisper expects mono 16kHz Float32Array
|
||||
resolve(decoded.getChannelData(0));
|
||||
};
|
||||
|
||||
recorder.stop();
|
||||
});
|
||||
}, []);
|
||||
};
|
||||
|
||||
return { isRecording, startRecording, stopRecording };
|
||||
}
|
||||
|
||||
@@ -1,33 +1,31 @@
|
||||
// src/lib/tts.ts
|
||||
export function speak(text: string, onEnd?: () => void): void {
|
||||
if (typeof window === "undefined" || !window.speechSynthesis) return;
|
||||
|
||||
/** Callback invoked with `true` when TTS playback starts and `false` when it stops. */
type TTSListener = (speaking: boolean) => void;

/** Registered TTS state listeners; a Set so the same callback is only stored once. */
const listeners = new Set<TTSListener>();

/**
 * Subscribes `fn` to TTS speaking-state notifications.
 *
 * @param fn - Called with the new speaking state on every notification.
 * @returns An unsubscribe function. It deliberately returns nothing (rather
 *   than the boolean from `Set.delete`) so it can be returned directly as a
 *   React effect cleanup.
 */
export function onTTSSpeakingChange(fn: TTSListener): () => void {
  listeners.add(fn);
  return () => {
    listeners.delete(fn);
  };
}

/**
 * Calls every registered listener with the given speaking state.
 *
 * @param speaking - `true` if TTS is now speaking, `false` otherwise.
 */
function notifyListeners(speaking: boolean): void {
  listeners.forEach((fn) => fn(speaking));
}
|
||||
|
||||
/** The utterance most recently handed to the synthesizer by `speak`, if still pending or playing. */
let activeUtterance: SpeechSynthesisUtterance | null = null;

/**
 * Speaks `text` through the browser's speech synthesizer, replacing anything
 * that is currently being spoken, and notifies TTS listeners when playback
 * starts and stops.
 *
 * Does nothing outside the browser or when speech synthesis is unavailable.
 *
 * @param text - The text to speak.
 * @param rate - Utterance rate passed to the synthesizer (default 1).
 * @param pitch - Utterance pitch passed to the synthesizer (default 1).
 */
export function speak(text: string, rate = 1, pitch = 1): void {
  if (typeof window === "undefined" || !window.speechSynthesis) return;

  // Cancel whatever is currently playing before queuing the new utterance.
  stopSpeaking();

  const utter = new SpeechSynthesisUtterance(text);
  utter.rate = rate;
  utter.pitch = pitch;
  activeUtterance = utter;

  // Cancelling an utterance makes the browser deliver its end/error event
  // asynchronously, possibly after a newer utterance has already started.
  // Only the utterance that is still current may report a state change,
  // otherwise a stale event would announce "not speaking" mid-speech.
  const finish = (): void => {
    if (activeUtterance !== utter) return;
    activeUtterance = null;
    notifyListeners(false);
  };

  utter.onstart = () => {
    if (activeUtterance === utter) notifyListeners(true);
  };
  utter.onend = finish;
  utter.onerror = finish;

  window.speechSynthesis.speak(utter);
}
|
||||
|
||||
export function stopSpeaking() {
|
||||
if (typeof window === "undefined") return;
|
||||
window.speechSynthesis.cancel();
|
||||
|
||||
const utterance = new SpeechSynthesisUtterance(text);
|
||||
utterance.rate = 1.05;
|
||||
utterance.pitch = 1.0;
|
||||
|
||||
// Wait for voices to load (Safari needs this)
|
||||
const trySpeak = () => {
|
||||
const voices = window.speechSynthesis.getVoices();
|
||||
const preferred = voices.find(
|
||||
(v) =>
|
||||
v.name.includes("Samantha") ||
|
||||
v.name.includes("Google UK English Female") ||
|
||||
v.name.includes("Google US English")
|
||||
);
|
||||
if (preferred) utterance.voice = preferred;
|
||||
if (onEnd) utterance.onend = onEnd;
|
||||
window.speechSynthesis.speak(utterance);
|
||||
};
|
||||
|
||||
if (window.speechSynthesis.getVoices().length > 0) {
|
||||
trySpeak();
|
||||
} else {
|
||||
window.speechSynthesis.onvoiceschanged = trySpeak;
|
||||
}
|
||||
}
|
||||
|
||||
export function stopSpeaking(): void {
|
||||
window.speechSynthesis?.cancel();
|
||||
notifyListeners(false);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user