Added echo cancellation
This commit is contained in:
@@ -1,88 +1,114 @@
|
|||||||
// src/hooks/useLiveVoice.ts
|
// src/hooks/useLiveVoice.ts
|
||||||
"use client";
|
import { useEffect, useRef, useCallback, useState } from "react";
|
||||||
import { useRef, useState, useCallback, useEffect } from "react";
|
import { onTTSSpeakingChange } from "@/lib/tts";
|
||||||
|
|
||||||
type LiveVoiceOptions = {
|
interface UseLiveVoiceOptions {
|
||||||
onUtterance: (text: string) => void;
|
onUtterance: (text: string) => void;
|
||||||
onSpeechStart?: () => void;
|
onSpeechStart?: () => void;
|
||||||
};
|
}
|
||||||
|
|
||||||
export function useLiveVoice({ onUtterance, onSpeechStart }: LiveVoiceOptions) {
|
export function useLiveVoice({ onUtterance, onSpeechStart }: UseLiveVoiceOptions) {
|
||||||
const [isListening, setIsListening] = useState(false);
|
|
||||||
const [isSpeaking, setIsSpeaking] = useState(false);
|
|
||||||
const recognitionRef = useRef<SpeechRecognition | null>(null);
|
const recognitionRef = useRef<SpeechRecognition | null>(null);
|
||||||
const stoppedManually = useRef(false);
|
const stoppedManually = useRef(false);
|
||||||
|
const ttsActiveRef = useRef(false); // ← tracks TTS state
|
||||||
|
const pendingRestartRef = useRef(false); // ← restart queued?
|
||||||
|
|
||||||
const start = useCallback(() => {
|
const [isListening, setIsListening] = useState(false);
|
||||||
const SpeechRecognition =
|
const [isSpeaking, setIsSpeaking] = useState(false);
|
||||||
window.SpeechRecognition || (window as any).webkitSpeechRecognition;
|
|
||||||
|
|
||||||
if (!SpeechRecognition) {
|
// ── Internal start/stop helpers ────────────────────────────────────────────
|
||||||
alert("Your browser doesn't support SpeechRecognition. Try Chrome.");
|
|
||||||
|
const startRecognition = useCallback(() => {
|
||||||
|
if (ttsActiveRef.current) {
|
||||||
|
// TTS is playing — queue a restart for when it finishes
|
||||||
|
pendingRestartRef.current = true;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
const SpeechRecognition =
|
||||||
|
window.SpeechRecognition || (window as any).webkitSpeechRecognition;
|
||||||
|
if (!SpeechRecognition) return;
|
||||||
|
|
||||||
const recognition = new SpeechRecognition();
|
const r = new SpeechRecognition();
|
||||||
recognition.continuous = true; // keep listening between utterances
|
r.continuous = true;
|
||||||
recognition.interimResults = false; // only fire when utterance is complete
|
r.interimResults = false;
|
||||||
recognition.lang = "en-GB";
|
r.lang = "en-US";
|
||||||
|
|
||||||
recognition.onstart = () => {
|
r.onstart = () => setIsListening(true);
|
||||||
setIsListening(true);
|
r.onspeechstart = () => { setIsSpeaking(true); onSpeechStart?.(); };
|
||||||
|
r.onspeechend = () => setIsSpeaking(false);
|
||||||
|
|
||||||
|
r.onresult = (e: SpeechRecognitionEvent) => {
|
||||||
|
// Drop any result that came in while TTS was active
|
||||||
|
if (ttsActiveRef.current) return;
|
||||||
|
const transcript = Array.from(e.results)
|
||||||
|
.filter((r) => r.isFinal)
|
||||||
|
.map((r) => r[0].transcript)
|
||||||
|
.join(" ")
|
||||||
|
.trim();
|
||||||
|
if (transcript) onUtterance(transcript);
|
||||||
};
|
};
|
||||||
|
|
||||||
recognition.onspeechstart = () => {
|
r.onend = () => {
|
||||||
setIsSpeaking(true);
|
|
||||||
onSpeechStart?.();
|
|
||||||
};
|
|
||||||
|
|
||||||
recognition.onspeechend = () => {
|
|
||||||
setIsSpeaking(false);
|
|
||||||
};
|
|
||||||
|
|
||||||
recognition.onresult = (event: SpeechRecognitionEvent) => {
|
|
||||||
const last = event.results[event.results.length - 1];
|
|
||||||
if (last.isFinal) {
|
|
||||||
const text = last[0].transcript.trim();
|
|
||||||
if (text) onUtterance(text);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
recognition.onerror = (e: SpeechRecognitionErrorEvent) => {
|
|
||||||
// 'no-speech' is normal background silence — just ignore it
|
|
||||||
if (e.error === "no-speech") return;
|
|
||||||
console.error("SpeechRecognition error:", e.error);
|
|
||||||
};
|
|
||||||
|
|
||||||
recognition.onend = () => {
|
|
||||||
// Auto-restart unless we stopped it manually
|
|
||||||
if (!stoppedManually.current) {
|
|
||||||
recognition.start();
|
|
||||||
} else {
|
|
||||||
setIsListening(false);
|
setIsListening(false);
|
||||||
setIsSpeaking(false);
|
setIsSpeaking(false);
|
||||||
|
// Auto-restart unless the user stopped manually or TTS is active
|
||||||
|
if (!stoppedManually.current && !ttsActiveRef.current) {
|
||||||
|
setTimeout(() => startRecognition(), 200);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
stoppedManually.current = false;
|
r.onerror = (e: SpeechRecognitionErrorEvent) => {
|
||||||
recognition.start();
|
if (e.error !== "no-speech" && e.error !== "aborted") {
|
||||||
recognitionRef.current = recognition;
|
console.warn("SpeechRecognition error:", e.error);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
recognitionRef.current = r;
|
||||||
|
r.start();
|
||||||
}, [onUtterance, onSpeechStart]);
|
}, [onUtterance, onSpeechStart]);
|
||||||
|
|
||||||
const stop = useCallback(() => {
|
const stopRecognition = useCallback(() => {
|
||||||
stoppedManually.current = true;
|
|
||||||
recognitionRef.current?.stop();
|
recognitionRef.current?.stop();
|
||||||
recognitionRef.current = null;
|
recognitionRef.current = null;
|
||||||
setIsListening(false);
|
setIsListening(false);
|
||||||
setIsSpeaking(false);
|
setIsSpeaking(false);
|
||||||
}, []);
|
}, []);
|
||||||
|
|
||||||
|
// ── TTS listener — pause mic while bot speaks ──────────────────────────────
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
return () => {
|
const unsub = onTTSSpeakingChange((speaking) => {
|
||||||
|
ttsActiveRef.current = speaking;
|
||||||
|
|
||||||
|
if (speaking) {
|
||||||
|
// Bot started talking — stop the mic immediately
|
||||||
|
pendingRestartRef.current = false;
|
||||||
|
stopRecognition();
|
||||||
|
} else {
|
||||||
|
// Bot finished talking — restart mic after a short silence gap
|
||||||
|
// so the tail of the TTS audio doesn't get transcribed
|
||||||
|
setTimeout(() => {
|
||||||
|
if (!stoppedManually.current) {
|
||||||
|
pendingRestartRef.current = false;
|
||||||
|
startRecognition();
|
||||||
|
}
|
||||||
|
}, 600); // 600ms grace period after TTS ends
|
||||||
|
}
|
||||||
|
});
|
||||||
|
return unsub;
|
||||||
|
}, [startRecognition, stopRecognition]);
|
||||||
|
|
||||||
|
// ── Public API ─────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
const start = useCallback(() => {
|
||||||
|
stoppedManually.current = false;
|
||||||
|
startRecognition();
|
||||||
|
}, [startRecognition]);
|
||||||
|
|
||||||
|
const stop = useCallback(() => {
|
||||||
stoppedManually.current = true;
|
stoppedManually.current = true;
|
||||||
recognitionRef.current?.stop();
|
stopRecognition();
|
||||||
};
|
}, [stopRecognition]);
|
||||||
}, []);
|
|
||||||
|
|
||||||
return { isListening, isSpeaking, start, stop };
|
return { isListening, isSpeaking, start, stop };
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,46 +1,47 @@
|
|||||||
// src/hooks/useVoiceRecorder.ts
|
// src/hooks/useVoiceRecorder.ts
|
||||||
"use client";
|
import { useRef, useState } from "react";
|
||||||
import { useRef, useState, useCallback } from "react";
|
|
||||||
|
|
||||||
export function useVoiceRecorder() {
|
export function useVoiceRecorder() {
|
||||||
const [isRecording, setIsRecording] = useState(false);
|
|
||||||
const mediaRecorderRef = useRef<MediaRecorder | null>(null);
|
const mediaRecorderRef = useRef<MediaRecorder | null>(null);
|
||||||
const chunksRef = useRef<Blob[]>([]);
|
const chunksRef = useRef<Blob[]>([]);
|
||||||
|
const [isRecording, setIsRecording] = useState(false);
|
||||||
|
|
||||||
const startRecording = useCallback(async () => {
|
const startRecording = async () => {
|
||||||
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
|
// ↓ These constraints tell the browser's audio engine to suppress
|
||||||
const recorder = new MediaRecorder(stream, { mimeType: "audio/webm" });
|
// echo from the speakers before the mic data ever reaches JS
|
||||||
|
const stream = await navigator.mediaDevices.getUserMedia({
|
||||||
|
audio: {
|
||||||
|
echoCancellation: true, // ← removes speaker echo
|
||||||
|
noiseSuppression: true, // ← removes background noise
|
||||||
|
autoGainControl: true,
|
||||||
|
channelCount: 1,
|
||||||
|
sampleRate: 16000,
|
||||||
|
},
|
||||||
|
});
|
||||||
chunksRef.current = [];
|
chunksRef.current = [];
|
||||||
recorder.ondataavailable = (e) => {
|
const recorder = new MediaRecorder(stream);
|
||||||
if (e.data.size > 0) chunksRef.current.push(e.data);
|
recorder.ondataavailable = (e) => { if (e.data.size > 0) chunksRef.current.push(e.data); };
|
||||||
};
|
|
||||||
mediaRecorderRef.current = recorder;
|
mediaRecorderRef.current = recorder;
|
||||||
recorder.start(250);
|
recorder.start();
|
||||||
setIsRecording(true);
|
setIsRecording(true);
|
||||||
}, []);
|
};
|
||||||
|
|
||||||
// Returns a Float32Array that Transformers.js natively accepts
|
const stopRecording = (): Promise<Float32Array> => {
|
||||||
const stopRecording = useCallback((): Promise<Float32Array> => {
|
return new Promise((resolve) => {
|
||||||
return new Promise((resolve, reject) => {
|
|
||||||
const recorder = mediaRecorderRef.current;
|
const recorder = mediaRecorderRef.current;
|
||||||
if (!recorder) return reject("No recorder active");
|
if (!recorder) return resolve(new Float32Array(0));
|
||||||
|
|
||||||
recorder.onstop = async () => {
|
recorder.onstop = async () => {
|
||||||
|
const blob = new Blob(chunksRef.current, { type: "audio/webm" });
|
||||||
|
const arrBuf = await blob.arrayBuffer();
|
||||||
|
const ctx = new AudioContext({ sampleRate: 16000 });
|
||||||
|
const decoded = await ctx.decodeAudioData(arrBuf);
|
||||||
|
resolve(decoded.getChannelData(0));
|
||||||
recorder.stream.getTracks().forEach((t) => t.stop());
|
recorder.stream.getTracks().forEach((t) => t.stop());
|
||||||
setIsRecording(false);
|
setIsRecording(false);
|
||||||
|
|
||||||
const blob = new Blob(chunksRef.current, { type: "audio/webm" });
|
|
||||||
const arrayBuffer = await blob.arrayBuffer();
|
|
||||||
const audioCtx = new AudioContext({ sampleRate: 16000 });
|
|
||||||
const decoded = await audioCtx.decodeAudioData(arrayBuffer);
|
|
||||||
|
|
||||||
// Whisper expects mono 16kHz Float32Array
|
|
||||||
resolve(decoded.getChannelData(0));
|
|
||||||
};
|
};
|
||||||
|
|
||||||
recorder.stop();
|
recorder.stop();
|
||||||
});
|
});
|
||||||
}, []);
|
};
|
||||||
|
|
||||||
return { isRecording, startRecording, stopRecording };
|
return { isRecording, startRecording, stopRecording };
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,33 +1,31 @@
|
|||||||
// src/lib/tts.ts
|
// src/lib/tts.ts
|
||||||
export function speak(text: string, onEnd?: () => void): void {
|
|
||||||
if (typeof window === "undefined" || !window.speechSynthesis) return;
|
type TTSListener = (speaking: boolean) => void;
|
||||||
|
const listeners = new Set<TTSListener>();
|
||||||
|
|
||||||
|
export function onTTSSpeakingChange(fn: TTSListener) {
|
||||||
|
listeners.add(fn);
|
||||||
|
return () => listeners.delete(fn);
|
||||||
|
}
|
||||||
|
|
||||||
|
function notifyListeners(speaking: boolean) {
|
||||||
|
listeners.forEach((fn) => fn(speaking));
|
||||||
|
}
|
||||||
|
|
||||||
|
export function speak(text: string, rate = 1, pitch = 1) {
|
||||||
|
if (typeof window === "undefined") return;
|
||||||
|
stopSpeaking();
|
||||||
|
const utter = new SpeechSynthesisUtterance(text);
|
||||||
|
utter.rate = rate;
|
||||||
|
utter.pitch = pitch;
|
||||||
|
utter.onstart = () => notifyListeners(true);
|
||||||
|
utter.onend = () => notifyListeners(false);
|
||||||
|
utter.onerror = () => notifyListeners(false);
|
||||||
|
window.speechSynthesis.speak(utter);
|
||||||
|
}
|
||||||
|
|
||||||
|
export function stopSpeaking() {
|
||||||
|
if (typeof window === "undefined") return;
|
||||||
window.speechSynthesis.cancel();
|
window.speechSynthesis.cancel();
|
||||||
|
notifyListeners(false);
|
||||||
const utterance = new SpeechSynthesisUtterance(text);
|
|
||||||
utterance.rate = 1.05;
|
|
||||||
utterance.pitch = 1.0;
|
|
||||||
|
|
||||||
// Wait for voices to load (Safari needs this)
|
|
||||||
const trySpeak = () => {
|
|
||||||
const voices = window.speechSynthesis.getVoices();
|
|
||||||
const preferred = voices.find(
|
|
||||||
(v) =>
|
|
||||||
v.name.includes("Samantha") ||
|
|
||||||
v.name.includes("Google UK English Female") ||
|
|
||||||
v.name.includes("Google US English")
|
|
||||||
);
|
|
||||||
if (preferred) utterance.voice = preferred;
|
|
||||||
if (onEnd) utterance.onend = onEnd;
|
|
||||||
window.speechSynthesis.speak(utterance);
|
|
||||||
};
|
|
||||||
|
|
||||||
if (window.speechSynthesis.getVoices().length > 0) {
|
|
||||||
trySpeak();
|
|
||||||
} else {
|
|
||||||
window.speechSynthesis.onvoiceschanged = trySpeak;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
export function stopSpeaking(): void {
|
|
||||||
window.speechSynthesis?.cancel();
|
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user