Added live mode for speech with utterance detection
This commit is contained in:
154
src/app/page.tsx
154
src/app/page.tsx
@@ -1,18 +1,51 @@
|
||||
// src/app/page.tsx
|
||||
"use client";
|
||||
import { useState, useRef, useEffect } from "react";
|
||||
import { useState, useRef, useEffect, useCallback } from "react";
|
||||
import { useChat } from "@/hooks/useChat";
|
||||
import { useWhisper } from "@/hooks/useWhisper";
|
||||
import { useVoiceRecorder } from "@/hooks/useVoiceRecorder";
|
||||
import { useLiveVoice } from "@/hooks/useLiveVoice";
|
||||
import { stopSpeaking } from "@/lib/tts";
|
||||
|
||||
export default function Home() {
|
||||
const [textInput, setTextInput] = useState("");
|
||||
const [liveMode, setLiveMode] = useState(false);
|
||||
const [isSpeaking, setIsSpeaking] = useState(false);
|
||||
const { messages, isLoading, sendMessage } = useChat();
|
||||
const { status: whisperStatus, modelMessage, transcribe } = useWhisper();
|
||||
const { status: whisperStatus, transcribe } = useWhisper();
|
||||
const { isRecording, startRecording, stopRecording } = useVoiceRecorder();
|
||||
const bottomRef = useRef<HTMLDivElement>(null);
|
||||
|
||||
// Utterance handler passed to useLiveVoice: calls stopSpeaking() first, then
// forwards the recognised text to sendMessage tagged as a "voice" message.
const handleUtterance = useCallback(
  (utterance: string) => {
    stopSpeaking();
    sendMessage(utterance, "voice");
  },
  [sendMessage],
);
|
||||
|
||||
const { isListening, isSpeaking: vadSpeaking, start: startLive, stop: stopLive } =
|
||||
useLiveVoice({
|
||||
onUtterance: handleUtterance,
|
||||
onSpeechStart: () => setIsSpeaking(true),
|
||||
});
|
||||
|
||||
// Sync VAD speaking state
|
||||
useEffect(() => {
|
||||
setIsSpeaking(vadSpeaking);
|
||||
}, [vadSpeaking]);
|
||||
|
||||
// Toggle handler for the "Live" switch.
// Leaving live mode stops the live-voice session and clears the local speaking
// flag; entering it flips the mode on and starts the session.
const handleLiveToggle = () => {
  if (liveMode) {
    setLiveMode(false);
    stopLive();
    setIsSpeaking(false);
    return;
  }

  setLiveMode(true);
  startLive();
};
|
||||
|
||||
useEffect(() => {
|
||||
bottomRef.current?.scrollIntoView({ behavior: "smooth" });
|
||||
}, [messages]);
|
||||
@@ -37,22 +70,15 @@ export default function Home() {
|
||||
if (text) sendMessage(text, "voice");
|
||||
};
|
||||
|
||||
const pttDisabled =
|
||||
whisperStatus !== "ready" || isLoading;
|
||||
|
||||
const pttLabel = () => {
|
||||
if (whisperStatus === "loading") return "⏳";
|
||||
if (whisperStatus === "transcribing") return "💬";
|
||||
if (isRecording) return "🔴";
|
||||
return "🎙";
|
||||
};
|
||||
const pttDisabled = whisperStatus !== "ready" || isLoading || liveMode;
|
||||
|
||||
const statusLine = () => {
|
||||
if (whisperStatus === "loading") return modelMessage;
|
||||
if (whisperStatus === "transcribing") return "Transcribing on-device…";
|
||||
if (isRecording) return "Recording… release to send";
|
||||
if (whisperStatus === "ready") return "Hold to talk — Whisper ready ✓";
|
||||
return "Initialising Whisper…";
|
||||
if (liveMode && isSpeaking) return "🎙 Hearing you…";
|
||||
if (liveMode && isLoading) return "⏳ Claw is thinking…";
|
||||
if (liveMode) return "👂 Listening — just speak naturally";
|
||||
if (whisperStatus === "transcribing") return "💬 Transcribing…";
|
||||
if (isRecording) return "🔴 Recording… release to send";
|
||||
return "Hold to talk";
|
||||
};
|
||||
|
||||
return (
|
||||
@@ -64,23 +90,48 @@ export default function Home() {
|
||||
<h1 className="text-xl font-bold tracking-tight">OpenClaw Voice</h1>
|
||||
<p className="text-xs text-gray-500">On-device Whisper · No API keys</p>
|
||||
</div>
|
||||
<span
|
||||
className={`ml-auto w-2 h-2 rounded-full ${
|
||||
whisperStatus === "ready" ? "bg-green-400" : "bg-yellow-400 animate-pulse"
|
||||
}`}
|
||||
/>
|
||||
|
||||
{/* Live Mode Toggle */}
|
||||
<div className="ml-auto flex items-center gap-2">
|
||||
<span className={`text-xs font-medium ${liveMode ? "text-green-400" : "text-gray-500"}`}>
|
||||
Live
|
||||
</span>
|
||||
<button
|
||||
onClick={handleLiveToggle}
|
||||
className={`relative inline-flex h-6 w-11 items-center rounded-full transition-colors focus:outline-none
|
||||
${liveMode ? "bg-green-500" : "bg-gray-700"}`}
|
||||
>
|
||||
<span
|
||||
className={`inline-block h-4 w-4 transform rounded-full bg-white shadow transition-transform
|
||||
${liveMode ? "translate-x-6" : "translate-x-1"}`}
|
||||
/>
|
||||
</button>
|
||||
</div>
|
||||
</header>
|
||||
|
||||
{/* Live mode indicator bar */}
|
||||
{liveMode && (
|
||||
<div
|
||||
className={`flex items-center justify-center gap-2 py-1.5 text-xs font-medium transition-all
|
||||
${isSpeaking ? "bg-green-600 text-white" : "bg-green-900/40 text-green-400"}`}
|
||||
>
|
||||
<span
|
||||
className={`w-1.5 h-1.5 rounded-full ${
|
||||
isSpeaking ? "bg-white animate-ping" : "bg-green-400"
|
||||
}`}
|
||||
/>
|
||||
{isSpeaking ? "Speech detected" : "Waiting for speech…"}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Messages */}
|
||||
<div className="flex-1 overflow-y-auto px-4 py-6 space-y-4">
|
||||
{messages.length === 0 && (
|
||||
<div className="text-center mt-20 space-y-2">
|
||||
<p className="text-gray-500 text-sm">
|
||||
{whisperStatus === "ready"
|
||||
? "Whisper loaded. Hold the button to talk or type below."
|
||||
: modelMessage || "Loading Whisper model…"}
|
||||
</p>
|
||||
</div>
|
||||
<p className="text-center text-gray-600 mt-20 text-sm">
|
||||
{liveMode
|
||||
? "Live mode on — just start talking"
|
||||
: "Hold the button to talk, or type below."}
|
||||
</p>
|
||||
)}
|
||||
{messages.map((msg) => (
|
||||
<div
|
||||
@@ -96,7 +147,7 @@ export default function Home() {
|
||||
>
|
||||
{msg.source === "voice" && (
|
||||
<span className="text-xs opacity-40 block mb-1">
|
||||
{msg.role === "user" ? "🎙 transcribed" : "🔊 spoken"}
|
||||
{msg.role === "user" ? "🎙 live" : "🔊 spoken"}
|
||||
</span>
|
||||
)}
|
||||
{msg.content || <span className="opacity-40 animate-pulse">▍</span>}
|
||||
@@ -108,27 +159,30 @@ export default function Home() {
|
||||
|
||||
{/* Controls */}
|
||||
<div className="border-t border-gray-800 bg-gray-900 px-4 py-4 space-y-3">
|
||||
{/* PTT Button */}
|
||||
<div className="flex justify-center">
|
||||
<button
|
||||
onMouseDown={handlePTTDown}
|
||||
onMouseUp={handlePTTUp}
|
||||
onTouchStart={(e) => { e.preventDefault(); handlePTTDown(); }}
|
||||
onTouchEnd={handlePTTUp}
|
||||
disabled={pttDisabled}
|
||||
className={`w-20 h-20 rounded-full text-3xl font-bold transition-all shadow-lg select-none
|
||||
${isRecording
|
||||
? "bg-red-500 scale-110 shadow-red-500/40 animate-pulse"
|
||||
: whisperStatus === "transcribing"
|
||||
? "bg-yellow-500 cursor-wait"
|
||||
: pttDisabled
|
||||
? "bg-gray-700 cursor-not-allowed opacity-50"
|
||||
: "bg-indigo-600 hover:bg-indigo-500 active:scale-95 cursor-pointer"
|
||||
}`}
|
||||
>
|
||||
{pttLabel()}
|
||||
</button>
|
||||
</div>
|
||||
{!liveMode && (
|
||||
<>
|
||||
<div className="flex justify-center">
|
||||
<button
|
||||
onMouseDown={handlePTTDown}
|
||||
onMouseUp={handlePTTUp}
|
||||
onTouchStart={(e) => { e.preventDefault(); handlePTTDown(); }}
|
||||
onTouchEnd={handlePTTUp}
|
||||
disabled={pttDisabled}
|
||||
className={`w-20 h-20 rounded-full text-3xl font-bold transition-all shadow-lg select-none
|
||||
${isRecording
|
||||
? "bg-red-500 scale-110 shadow-red-500/40 animate-pulse"
|
||||
: whisperStatus === "transcribing"
|
||||
? "bg-yellow-500 cursor-wait"
|
||||
: pttDisabled
|
||||
? "bg-gray-700 cursor-not-allowed opacity-50"
|
||||
: "bg-indigo-600 hover:bg-indigo-500 active:scale-95 cursor-pointer"
|
||||
}`}
|
||||
>
|
||||
{isRecording ? "🔴" : whisperStatus === "transcribing" ? "💬" : "🎙"}
|
||||
</button>
|
||||
</div>
|
||||
</>
|
||||
)}
|
||||
|
||||
<p className="text-center text-xs text-gray-500">{statusLine()}</p>
|
||||
|
||||
|
||||
88
src/hooks/useLiveVoice.ts
Normal file
88
src/hooks/useLiveVoice.ts
Normal file
@@ -0,0 +1,88 @@
|
||||
// src/hooks/useLiveVoice.ts
|
||||
"use client";
|
||||
import { useRef, useState, useCallback, useEffect } from "react";
|
||||
|
||||
/** Callbacks accepted by the `useLiveVoice` hook. */
type LiveVoiceOptions = {
|
||||
/** Called with the trimmed transcript of a finalised, non-empty recognition result. */
onUtterance: (text: string) => void;
|
||||
/** Optional; called when the recogniser's `speechstart` event fires. */
onSpeechStart?: () => void;
|
||||
};
|
||||
|
||||
/**
 * Recognition error codes after which restarting cannot succeed without user
 * or environment changes. Auto-restarting on these would spin forever in an
 * error -> end -> start loop.
 */
const FATAL_RECOGNITION_ERRORS: ReadonlySet<string> = new Set([
  "not-allowed",
  "service-not-allowed",
  "audio-capture",
  "language-not-supported",
]);

/**
 * React hook for hands-free voice input built on the browser SpeechRecognition API.
 *
 * While a session is active the recogniser runs in continuous mode and is
 * restarted automatically whenever the browser ends it, until `stop()` is
 * called, the component unmounts, or a fatal error (e.g. microphone permission
 * denied) occurs.
 *
 * @param options.onUtterance   Called with the trimmed transcript of every
 *                              finalised, non-empty result.
 * @param options.onSpeechStart Optional; called when the recogniser reports
 *                              that speech has started.
 * @returns `isListening` (a session is running), `isSpeaking` (speech is
 *          currently detected), and stable `start` / `stop` controls.
 */
export function useLiveVoice({ onUtterance, onSpeechStart }: LiveVoiceOptions) {
  const [isListening, setIsListening] = useState(false);
  const [isSpeaking, setIsSpeaking] = useState(false);
  const recognitionRef = useRef<SpeechRecognition | null>(null);
  const stoppedManually = useRef(false);

  // Keep the latest callbacks in refs so a long-running session never invokes
  // a stale closure, and so `start` keeps a stable identity across renders.
  const onUtteranceRef = useRef(onUtterance);
  const onSpeechStartRef = useRef(onSpeechStart);
  useEffect(() => {
    onUtteranceRef.current = onUtterance;
    onSpeechStartRef.current = onSpeechStart;
  }, [onUtterance, onSpeechStart]);

  const start = useCallback(() => {
    // A session is already active; a second recogniser would leak and report
    // every utterance twice.
    if (recognitionRef.current) return;

    // Vendor-prefixed fallback for browsers that only expose the webkit name.
    const SpeechRecognition =
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      window.SpeechRecognition || (window as any).webkitSpeechRecognition;

    if (!SpeechRecognition) {
      alert("Your browser doesn't support SpeechRecognition. Try Chrome.");
      return;
    }

    const recognition: SpeechRecognition = new SpeechRecognition();
    recognition.continuous = true; // keep listening between utterances
    recognition.interimResults = false; // only fire when utterance is complete
    recognition.lang = "en-GB";

    // True only while this instance is the hook's active session. Late events
    // from a session that stop() already discarded must be ignored.
    const isCurrent = () => recognitionRef.current === recognition;

    recognition.onstart = () => {
      if (isCurrent()) setIsListening(true);
    };

    recognition.onspeechstart = () => {
      if (!isCurrent()) return;
      setIsSpeaking(true);
      onSpeechStartRef.current?.();
    };

    recognition.onspeechend = () => {
      if (isCurrent()) setIsSpeaking(false);
    };

    recognition.onresult = (event: SpeechRecognitionEvent) => {
      if (!isCurrent()) return;
      // A single event may deliver several new results; walk all of them
      // starting at resultIndex rather than looking only at the last one.
      for (let i = event.resultIndex; i < event.results.length; i++) {
        const result = event.results[i];
        if (!result?.isFinal) continue;
        const text = result[0]?.transcript.trim();
        if (text) onUtteranceRef.current(text);
      }
    };

    recognition.onerror = (e: SpeechRecognitionErrorEvent) => {
      // 'no-speech' is normal background silence — just ignore it
      if (e.error === "no-speech") return;
      console.error("SpeechRecognition error:", e.error);
      // Unrecoverable: make the following `end` event shut the session down
      // instead of restarting it.
      if (FATAL_RECOGNITION_ERRORS.has(e.error)) {
        stoppedManually.current = true;
      }
    };

    recognition.onend = () => {
      // Superseded session: stop() already reset state, never revive it.
      if (!isCurrent()) return;

      setIsSpeaking(false);

      // Auto-restart unless we stopped it manually (or hit a fatal error).
      if (!stoppedManually.current) {
        try {
          recognition.start();
          return;
        } catch (err: unknown) {
          // start() throws if the recogniser is in an invalid state; fall
          // through and end the session cleanly rather than crash.
          console.error("SpeechRecognition restart failed:", err);
        }
      }

      recognitionRef.current = null;
      setIsListening(false);
    };

    stoppedManually.current = false;
    // Register before starting so the handlers above recognise this session.
    recognitionRef.current = recognition;
    try {
      recognition.start();
    } catch (err: unknown) {
      recognitionRef.current = null;
      console.error("SpeechRecognition start failed:", err);
    }
  }, []);

  const stop = useCallback(() => {
    stoppedManually.current = true;
    const recognition = recognitionRef.current;
    // Clear the ref first so late events from this session are ignored.
    recognitionRef.current = null;
    recognition?.stop();
    setIsListening(false);
    setIsSpeaking(false);
  }, []);

  // Shut the recogniser down when the owning component unmounts.
  useEffect(() => {
    return () => {
      stoppedManually.current = true;
      const recognition = recognitionRef.current;
      recognitionRef.current = null;
      recognition?.stop();
    };
  }, []);

  return { isListening, isSpeaking, start, stop };
}
|
||||
Reference in New Issue
Block a user