Added live mode for speech with utterance detection

This commit is contained in:
Will
2026-03-29 19:34:00 +01:00
parent d577528e47
commit 58392ea198
2 changed files with 192 additions and 50 deletions

View File

@@ -1,18 +1,51 @@
// src/app/page.tsx
"use client";
import { useState, useRef, useEffect } from "react";
import { useState, useRef, useEffect, useCallback } from "react";
import { useChat } from "@/hooks/useChat";
import { useWhisper } from "@/hooks/useWhisper";
import { useVoiceRecorder } from "@/hooks/useVoiceRecorder";
import { useLiveVoice } from "@/hooks/useLiveVoice";
import { stopSpeaking } from "@/lib/tts";
export default function Home() {
const [textInput, setTextInput] = useState("");
const [liveMode, setLiveMode] = useState(false);
const [isSpeaking, setIsSpeaking] = useState(false);
const { messages, isLoading, sendMessage } = useChat();
const { status: whisperStatus, modelMessage, transcribe } = useWhisper();
const { status: whisperStatus, transcribe } = useWhisper();
const { isRecording, startRecording, stopRecording } = useVoiceRecorder();
const bottomRef = useRef<HTMLDivElement>(null);
/**
 * Callback handed to `useLiveVoice` as `onUtterance`.
 *
 * Calls `stopSpeaking()` first (presumably to cut off any in-progress TTS
 * playback — confirm against `@/lib/tts`), then forwards the recognised text
 * to the chat as a message with source "voice".
 *
 * Memoised on `sendMessage` so the function identity only changes when
 * `sendMessage` does.
 */
const handleUtterance = useCallback((utterance: string) => {
  stopSpeaking();
  sendMessage(utterance, "voice");
}, [sendMessage]);
const { isListening, isSpeaking: vadSpeaking, start: startLive, stop: stopLive } =
useLiveVoice({
onUtterance: handleUtterance,
onSpeechStart: () => setIsSpeaking(true),
});
// Sync VAD speaking state
useEffect(() => {
setIsSpeaking(vadSpeaking);
}, [vadSpeaking]);
/**
 * Click handler for the "Live" switch: flips live mode on or off.
 *
 * Turning it off clears the `liveMode` flag, stops the live voice session and
 * resets the local `isSpeaking` flag. Turning it on sets the flag and starts
 * the live voice session.
 */
const handleLiveToggle = () => {
  if (liveMode) {
    // Currently live: shut the session down and clear the speaking indicator.
    setLiveMode(false);
    stopLive();
    setIsSpeaking(false);
    return;
  }

  // Currently idle: enter live mode and begin listening.
  setLiveMode(true);
  startLive();
};
useEffect(() => {
bottomRef.current?.scrollIntoView({ behavior: "smooth" });
}, [messages]);
@@ -37,22 +70,15 @@ export default function Home() {
if (text) sendMessage(text, "voice");
};
const pttDisabled =
whisperStatus !== "ready" || isLoading;
const pttLabel = () => {
if (whisperStatus === "loading") return "⏳";
if (whisperStatus === "transcribing") return "💬";
if (isRecording) return "🔴";
return "🎙";
};
const pttDisabled = whisperStatus !== "ready" || isLoading || liveMode;
const statusLine = () => {
if (whisperStatus === "loading") return modelMessage;
if (whisperStatus === "transcribing") return "Transcribing on-device…";
if (isRecording) return "Recording… release to send";
if (whisperStatus === "ready") return "Hold to talk — Whisper ready ✓";
return "Initialising Whisper…";
if (liveMode && isSpeaking) return "🎙 Hearing you…";
if (liveMode && isLoading) return "⏳ Claw is thinking…";
if (liveMode) return "👂 Listening — just speak naturally";
if (whisperStatus === "transcribing") return "💬 Transcribing…";
if (isRecording) return "🔴 Recording… release to send";
return "Hold to talk";
};
return (
@@ -64,23 +90,48 @@ export default function Home() {
<h1 className="text-xl font-bold tracking-tight">OpenClaw Voice</h1>
<p className="text-xs text-gray-500">On-device Whisper · No API keys</p>
</div>
<span
className={`ml-auto w-2 h-2 rounded-full ${
whisperStatus === "ready" ? "bg-green-400" : "bg-yellow-400 animate-pulse"
}`}
/>
{/* Live Mode Toggle */}
<div className="ml-auto flex items-center gap-2">
<span className={`text-xs font-medium ${liveMode ? "text-green-400" : "text-gray-500"}`}>
Live
</span>
<button
onClick={handleLiveToggle}
className={`relative inline-flex h-6 w-11 items-center rounded-full transition-colors focus:outline-none
${liveMode ? "bg-green-500" : "bg-gray-700"}`}
>
<span
className={`inline-block h-4 w-4 transform rounded-full bg-white shadow transition-transform
${liveMode ? "translate-x-6" : "translate-x-1"}`}
/>
</button>
</div>
</header>
{/* Live mode indicator bar */}
{liveMode && (
<div
className={`flex items-center justify-center gap-2 py-1.5 text-xs font-medium transition-all
${isSpeaking ? "bg-green-600 text-white" : "bg-green-900/40 text-green-400"}`}
>
<span
className={`w-1.5 h-1.5 rounded-full ${
isSpeaking ? "bg-white animate-ping" : "bg-green-400"
}`}
/>
{isSpeaking ? "Speech detected" : "Waiting for speech…"}
</div>
)}
{/* Messages */}
<div className="flex-1 overflow-y-auto px-4 py-6 space-y-4">
{messages.length === 0 && (
<div className="text-center mt-20 space-y-2">
<p className="text-gray-500 text-sm">
{whisperStatus === "ready"
? "Whisper loaded. Hold the button to talk or type below."
: modelMessage || "Loading Whisper model…"}
</p>
</div>
<p className="text-center text-gray-600 mt-20 text-sm">
{liveMode
? "Live mode on — just start talking"
: "Hold the button to talk, or type below."}
</p>
)}
{messages.map((msg) => (
<div
@@ -96,7 +147,7 @@ export default function Home() {
>
{msg.source === "voice" && (
<span className="text-xs opacity-40 block mb-1">
{msg.role === "user" ? "🎙 transcribed" : "🔊 spoken"}
{msg.role === "user" ? "🎙 live" : "🔊 spoken"}
</span>
)}
{msg.content || <span className="opacity-40 animate-pulse"></span>}
@@ -108,27 +159,30 @@ export default function Home() {
{/* Controls */}
<div className="border-t border-gray-800 bg-gray-900 px-4 py-4 space-y-3">
{/* PTT Button */}
<div className="flex justify-center">
<button
onMouseDown={handlePTTDown}
onMouseUp={handlePTTUp}
onTouchStart={(e) => { e.preventDefault(); handlePTTDown(); }}
onTouchEnd={handlePTTUp}
disabled={pttDisabled}
className={`w-20 h-20 rounded-full text-3xl font-bold transition-all shadow-lg select-none
${isRecording
? "bg-red-500 scale-110 shadow-red-500/40 animate-pulse"
: whisperStatus === "transcribing"
? "bg-yellow-500 cursor-wait"
: pttDisabled
? "bg-gray-700 cursor-not-allowed opacity-50"
: "bg-indigo-600 hover:bg-indigo-500 active:scale-95 cursor-pointer"
}`}
>
{pttLabel()}
</button>
</div>
{!liveMode && (
<>
<div className="flex justify-center">
<button
onMouseDown={handlePTTDown}
onMouseUp={handlePTTUp}
onTouchStart={(e) => { e.preventDefault(); handlePTTDown(); }}
onTouchEnd={handlePTTUp}
disabled={pttDisabled}
className={`w-20 h-20 rounded-full text-3xl font-bold transition-all shadow-lg select-none
${isRecording
? "bg-red-500 scale-110 shadow-red-500/40 animate-pulse"
: whisperStatus === "transcribing"
? "bg-yellow-500 cursor-wait"
: pttDisabled
? "bg-gray-700 cursor-not-allowed opacity-50"
: "bg-indigo-600 hover:bg-indigo-500 active:scale-95 cursor-pointer"
}`}
>
{isRecording ? "🔴" : whisperStatus === "transcribing" ? "💬" : "🎙"}
</button>
</div>
</>
)}
<p className="text-center text-xs text-gray-500">{statusLine()}</p>