diff --git a/src/app/page.tsx b/src/app/page.tsx index ed77021..e7192c2 100644 --- a/src/app/page.tsx +++ b/src/app/page.tsx @@ -1,18 +1,51 @@ // src/app/page.tsx "use client"; -import { useState, useRef, useEffect } from "react"; +import { useState, useRef, useEffect, useCallback } from "react"; import { useChat } from "@/hooks/useChat"; import { useWhisper } from "@/hooks/useWhisper"; import { useVoiceRecorder } from "@/hooks/useVoiceRecorder"; +import { useLiveVoice } from "@/hooks/useLiveVoice"; import { stopSpeaking } from "@/lib/tts"; export default function Home() { const [textInput, setTextInput] = useState(""); + const [liveMode, setLiveMode] = useState(false); + const [isSpeaking, setIsSpeaking] = useState(false); const { messages, isLoading, sendMessage } = useChat(); - const { status: whisperStatus, modelMessage, transcribe } = useWhisper(); + const { status: whisperStatus, transcribe } = useWhisper(); const { isRecording, startRecording, stopRecording } = useVoiceRecorder(); const bottomRef = useRef(null); + const handleUtterance = useCallback( + (text: string) => { + stopSpeaking(); + sendMessage(text, "voice"); + }, + [sendMessage] + ); + + const { isListening, isSpeaking: vadSpeaking, start: startLive, stop: stopLive } = + useLiveVoice({ + onUtterance: handleUtterance, + onSpeechStart: () => setIsSpeaking(true), + }); + + // Sync VAD speaking state + useEffect(() => { + setIsSpeaking(vadSpeaking); + }, [vadSpeaking]); + + const handleLiveToggle = () => { + if (!liveMode) { + setLiveMode(true); + startLive(); + } else { + setLiveMode(false); + stopLive(); + setIsSpeaking(false); + } + }; + useEffect(() => { bottomRef.current?.scrollIntoView({ behavior: "smooth" }); }, [messages]); @@ -37,22 +70,15 @@ export default function Home() { if (text) sendMessage(text, "voice"); }; - const pttDisabled = - whisperStatus !== "ready" || isLoading; - - const pttLabel = () => { - if (whisperStatus === "loading") return "⏳"; - if (whisperStatus === "transcribing") 
return "💬"; - if (isRecording) return "🔴"; - return "🎙"; - }; + const pttDisabled = whisperStatus !== "ready" || isLoading || liveMode; const statusLine = () => { - if (whisperStatus === "loading") return modelMessage; - if (whisperStatus === "transcribing") return "Transcribing on-device…"; - if (isRecording) return "Recording… release to send"; - if (whisperStatus === "ready") return "Hold to talk — Whisper ready ✓"; - return "Initialising Whisper…"; + if (liveMode && isSpeaking) return "🎙 Hearing you…"; + if (liveMode && isLoading) return "⏳ Claw is thinking…"; + if (liveMode) return "👂 Listening — just speak naturally"; + if (whisperStatus === "transcribing") return "💬 Transcribing…"; + if (isRecording) return "🔴 Recording… release to send"; + return "Hold to talk"; }; return ( @@ -64,23 +90,48 @@ export default function Home() {

OpenClaw Voice

On-device Whisper · No API keys

- + + {/* Live Mode Toggle */} +
+ + Live + + +
+ {/* Live mode indicator bar */} + {liveMode && ( +
+ + {isSpeaking ? "Speech detected" : "Waiting for speech…"} +
+ )} + {/* Messages */}
{messages.length === 0 && ( -
-

- {whisperStatus === "ready" - ? "Whisper loaded. Hold the button to talk or type below." - : modelMessage || "Loading Whisper model…"} -

-
+

+ {liveMode + ? "Live mode on — just start talking" + : "Hold the button to talk, or type below."} +

)} {messages.map((msg) => (
{msg.source === "voice" && ( - {msg.role === "user" ? "🎙 transcribed" : "🔊 spoken"} + {msg.role === "user" ? "🎙 live" : "🔊 spoken"} )} {msg.content || } @@ -108,27 +159,30 @@ export default function Home() { {/* Controls */}
- {/* PTT Button */} -
- -
+ {!liveMode && ( + <> +
+ +
+ + )}

{statusLine()}

// src/hooks/useLiveVoice.ts
"use client";
import { useRef, useState, useCallback, useEffect } from "react";

/** Callbacks accepted by {@link useLiveVoice}. */
type LiveVoiceOptions = {
  /** Called with the trimmed transcript of every final, non-empty result. */
  onUtterance: (text: string) => void;
  /** Called each time the recognizer reports that speech has started. */
  onSpeechStart?: () => void;
};

/** State and controls returned by {@link useLiveVoice}. */
type LiveVoiceControls = {
  isListening: boolean;
  isSpeaking: boolean;
  start: () => void;
  stop: () => void;
};

type SpeechRecognitionConstructor = typeof window.SpeechRecognition;

/**
 * Error codes after which an immediate restart cannot succeed. Restarting on
 * these would spin forever through error -> end -> start.
 */
const FATAL_RECOGNITION_ERRORS: ReadonlySet<string> = new Set([
  "not-allowed",
  "service-not-allowed",
  "audio-capture",
  "language-not-supported",
]);

/**
 * Continuous, hands-free speech recognition built on the browser
 * SpeechRecognition API.
 *
 * While active, the recognizer is restarted whenever the browser ends the
 * session on its own, until `stop()` is called, the component unmounts, or an
 * unrecoverable error is reported.
 *
 * @param options.onUtterance   Receives each final transcript (trimmed, non-empty).
 * @param options.onSpeechStart Optional; invoked when speech is first detected.
 * @returns `isListening` / `isSpeaking` flags plus stable `start` / `stop` functions.
 */
export function useLiveVoice({
  onUtterance,
  onSpeechStart,
}: LiveVoiceOptions): LiveVoiceControls {
  const [isListening, setIsListening] = useState(false);
  const [isSpeaking, setIsSpeaking] = useState(false);

  // The recognizer that currently "owns" the hook. Event handlers compare
  // themselves against this, so a stopped or superseded instance can neither
  // restart itself nor overwrite state.
  const recognitionRef = useRef<SpeechRecognition | null>(null);

  // Keep the newest callbacks in refs: a running session then always calls the
  // latest closure, and `start` does not change identity on every render.
  const onUtteranceRef = useRef(onUtterance);
  const onSpeechStartRef = useRef(onSpeechStart);
  useEffect(() => {
    onUtteranceRef.current = onUtterance;
    onSpeechStartRef.current = onSpeechStart;
  }, [onUtterance, onSpeechStart]);

  const start = useCallback(() => {
    // Already running: a second recognizer would orphan the first one, which
    // would keep auto-restarting and deliver every utterance twice.
    if (recognitionRef.current) return;

    const Recognition =
      window.SpeechRecognition ??
      (window as Window & { webkitSpeechRecognition?: SpeechRecognitionConstructor })
        .webkitSpeechRecognition;

    if (!Recognition) {
      alert("Your browser doesn't support SpeechRecognition. Try Chrome.");
      return;
    }

    const recognition = new Recognition();
    recognition.continuous = true; // keep listening between utterances
    recognition.interimResults = false; // only fire when utterance is complete
    recognition.lang = "en-GB";

    let fatalError = false;
    const isCurrent = () => recognitionRef.current === recognition;

    recognition.onstart = () => {
      if (isCurrent()) setIsListening(true);
    };

    recognition.onspeechstart = () => {
      if (!isCurrent()) return;
      setIsSpeaking(true);
      onSpeechStartRef.current?.();
    };

    recognition.onspeechend = () => {
      if (isCurrent()) setIsSpeaking(false);
    };

    // Deliberately not guarded by isCurrent(): after stop() the browser may
    // still deliver the final result for audio it has already captured.
    recognition.onresult = (event: SpeechRecognitionEvent) => {
      const last = event.results[event.results.length - 1];
      if (!last?.isFinal) return;
      const text = last[0]?.transcript.trim();
      if (text) onUtteranceRef.current(text);
    };

    recognition.onerror = (e: SpeechRecognitionErrorEvent) => {
      // 'no-speech' is normal background silence — just ignore it
      if (e.error === "no-speech") return;
      if (FATAL_RECOGNITION_ERRORS.has(e.error)) fatalError = true;
      console.error("SpeechRecognition error:", e.error);
    };

    recognition.onend = () => {
      // Stopped via stop()/unmount, or replaced by a newer session.
      if (!isCurrent()) return;

      // A session that has ended cannot still be mid-speech.
      setIsSpeaking(false);

      if (!fatalError) {
        try {
          recognition.start();
          return;
        } catch (err: unknown) {
          console.error("SpeechRecognition restart failed:", err);
        }
      }

      // Unrecoverable: release ownership so a later start() can try again.
      recognitionRef.current = null;
      setIsListening(false);
    };

    // Claim ownership before starting so the very first events are accepted.
    recognitionRef.current = recognition;
    try {
      recognition.start();
    } catch (err: unknown) {
      recognitionRef.current = null;
      console.error("SpeechRecognition failed to start:", err);
    }
  }, []);

  const stop = useCallback(() => {
    const recognition = recognitionRef.current;
    // Release ownership first so the pending 'end' event does not restart it.
    recognitionRef.current = null;
    recognition?.stop();
    setIsListening(false);
    setIsSpeaking(false);
  }, []);

  useEffect(() => {
    return () => {
      const recognition = recognitionRef.current;
      recognitionRef.current = null;
      // abort() rather than stop(): nothing should be delivered to an
      // unmounted component.
      recognition?.abort();
    };
  }, []);

  return { isListening, isSpeaking, start, stop };
}