import React, { useEffect, useRef, useState } from "react";
import css from "./Playground.module.scss";
import toast from "react-hot-toast";
import { v4 as uuidv4 } from "uuid";
import { useNavigate } from "react-router-dom";
import { GiBroom } from "react-icons/gi";
import { RiAiGenerate } from "react-icons/ri";

const voices = [
  { name: "JILL", value: "jill" },
  { name: "JACK", value: "jack" },
];

const Playground = () => {
  const navigate = useNavigate();

  // Generation / voice-activity-detection settings sent with the "start" message.
  const [temperature, setTemperature] = useState(0.7);
  const [threshold, setThreshold] = useState(0.5);
  const [prefixPadding, setPrefixPadding] = useState(300);
  const [silenceDuration, setSilenceDuration] = useState(100);

  const [mobileView, setmobileView] = useState(false);
  const [generating, setgenerating] = useState(false);
  const [isVoices, setisVoices] = useState(false);
  const [selectedVoice, setselectedVoice] = useState(voices[0]);
  const [ismicopen, setismicopen] = useState(false);
  const [ismic, setismic] = useState(false);
  const [audioStream, setAudioStream] = useState(null);
  const [audioContext, setAudioContext] = useState(null);

  // Playback machinery lives in refs so socket callbacks always see fresh values.
  const sourceRef = useRef(null);
  const audioContextRef = useRef(null);
  const playing = useRef(false);
  const [isplaying, setisplaying] = useState(false);
  const lastshifted = useRef(null); // last chunk taken off the queue, replayed on "continue"
  const bufferQueue = useRef([]); // base64 audio chunks waiting to be played
  const socket = useRef(null); // the ref object also carries timer handles (socket.interval)
  const isgrpc = useRef(null);
  const [isgrpcs, setisgrpc] = useState(false);
  const [socketConnected, setsocketConnected] = useState(false);
  const [audioContextState, setaudioContextState] = useState(false);

  if (!sessionStorage.getItem("sessionId")) {
    sessionStorage.setItem("sessionId", uuidv4());
  }
  const [session, setsession] = useState(sessionStorage.getItem("sessionId"));
  const [system_prompt, setsystemPrompt] = useState("");
  const [elapsedTime, setElapsedTime] = useState(0);
  const [latency, setlatency] = useState(0);
  const [chathistory, setchathistory] = useState([]);
  const msgref = useRef(null);

  // Keep the latest chat message scrolled into view.
  useEffect(() => {
    if (chathistory) {
      msgref?.current?.scrollIntoView({ behavior: "smooth" });
    }
  }, [chathistory]);

  // Each queued chunk is base64 of: JSON metadata, a NUL byte, then the audio
  // payload. Decode the next chunk, play it, and chain from onended until the
  // queue drains. The `da` timestamp passed by callers is currently unused.
  const handlePlay = async (da) => {
    try {
      if (!isgrpc.current) {
        return;
      }
      playing.current = true;
      setisplaying(true);
      const base64Data = bufferQueue.current.shift();
      lastshifted.current = base64Data;
      const bytes = new Uint8Array(
        atob(base64Data)
          .split("")
          .map((char) => char.charCodeAt(0))
      );
      let arrayBuffer = bytes.buffer;
      const metadataEndIndex = bytes.indexOf(0);
      let metadata = new TextDecoder().decode(bytes.slice(0, metadataEndIndex));
      metadata = JSON.parse(metadata);
      const { session_id, sequence_id, transcript } = metadata;
      if (sequence_id !== "-2") {
        // First playable chunk: stop the latency counter.
        if (socket.interval) {
          clearInterval(socket.interval);
          socket.interval = null;
          setlatency((prev) => prev + 150); // fixed offset on top of the measured gap
        }
        arrayBuffer = arrayBuffer.slice(metadataEndIndex + 1, arrayBuffer.length);
        try {
          if (audioContextRef.current.state === "suspended") {
            await audioContextRef.current.resume();
          }
          const audioBuffer = await audioContextRef.current.decodeAudioData(
            arrayBuffer
          );
          if (sourceRef.current) {
            sourceRef.current.disconnect();
          }
          const source = audioContextRef.current.createBufferSource();
          source.buffer = audioBuffer;
          source.connect(audioContextRef.current.destination);
          source.start(0);
          sourceRef.current = source;
          sourceRef.current.session_id = session_id;
          sourceRef.current.sequence_id = sequence_id;
          sourceRef.current.transcript = transcript;
          sourceRef.current.onended = () => {
            lastshifted.current = null;
            if (
              socket.current?.readyState === WebSocket.OPEN &&
              sourceRef?.current?.sequence_id
            ) {
              // Acknowledge the finished chunk so the server can advance.
              socket.current.send(
                JSON.stringify({
                  type: "status",
                  msg: {
                    session_id: sourceRef?.current?.session_id,
                    sequence_id: sourceRef?.current?.sequence_id,
                    transcript: sourceRef?.current?.transcript,
                  },
                })
              );
            }
            if (bufferQueue.current.length > 0) {
              playing.current = true;
              setisplaying(true);
              handlePlay(Date.now());
            } else {
              playing.current = false;
              setisplaying(false);
            }
          };
        } catch (error) {
          console.error("Error decoding audio data:", error);
        }
      } else {
        // sequence_id === "-2" marks the start of a response: begin counting
        // latency until the first playable chunk arrives.
        const startTime = Date.now();
        socket.interval = setInterval(() => {
          setlatency(Date.now() - startTime);
        }, 10);
        if (bufferQueue.current.length > 0) {
          playing.current = true;
          setisplaying(true);
          handlePlay(Date.now());
        } else {
          playing.current = false;
          setisplaying(false);
        }
      }
    } catch (error) {
      console.error("Error in handlePlay: ", error);
    }
  };

  // Start the conversation (connect, then send settings) or stop it.
  const handlemicchange = async () => {
    if (ismicopen && sourceRef.current) {
      sourceRef.current.onended = null;
      sourceRef.current.stop();
      sourceRef.current.disconnect();
      sourceRef.current = null;
      bufferQueue.current = [];
    }
    if (!ismicopen) {
      setlatency(0);
      setismicopen(true);
      await connectToRealtimeTTS();
      socket.current.send(
        JSON.stringify({
          type: "start",
          msg: JSON.stringify({
            temperature: temperature,
            prefixPadding: prefixPadding,
            silenceDuration: silenceDuration,
            voice: selectedVoice?.value,
            threshold: threshold,
            system_prompt: system_prompt,
            sessionId: session,
          }),
        })
      );
    } else {
      stopAudioStream();
    }
    setismic(!ismic);
  };

  // Open the /v2v WebSocket; resolves once the server sends "initial".
  const connectToRealtimeTTS = async () => {
    return new Promise((resolve, reject) => {
      try {
        const audioContext = new (window.AudioContext ||
          window.webkitAudioContext)();
        audioContextRef.current = audioContext;
        setaudioContextState(true);
        const websocketURL = process.env.REACT_APP_WEBSOCKET_URL.includes(
          "localhost"
        )
          ? `ws://${process.env.REACT_APP_WEBSOCKET_URL}/v2v`
          : `wss://${process.env.REACT_APP_WEBSOCKET_URL}/v2v`;
        const ws = new WebSocket(websocketURL);
        ws.onopen = () => {
          // console.log("WebSocket connected");
          // socket.current = ws;
          // setsocketConnected(true);
          // toast.success("Now You can speak...");
        };
        ws.onmessage = async (event) => {
          try {
            const data = JSON.parse(event.data);
            const { type, msg } = data;
            switch (type) {
              case "initial":
                socket.current = ws;
                setsocketConnected(true);
                resolve(null);
                break;
              case "media": {
                bufferQueue.current.push(msg);
                if (!playing.current && bufferQueue.current.length > 0) {
                  handlePlay(Date.now());
                }
                break;
              }
              case "info":
                toast.error(msg);
                break;
              case "ready":
                isgrpc.current = true;
                setisgrpc(true);
                await startAudioStream();
                break;
              case "pause":
                // Barge-in: halt whatever is currently playing.
                if (sourceRef.current) {
                  sourceRef.current.onended = null;
                  sourceRef.current.stop();
                  sourceRef.current.disconnect();
                  sourceRef.current = null;
                }
                playing.current = false;
                setgenerating(true);
                setisplaying(false);
                break;
              case "continue": {
                // Re-queue the chunk that was interrupted mid-play.
                if (lastshifted.current) {
                  bufferQueue.current.unshift(lastshifted.current);
                  lastshifted.current = null;
                }
                setgenerating(false);
                handlePlay(Date.now());
                if (socket.interval) {
                  clearInterval(socket.interval);
                  socket.interval = null;
                }
                break;
              }
              case "clear":
                bufferQueue.current = [];
                playing.current = false;
                setgenerating(false);
                setisplaying(false);
                if (sourceRef.current) {
                  sourceRef.current.onended = null;
                  sourceRef.current.stop();
                  sourceRef.current.disconnect();
                  sourceRef.current = null;
                }
                break;
              case "end":
                try {
                  audioContext
                    .close()
                    .then(() => {
                      setaudioContextState(false);
                      if (ismicopen) {
                        toast.error("Please restart the conversation.");
                      }
                    })
                    .catch(() => {
                      if (ismicopen) {
                        toast.error("Please restart the conversation.");
                      }
                    });
                  await stopAudioStream();
                  setismicopen(false);
                } catch (error) {
                  // console.error("Error in closing audioContext.");
                }
                break;
              case "chathistory":
                setchathistory(msg);
                break;
              default:
                break;
            }
          } catch (error) {
            // console.error("Error in websocket media.");
          }
        };
        ws.onclose = () => {
          try {
            // audioStream is captured from the render that registered this
            // handler and may be stale; stopAudioStream is the reliable path.
            audioStream?.getTracks().forEach((track) => track.stop());
            setAudioStream(null);
            setElapsedTime(0);
          } catch (err) {
            // console.error(err);
          }
        };
        ws.onerror = (err) => {
          // console.error("Websocket Error", err);
        };
      } catch (err) {
        console.error("Error in making websocket connection.");
        reject(null);
      }
    });
  };

  // Convert Float32 samples in [-1, 1] to 16-bit signed PCM.
  function floatTo16BitPCM(input) {
    const output = new Int16Array(input.length);
    for (let i = 0; i < input.length; i++) {
      const sample = Math.max(-1, Math.min(1, input[i]));
      output[i] = sample < 0 ? sample * 0x8000 : sample * 0x7fff;
    }
    return output.buffer;
  }

  // Capture the microphone at 16 kHz and stream 16-bit PCM frames over the
  // socket while the server is ready (isgrpc).
  const startAudioStream = async () => {
    try {
      const startTime = Date.now();
      setchathistory([]);
      // Stored on the socket ref so stopAudioStream can clear it.
      socket.elapsedInterval = setInterval(() => {
        setElapsedTime(Date.now() - startTime);
      }, 1000);
      if (!socket.current) {
        toast.error("Please try again.. Socket");
        return;
      }
      // Fresh playback context, replacing the one created at connect time.
      audioContextRef.current = new (window.AudioContext ||
        window.webkitAudioContext)();
      setismicopen(true);
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
      setAudioStream(stream);
      const audioContext = new AudioContext({
        sampleRate: 16000,
      });
      setAudioContext(audioContext);
      const audioInput = audioContext.createMediaStreamSource(stream);
      const bufferSize = 2048;
      const scriptProcessorNode = audioContext.createScriptProcessor(
        bufferSize,
        1,
        1
      );
      scriptProcessorNode.onaudioprocess = async (e) => {
        const inputData = e.inputBuffer.getChannelData(0);
        const l16Data = floatTo16BitPCM(inputData);
        try {
          if (!isgrpc.current) {
            try {
              // audioContextState is captured when this handler is registered
              // and may be stale here; kept as in the original logic.
              if (audioContextState) {
                audioContext
                  .close()
                  .then(() => {
                    setaudioContextState(false);
                    toast.error("Please restart the conversation.");
                  })
                  .catch((err) => {
                    toast.error("Please restart the conversation.");
                    // console.error(err);
                  });
                await stopAudioStream();
                toast.error("Please start again.");
                setismicopen(false);
              }
            } catch (error) {
              // console.error("Error in closing audioContext.");
            }
          }
          if (
            isgrpc.current &&
            socket.current &&
            socket.current.readyState === WebSocket.OPEN
          ) {
            // if (socket.current.grpc) {
            socket.current.send(l16Data);
          }
        } catch (err) {
          // console.error("Error in sending buffer.");
        }
      };
      audioInput.connect(scriptProcessorNode);
      scriptProcessorNode.connect(audioContext.destination);
    } catch (error) {
      // console.error("Error accessing microphone:", error);
    }
  };

  // Tear down the mic, timers, socket, and capture context.
  const stopAudioStream = async () => {
    setgenerating(false);
    setismicopen(false);
    setElapsedTime(0);
    setisgrpc(false);
    bufferQueue.current = [];
    if (socket.interval) {
      clearInterval(socket.interval);
      socket.interval = null;
    }
    if (socket.elapsedInterval) {
      clearInterval(socket.elapsedInterval);
      socket.elapsedInterval = null;
    }
    try {
      if (audioStream) {
        audioStream.getTracks().forEach((track) => track.stop());
        setAudioStream(null);
      }
    } catch (err) {
      // console.error(err);
    }
    try {
      if (socket.current && socket.current.readyState === WebSocket.OPEN) {
        // Tell the server to stop before closing the connection.
        isgrpc.current = false;
        socket.current.send(JSON.stringify({ type: "stop", msg: "stop" }));
        socket.current.close();
      }
      socket.current = null;
      audioContext
        ?.close()
        .then(() => {
          // console.log("AudioContext closed.");
        })
        .catch((err) => {
          // console.error("Error in closing the audioContext.", err);
        });
    } catch (err) {
      // console.error("Error in closing the audioContext.");
    }
  };

  const getButtonText = () => {
    if (!ismicopen) {
      return "Start";
    }
    if (ismicopen && !isgrpcs) {
      return "Starting";
    }
    if (isgrpcs) {
      return "Stop";
    }
    return "";
  };

  // Format milliseconds as HH:MM:SS.
  const formatTime = (milliseconds) => {
    const totalSeconds = Math.floor(milliseconds / 1000);
    const hours = Math.floor(totalSeconds / 3600);
    const minutes = Math.floor((totalSeconds % 3600) / 60);
    const seconds = totalSeconds % 60;
    return `${String(hours).padStart(2, "0")}:${String(minutes).padStart(
      2,
      "0"
    )}:${String(seconds).padStart(2, "0")}`;
  };

  return (
    <div className={css.playground}>
      {/* NOTE: the wrapper elements and css class names in this markup are
          reconstructed placeholders; only the expressions are original. */}
      <div className={css.chat}>
        {chathistory.length > 0 &&
          chathistory.map((item, index) => {
            return (
              <React.Fragment key={index}>
                {/* {item.timestamp} */}
                <span className={css.speaker}>{item.speaker}</span>
                <span className={css.content}>{item.content}</span>
                {/* {item.content} */}
              </React.Fragment>
            );
          })}
        {/* Anchor for the auto-scroll effect above. */}
        <div ref={msgref} />
      </div>
      {socketConnected ? (
        <div className={css.latency}>
          {/* {!ismicopen ? "00:00:00" : formatTime(elapsedTime)} */}
          {`${latency} ms`}
        </div>
      ) : (
        <div className={css.status}>Connecting...</div>
      )}
      {/* Reconstructed start/stop control wired to the surviving handlers. */}
      <button className={css.micButton} onClick={handlemicchange}>
        {getButtonText()}
      </button>
    </div>
  );
};

export default Playground;