import React, { useEffect, useRef, useState } from "react"; |
import css from "./Playground.module.scss"; |
import toast from "react-hot-toast"; |
import { v4 as uuidv4 } from "uuid"; |
import { useNavigate } from "react-router-dom"; |
import { GiBroom } from "react-icons/gi"; |
import { RiAiGenerate } from "react-icons/ri"; |
/**
 * Voices offered to the user. `value` is what gets sent to the server as the
 * `voice` field of the "start" message; `name` is presumably the display label
 * (no visible code renders it yet).
 *
 * Frozen because the list is shared by every render and must never be mutated.
 */
const voices = Object.freeze([
  Object.freeze({ name: "JILL", value: "jill" }),
  Object.freeze({ name: "JACK", value: "jack" }),
]);
const Playground = () => { |
// Router navigation handle (not referenced by the rest of this component).
const navigate = useNavigate();

// Settings forwarded to the server in the "start" message.
const [temperature, setTemperature] = useState(0.7);
const [threshold, setThreshold] = useState(0.5);
const [prefixPadding, setPrefixPadding] = useState(300);
const [silenceDuration, setSilenceDuration] = useState(100);

// UI flags.
const [mobileView, setmobileView] = useState(false);
const [generating, setgenerating] = useState(false);
const [isVoices, setisVoices] = useState(false);
const [selectedVoice, setselectedVoice] = useState(voices[0]);
const [ismicopen, setismicopen] = useState(false);
const [ismic, setismic] = useState(false);

// Microphone capture resources (set by startAudioStream).
const [audioStream, setAudioStream] = useState(null);
const [audioContext, setAudioContext] = useState(null);

// Playback bookkeeping. These are refs because they are read and written from
// WebSocket / Web Audio callbacks that outlive a single render.
const sourceRef = useRef(null); // AudioBufferSourceNode currently playing
const audioContextRef = useRef(null); // AudioContext used for playback
const playing = useRef(false); // true while the playback chain is running
const [isplaying, setisplaying] = useState(false);
const lastshifted = useRef(null); // last chunk taken off the queue, re-queued on "continue"
const bufferQueue = useRef([]); // base64 chunks waiting to be played

// Connection state. NOTE: the latency timer handle is stored as `socket.interval`
// (a property on the ref object itself), not on `socket.current`.
const socket = useRef(null);
const isgrpc = useRef(null); // set to true when the server sends "ready"
const [isgrpcs, setisgrpc] = useState(false);
const [socketConnected, setsocketConnected] = useState(false);
const [audioContextState, setaudioContextState] = useState(false);

// Session id persisted in sessionStorage. The lazy initializer creates it once
// on mount instead of touching sessionStorage on every render.
const [session, setsession] = useState(() => {
  const storedId = sessionStorage.getItem("sessionId");
  if (storedId) {
    return storedId;
  }
  const freshId = uuidv4();
  sessionStorage.setItem("sessionId", freshId);
  return freshId;
});

// Setter name kept as declared originally; nothing in this component calls it yet.
const [system_prompt, setsetsystemPrompt] = useState("");
const [elapsedTime, setElapsedTime] = useState(0);
const [latency, setlatency] = useState(0);
const [chathistory, setchathistory] = useState([]);

// Element rendered after the chat messages; used as the scroll target.
const msgref = useRef(null);

// Keep the newest message in view whenever the transcript changes.
useEffect(() => {
  if (chathistory) {
    msgref.current?.scrollIntoView({ behavior: "smooth" });
  }
}, [chathistory]);

// Release long-lived resources when the component unmounts; otherwise the
// socket, the latency timer and the playback context outlive the page.
useEffect(() => {
  return () => {
    if (socket.interval) {
      clearInterval(socket.interval);
      socket.interval = null;
    }
    bufferQueue.current = [];
    if (sourceRef.current) {
      // Prevent the ended-callback from restarting the playback chain.
      sourceRef.current.onended = null;
    }
    const openSocket = socket.current;
    if (openSocket && openSocket.readyState === WebSocket.OPEN) {
      openSocket.close();
    }
    const playbackContext = audioContextRef.current;
    if (playbackContext && playbackContext.state !== "closed") {
      playbackContext.close().catch((err) => {
        console.error("Error closing playback audio context:", err);
      });
    }
  };
}, []);
/**
 * Takes the next base64 chunk off `bufferQueue` and plays it.
 *
 * Each chunk is `<JSON metadata>\0<audio bytes>`. The metadata carries
 * `session_id`, `sequence_id` and `transcript`. A chunk whose `sequence_id` is
 * "-2" is not played: it only starts the latency timer (presumably a
 * "server is working" marker; confirm against the server protocol).
 *
 * When a chunk finishes, a "status" message is sent back over the socket and
 * the next queued chunk (if any) is played. An empty queue or a chunk that
 * cannot be parsed or decoded never leaves the player stuck in the "playing"
 * state: the chunk is skipped and the queue keeps draining.
 *
 * @param {number} [da] Timestamp passed by callers; not used by this function.
 * @returns {Promise<void>}
 */
const handlePlay = async (da) => {
  // Nothing is played until the server has signalled "ready".
  if (!isgrpc.current) {
    return;
  }

  // Plays the next queued chunk, or marks playback idle when the queue is empty.
  const advanceQueue = () => {
    if (bufferQueue.current.length > 0) {
      playing.current = true;
      setisplaying(true);
      handlePlay(Date.now());
    } else {
      playing.current = false;
      setisplaying(false);
    }
  };

  if (bufferQueue.current.length === 0) {
    advanceQueue();
    return;
  }

  try {
    playing.current = true;
    setisplaying(true);

    const base64Data = bufferQueue.current.shift();
    lastshifted.current = base64Data;

    const bytes = Uint8Array.from(atob(base64Data), (char) =>
      char.charCodeAt(0)
    );
    const metadataEndIndex = bytes.indexOf(0);
    if (metadataEndIndex === -1) {
      throw new Error("Audio chunk has no metadata terminator.");
    }
    const metadata = JSON.parse(
      new TextDecoder().decode(bytes.subarray(0, metadataEndIndex))
    );
    const { session_id, sequence_id, transcript } = metadata;

    if (sequence_id === "-2") {
      // Restart the latency timer; clear any previous one so it cannot leak.
      if (socket.interval) {
        clearInterval(socket.interval);
      }
      const startTime = Date.now();
      socket.interval = setInterval(() => {
        setlatency(Date.now() - startTime);
      }, 10);
      advanceQueue();
      return;
    }

    if (socket.interval) {
      clearInterval(socket.interval);
      socket.interval = null;
      // NOTE(review): fixed 150 ms is added to the measured latency; the
      // rationale is not visible in this file.
      setlatency((prev) => prev + 150);
    }

    // Everything after the NUL separator is the encoded audio.
    const audioBytes = bytes.buffer.slice(metadataEndIndex + 1);

    let audioBuffer;
    try {
      if (audioContextRef.current.state === "suspended") {
        await audioContextRef.current.resume();
      }
      audioBuffer = await audioContextRef.current.decodeAudioData(audioBytes);
    } catch (error) {
      console.error("Error decoding audio data:", error);
      // Skip the broken chunk instead of stalling the whole queue.
      lastshifted.current = null;
      advanceQueue();
      return;
    }

    if (sourceRef.current) {
      sourceRef.current.disconnect();
    }

    const source = audioContextRef.current.createBufferSource();
    source.buffer = audioBuffer;
    source.connect(audioContextRef.current.destination);
    // Metadata is also kept on the node, as before, for anything inspecting sourceRef.
    source.session_id = session_id;
    source.sequence_id = sequence_id;
    source.transcript = transcript;
    source.onended = () => {
      lastshifted.current = null;
      const openSocket = socket.current;
      if (
        openSocket &&
        openSocket.readyState === WebSocket.OPEN &&
        sequence_id
      ) {
        // Tell the server which chunk has finished playing.
        openSocket.send(
          JSON.stringify({
            type: "status",
            msg: { session_id, sequence_id, transcript },
          })
        );
      }
      advanceQueue();
    };
    sourceRef.current = source;
    source.start(0);
  } catch (error) {
    console.error("Error in handlePlay: ", error);
    // Drop the offending chunk and keep the queue moving.
    lastshifted.current = null;
    advanceQueue();
  }
};
/**
 * Click handler for the Start/Stop button.
 *
 * - Session running: stops the chunk being played, empties the queue and
 *   shuts the session down through `stopAudioStream`.
 * - Session not running: opens the realtime connection and sends the "start"
 *   message with the current settings. If the connection cannot be set up the
 *   UI is returned to the "Start" state instead of hanging in "Starting".
 *
 * @returns {Promise<void>}
 */
const handlemicchange = async () => {
  if (ismicopen) {
    if (sourceRef.current) {
      // Detach the ended-callback first so stopping does not trigger the next chunk.
      sourceRef.current.onended = null;
      sourceRef.current.stop();
      sourceRef.current.disconnect();
      sourceRef.current = null;
      bufferQueue.current = [];
    }
    try {
      await stopAudioStream();
    } catch (error) {
      console.error("Error while stopping the conversation:", error);
    }
    setismic((prev) => !prev);
    return;
  }

  setlatency(0);
  setismicopen(true);
  try {
    await connectToRealtimeTTS();
    socket.current.send(
      JSON.stringify({
        type: "start",
        // The settings are sent as a JSON string nested inside the envelope.
        msg: JSON.stringify({
          temperature,
          prefixPadding,
          silenceDuration,
          voice: selectedVoice?.value,
          threshold,
          system_prompt,
          sessionId: session,
        }),
      })
    );
  } catch (error) {
    console.error("Unable to start the conversation:", error);
    toast.error("Please try again.. Socket");
    setismicopen(false);
    return;
  }
  setismic((prev) => !prev);
};
/**
 * Opens the realtime WebSocket and wires up the message protocol.
 *
 * Message types handled: "initial", "media", "info", "ready", "pause",
 * "continue", "clear", "end", "chathistory".
 *
 * @returns {Promise<null>} Resolves with `null` when the server sends
 *   "initial" (at which point `socket.current` is set). Rejects if the socket
 *   cannot be created, reports an error, or closes before "initial" arrives.
 */
const connectToRealtimeTTS = async () => {
  return new Promise((resolve, reject) => {
    try {
      const audioContext = new (window.AudioContext ||
        window.webkitAudioContext)();
      audioContextRef.current = audioContext;
      setaudioContextState(true);

      const host = process.env.REACT_APP_WEBSOCKET_URL;
      if (!host) {
        throw new Error("REACT_APP_WEBSOCKET_URL is not configured.");
      }
      // Plain ws:// is only used for local development hosts.
      const scheme = host.includes("localhost") ? "ws" : "wss";
      const ws = new WebSocket(`${scheme}://${host}/v2v`);

      // Stops and forgets the chunk that is currently playing, if any.
      const stopCurrentSource = () => {
        if (sourceRef.current) {
          sourceRef.current.onended = null;
          sourceRef.current.stop();
          sourceRef.current.disconnect();
          sourceRef.current = null;
        }
      };

      ws.onopen = () => {};

      ws.onmessage = async (event) => {
        try {
          const { type, msg } = JSON.parse(event.data);
          switch (type) {
            case "initial": {
              socket.current = ws;
              setsocketConnected(true);
              resolve(null);
              break;
            }
            case "media": {
              bufferQueue.current.push(msg);
              if (!playing.current) {
                handlePlay(Date.now());
              }
              break;
            }
            case "info": {
              toast.error(msg);
              break;
            }
            case "ready": {
              isgrpc.current = true;
              setisgrpc(true);
              await startAudioStream();
              break;
            }
            case "pause": {
              stopCurrentSource();
              playing.current = false;
              setgenerating(true);
              setisplaying(false);
              break;
            }
            case "continue": {
              // Replay the chunk that was interrupted by "pause".
              if (lastshifted.current) {
                bufferQueue.current.unshift(lastshifted.current);
                lastshifted.current = null;
              }
              setgenerating(false);
              handlePlay(Date.now());
              if (socket.interval) {
                clearInterval(socket.interval);
                socket.interval = null;
              }
              break;
            }
            case "clear": {
              bufferQueue.current = [];
              playing.current = false;
              setgenerating(false);
              setisplaying(false);
              stopCurrentSource();
              break;
            }
            case "end": {
              // NOTE(review): `ismicopen` is the value captured when this
              // connection was opened, not the live state; confirm whether the
              // toast is meant to depend on the current mic state.
              audioContext
                .close()
                .then(() => {
                  setaudioContextState(false);
                  if (ismicopen) {
                    toast.error("Please restart the conversation.");
                  }
                })
                .catch((err) => {
                  console.error("Error closing audio context:", err);
                  if (ismicopen) {
                    toast.error("Please restart the conversation.");
                  }
                });
              await stopAudioStream();
              setismicopen(false);
              break;
            }
            case "chathistory": {
              setchathistory(msg);
              break;
            }
            default:
              break;
          }
        } catch (error) {
          console.error("Error handling websocket message:", error);
        }
      };

      ws.onclose = () => {
        // No-op once the promise has resolved; otherwise unblocks the caller.
        reject(new Error("WebSocket closed before the session was initialised."));
        try {
          // NOTE(review): `audioStream` is the value captured when this
          // connection was opened, so it can be null or from an earlier session.
          if (audioStream) {
            audioStream.getTracks().forEach((track) => track.stop());
            setAudioStream(null);
          }
          setElapsedTime(0);
        } catch (err) {
          console.error("Error releasing audio stream on close:", err);
        }
      };

      ws.onerror = (err) => {
        console.error("WebSocket error:", err);
        // No-op once the promise has resolved.
        reject(new Error("WebSocket connection failed."));
      };
    } catch (err) {
      console.error("Error in making websocket connection.");
      reject(err);
    }
  });
};
/**
 * Converts float audio samples to signed 16-bit PCM.
 *
 * Samples are clamped to [-1, 1]; negative values are scaled by 0x8000 and
 * non-negative values by 0x7fff, then truncated by the Int16Array conversion.
 *
 * @param {ArrayLike<number>|null|undefined} input Samples (e.g. a Float32Array).
 *   A nullish input is treated as an empty buffer.
 * @returns {ArrayBuffer} Buffer backing the Int16Array of converted samples.
 */
function floatTo16BitPCM(input) {
  const pcm = Int16Array.from(input ?? [], (value) => {
    const sample = Math.max(-1, Math.min(1, value));
    return sample < 0 ? sample * 0x8000 : sample * 0x7fff;
  });
  return pcm.buffer;
}
/**
 * Starts microphone capture and streams it to the server as 16-bit PCM.
 *
 * Clears the transcript, starts the elapsed-time ticker, opens the microphone,
 * and sends every captured buffer over the session socket while
 * `isgrpc.current` is set. Everything acquired here (ticker, microphone
 * tracks, audio nodes, capture context) is released as soon as the session is
 * no longer ready or the socket closes.
 *
 * @returns {Promise<void>}
 */
const startAudioStream = async () => {
  setchathistory([]);

  // Capture is tied to the socket of the session that requested it.
  const sessionSocket = socket.current;
  if (!sessionSocket) {
    toast.error("Please try again.. Socket");
    return;
  }

  const AudioContextClass = window.AudioContext || window.webkitAudioContext;
  let elapsedTimer = null;
  let stream = null;
  let captureContext = null;
  let micSource = null;
  let processor = null;
  let released = false;

  // Releases everything this call acquired. Safe to call more than once.
  const releaseCapture = () => {
    if (released) {
      return;
    }
    released = true;
    clearInterval(elapsedTimer);
    if (processor) {
      processor.onaudioprocess = null;
      processor.disconnect();
    }
    if (micSource) {
      micSource.disconnect();
    }
    if (stream) {
      stream.getTracks().forEach((track) => track.stop());
    }
    if (captureContext && captureContext.state !== "closed") {
      captureContext.close().catch((err) => {
        console.error("Error closing capture audio context:", err);
      });
    }
  };

  try {
    const startTime = Date.now();
    elapsedTimer = setInterval(() => {
      setElapsedTime(Date.now() - startTime);
    }, 1000);
    sessionSocket.addEventListener("close", releaseCapture, { once: true });

    // NOTE(review): this replaces the playback context created by
    // connectToRealtimeTTS without closing it; kept as-is because playback of
    // already-queued audio may depend on a context that "end" does not close.
    audioContextRef.current = new AudioContextClass();
    setismicopen(true);

    stream = await navigator.mediaDevices.getUserMedia({ audio: true });
    if (released) {
      // The socket closed while the permission prompt was open.
      stream.getTracks().forEach((track) => track.stop());
      return;
    }
    setAudioStream(stream);

    captureContext = new AudioContextClass({ sampleRate: 16000 });
    setAudioContext(captureContext);

    micSource = captureContext.createMediaStreamSource(stream);
    const bufferSize = 2048;
    // ScriptProcessorNode is deprecated in favour of AudioWorklet but is kept
    // here to preserve the existing capture path.
    processor = captureContext.createScriptProcessor(bufferSize, 1, 1);
    processor.onaudioprocess = (e) => {
      if (!isgrpc.current) {
        // The session was stopped: let go of the microphone.
        releaseCapture();
        return;
      }
      if (sessionSocket.readyState !== WebSocket.OPEN) {
        return;
      }
      try {
        sessionSocket.send(floatTo16BitPCM(e.inputBuffer.getChannelData(0)));
      } catch (err) {
        console.error("Error sending audio data:", err);
      }
    };

    micSource.connect(processor);
    // The processor must be connected to a destination for onaudioprocess to fire.
    processor.connect(captureContext.destination);
  } catch (error) {
    console.error("Unable to start microphone capture:", error);
    releaseCapture();
    toast.error("Please allow microphone access and try again.");
    try {
      // Return the UI and the socket to the stopped state.
      await stopAudioStream();
    } catch (stopError) {
      console.error("Error while stopping after capture failure:", stopError);
    }
  }
};
/**
 * Stops the current session: resets the UI flags, stops the latency timer,
 * releases the microphone and capture context held in state, tells the server
 * to stop, and closes the socket.
 *
 * The "stop" message and the resource cleanup happen BEFORE the socket is
 * closed; closing first leaves the socket in CLOSING state and makes every
 * `readyState === OPEN` check afterwards fail.
 *
 * @returns {Promise<void>}
 */
const stopAudioStream = async () => {
  setgenerating(false);
  setismicopen(false);
  setElapsedTime(0);
  setisgrpc(false);
  // Stops handlePlay and the capture callback from doing further work.
  isgrpc.current = false;
  bufferQueue.current = [];

  if (socket.interval) {
    clearInterval(socket.interval);
    socket.interval = null;
  }

  if (audioStream) {
    try {
      audioStream.getTracks().forEach((track) => track.stop());
    } catch (err) {
      console.error("Error stopping microphone tracks:", err);
    }
    setAudioStream(null);
  }

  if (audioContext && audioContext.state !== "closed") {
    audioContext.close().catch((err) => {
      console.error("Error closing capture audio context:", err);
    });
  }

  const openSocket = socket.current;
  if (openSocket && openSocket.readyState === WebSocket.OPEN) {
    try {
      openSocket.send(JSON.stringify({ type: "stop", msg: "stop" }));
    } catch (err) {
      console.error("Error sending stop message:", err);
    }
    openSocket.close();
  }
};
/**
 * Label for the Start/Stop button.
 *
 * @returns {string} "Start" when the mic is closed, "Starting" while the mic
 *   is open but the server has not signalled ready, "Stop" once it has.
 */
const getButtonText = () => {
  if (!ismicopen) {
    return "Start";
  }
  return isgrpcs ? "Stop" : "Starting";
};
/**
 * Formats a duration as zero-padded `HH:MM:SS`.
 *
 * @param {number} milliseconds Duration in milliseconds. Negative, NaN or
 *   otherwise non-finite values are treated as 0.
 * @returns {string} The formatted duration; hours are not capped at 99.
 */
const formatTime = (milliseconds) => {
  const numeric = Number(milliseconds);
  const safeMs = Number.isFinite(numeric) && numeric > 0 ? numeric : 0;
  const totalSeconds = Math.floor(safeMs / 1000);
  const hours = Math.floor(totalSeconds / 3600);
  const minutes = Math.floor((totalSeconds % 3600) / 60);
  const seconds = totalSeconds % 60;
  return [hours, minutes, seconds]
    .map((part) => String(part).padStart(2, "0"))
    .join(":");
};
return ( |
<div className={css.main}> |
<div className={css.container}> |
<div className={css.realtime}> |
<div className={`${css.clarbtn} ${!mobileView && css.marginclear}`}> |
<button |
onClick={() => { |
sessionStorage.removeItem("sessionId"); |
const sessionId = uuidv4(); |
setchathistory([]); |
sessionStorage.setItem("sessionId", sessionId); |
setsession(sessionStorage.getItem("sessionId")); |
toast.success("New Session Started."); |
}} |
> |
<GiBroom className={css.broom} /> |
clear |
</button> |
</div> |
<div className={css.conversation}> |
{chathistory.length > 0 && |
chathistory.map((item, index) => { |
return ( |
<> |
<div className={css.message} key={index}> |
{/* <span>{item.timestamp}</span> */} |
<div className={css.msg}> |
<div className={css.speaker}>{item.speaker}</div> |
<div>{item.content}</div> |
{/* <Markdown remarkPlugins={[remarkGfm]} className={css.markdown}>{item.content}</Markdown> */} |
</div> |
</div> |
<div ref={msgref}></div> |
</> |
); |
})} |
</div> |
<div className={`${css.speakNow}`}> |
{socketConnected || !socketConnected ? ( |
<div> |
<div className={`${css.startstop}`}> |
<div className={`${css.totaltime}`}> |
{/* {!ismicopen ? "00:00:00" : formatTime(elapsedTime)} */} |
{`${latency} ms`} |
</div> |
<button onClick={handlemicchange}>{getButtonText()}</button> |
</div> |
</div> |
) : ( |
<div>Connecting...</div> |
)} |
</div> |
</div> |
</div> |
</div> |
); |
}; |
export default Playground; |