|
import React, { useEffect, useRef, useState } from "react"; |
|
import css from "./Playground.module.scss"; |
|
import toast from "react-hot-toast"; |
|
import { v4 as uuidv4 } from "uuid"; |
|
import { useNavigate } from "react-router-dom"; |
|
import { GiBroom } from "react-icons/gi"; |
|
import { RiAiGenerate } from "react-icons/ri"; |
|
|
|
// Voice options shown in the playground's voice picker.
// The first entry is the default selection.
const voices = [
  { name: "JILL", value: "jill" },
  { name: "JACK", value: "jack" },
];
|
|
|
// Realtime voice-to-voice playground: streams microphone audio to a /v2v
// WebSocket endpoint and plays back the server's synthesized audio replies.
const Playground = () => {
  const navigate = useNavigate();

  // --- Session tuning parameters (sent to the server in the "start" msg) ---
  const [temperature, setTemperature] = useState(0.7);
  const [threshold, setThreshold] = useState(0.5); // presumably a VAD threshold in 0..1 — TODO confirm with server
  const [prefixPadding, setPrefixPadding] = useState(300); // presumably milliseconds — TODO confirm units
  const [silenceDuration, setSilenceDuration] = useState(100); // presumably milliseconds — TODO confirm units

  // --- UI state ---
  const [mobileView, setmobileView] = useState(false);
  const [generating, setgenerating] = useState(false); // true while server paused playback to generate
  const [isVoices, setisVoices] = useState(false);
  const [selectedVoice, setselectedVoice] = useState(voices[0]);

  // --- Microphone / streaming state ---
  const [ismicopen, setismicopen] = useState(false);
  const [ismic, setismic] = useState(false);
  const [audioStream, setAudioStream] = useState(null); // MediaStream from getUserMedia
  const [audioContext, setAudioContext] = useState(null); // capture-side AudioContext (16 kHz)
  const sourceRef = useRef(null); // currently playing AudioBufferSourceNode (metadata is stashed on it)
  const audioContextRef = useRef(null); // playback-side AudioContext
  const playing = useRef(false); // ref mirror of isplaying, readable inside socket callbacks
  const [isplaying, setisplaying] = useState(false);
  const lastshifted = useRef(null); // last dequeued base64 chunk, kept so "continue" can replay it
  const bufferQueue = useRef([]); // FIFO of base64 audio chunks received from the socket
  const socket = useRef(null); // WebSocket ref; NOTE(review): socket.interval is also used as an ad-hoc property bag for the latency timer
  const isgrpc = useRef(null); // server "ready" flag (ref so audio callbacks see the fresh value)
  const [isgrpcs, setisgrpc] = useState(false); // state mirror of isgrpc, used for rendering
  const [socketConnected, setsocketConnected] = useState(false);
  const [audioContextState, setaudioContextState] = useState(false); // true while a playback context is open

  // Ensure a stable per-tab session id exists before first use.
  // NOTE(review): this runs on every render, not just on mount — harmless
  // because of the guard, but a lazy useState initializer would be the more
  // conventional place for it.
  if (!sessionStorage.getItem("sessionId")) {
    const sessionId = uuidv4();
    sessionStorage.setItem("sessionId", sessionId);
  }
  const [session, setsession] = useState(sessionStorage.getItem("sessionId"));
  const [system_prompt, setsetsystemPrompt] = useState(""); // NOTE(review): setter name has a doubled "set" prefix
  const [elapsedTime, setElapsedTime] = useState(0); // ms since streaming started
  const [latency, setlatency] = useState(0); // measured response latency in ms

  const [chathistory, setchathistory] = useState([]); // transcript entries pushed by the server
  const msgref = useRef(null); // sentinel div used to auto-scroll the transcript

  // Keep the newest transcript message scrolled into view.
  useEffect(() => {
    if (chathistory) {
      msgref?.current?.scrollIntoView({ behavior: "smooth" });
    }
  }, [chathistory]);
|
// Dequeue the next base64 chunk from bufferQueue and play it through the
// playback AudioContext. Each chunk is framed as: JSON metadata, a 0x00
// separator byte, then the encoded audio payload. Playback chains itself
// from the source's onended callback until the queue drains.
// `da` is a timestamp supplied by callers; it is not read in this body.
const handlePlay = async (da) => {
  try {
    // Only play once the server has signalled "ready".
    if (!isgrpc.current) {
      return;
    }
    playing.current = true;
    setisplaying(true);

    // Keep the dequeued chunk so a server "continue" can replay it.
    const base64Data = bufferQueue.current.shift();
    lastshifted.current = base64Data;

    // base64 -> raw bytes
    const bytes = new Uint8Array(
      atob(base64Data)
        .split("")
        .map((char) => char.charCodeAt(0))
    );

    let arrayBuffer = bytes.buffer;

    // Metadata is the JSON prefix terminated by the first zero byte.
    const metadataEndIndex = bytes.indexOf(0);
    let metadata = new TextDecoder().decode(bytes.slice(0, metadataEndIndex));
    metadata = JSON.parse(metadata);

    const { session_id, sequence_id, transcript } = metadata;
    // sequence_id "-2" marks a latency-probe chunk that carries no audio.
    if (sequence_id !== "-2") {
      // First real audio after a probe: stop the latency ticker and add a
      // fixed 150 ms correction. NOTE(review): `socket.interval` stores the
      // timer id on the ref object itself, not on the WebSocket.
      if (socket.interval) {
        clearInterval(socket.interval);
        socket.interval = null;
        setlatency((prev) => prev + 150);
      }
      // Strip the metadata prefix; the remainder is the audio payload.
      arrayBuffer = arrayBuffer.slice(
        metadataEndIndex + 1,
        arrayBuffer.length
      );

      try {
        if (audioContextRef.current.state === "suspended") {
          await audioContextRef.current.resume();
        }
        const audioBuffer = await audioContextRef.current.decodeAudioData(
          arrayBuffer
        );

        // Detach any source node that is still connected.
        if (sourceRef.current) {
          sourceRef.current.disconnect();
        }

        const source = audioContextRef.current.createBufferSource();
        source.buffer = audioBuffer;
        source.connect(audioContextRef.current.destination);
        source.start(0);
        sourceRef.current = source;
        // Stash the chunk's metadata on the node so onended can report
        // exactly which chunk finished playing.
        sourceRef.current.session_id = session_id;
        sourceRef.current.sequence_id = sequence_id;
        sourceRef.current.transcript = transcript;

        sourceRef.current.onended = () => {
          lastshifted.current = null;
          // Ack the finished chunk so the server can advance.
          if (
            socket.current.readyState === WebSocket.OPEN &&
            sourceRef?.current?.sequence_id
          ) {
            socket.current.send(
              JSON.stringify({
                type: "status",
                msg: {
                  session_id: sourceRef?.current?.session_id,
                  sequence_id: sourceRef?.current?.sequence_id,
                  transcript: sourceRef?.current?.transcript,
                },
              })
            );
          }

          // Chain into the next queued chunk, or go idle.
          if (bufferQueue.current.length > 0) {
            playing.current = true;
            setisplaying(true);
            const da = Date.now();
            handlePlay(da);
          } else {
            playing.current = false;
            setisplaying(false);
          }
        };
      } catch (error) {
        console.error("Error decoding audio data:", error);
      }
    } else {
      // Latency probe: start a 10 ms ticker that grows `latency` until the
      // first real audio chunk arrives (it is cleared in the branch above).
      const startTime = Date.now();
      const intervall = setInterval(() => {
        setlatency(Date.now() - startTime);
      }, 10);
      socket.interval = intervall;
      // The probe itself has no audio — immediately try the next chunk.
      if (bufferQueue.current.length > 0) {
        playing.current = true;
        setisplaying(true);
        const da = Date.now();
        handlePlay(da);
      } else {
        playing.current = false;
        setisplaying(false);
      }
    }
  } catch (error) {
    console.error("Error in handlePlay: ", error);
  }
};
|
|
|
// Toggle the microphone session. Starting opens the socket and sends the
// "start" configuration; stopping tears the stream down. Any audio still
// playing from the previous turn is halted first.
const handlemicchange = async () => {
  // Kill in-flight playback before changing mic state.
  if (ismicopen && sourceRef.current) {
    sourceRef.current.onended = null; // prevent the onended chain from re-firing
    sourceRef.current.stop();
    sourceRef.current.disconnect();
    sourceRef.current = null;
    bufferQueue.current = [];
  }
  if (!ismicopen) {
    setlatency(0);
    setismicopen(true);
    // Resolves only after the server's "initial" message, so socket.current
    // is guaranteed to be set before the send below.
    await connectToRealtimeTTS();
    socket.current.send(
      JSON.stringify({
        type: "start",
        // NOTE: msg is deliberately double-encoded (a JSON string inside
        // JSON) — the server expects this framing.
        msg: JSON.stringify({
          temperature: temperature,
          prefixPadding: prefixPadding,
          silenceDuration: silenceDuration,
          voice: selectedVoice?.value,
          threshold: threshold,
          system_prompt: system_prompt,
          sessionId: session,
        }),
      })
    );
  } else {
    stopAudioStream();
  }
  // BUGFIX: use a functional update instead of the original
  // `ismic ? setismic(false) : setismic(true)`, which read a potentially
  // stale `ismic` from this closure (the async awaits above make that a
  // realistic hazard) and could flip the wrong way.
  setismic((prev) => !prev);
};
|
|
|
|
|
// Open the /v2v WebSocket and wire up all server->client message handling.
// Returns a promise that resolves once the server acknowledges with its
// "initial" message (the explicit Promise wrapper adapts the callback-style
// WebSocket API).
const connectToRealtimeTTS = async () => {
  return new Promise((resolve, reject) => {
    try {
      // Fresh playback context for this conversation.
      const audioContext = new (window.AudioContext ||
        window.webkitAudioContext)();
      audioContextRef.current = audioContext;
      setaudioContextState(true);
      // Plain ws:// for local development, wss:// otherwise.
      const websocketURL = process.env.REACT_APP_WEBSOCKET_URL.includes(
        "localhost"
      )
        ? `ws://${process.env.REACT_APP_WEBSOCKET_URL}/v2v`
        : `wss://${process.env.REACT_APP_WEBSOCKET_URL}/v2v`;
      const ws = new WebSocket(websocketURL);
      ws.onopen = () => {
        // Intentionally empty: the handshake waits for the server's
        // "initial" message (handled in onmessage) rather than the open
        // event.
      };

      // All server control and media traffic arrives here.
      ws.onmessage = async (event) => {
        try {
          const data = JSON.parse(event.data);

          const { type, msg } = data;

          switch (type) {
            case "initial":
              // Handshake complete: publish the socket and resolve the
              // connect promise.
              socket.current = ws;
              setsocketConnected(true);
              resolve(null);
              break;
            case "media":
              // Base64 audio chunk — enqueue, and kick playback if idle.
              const da = Date.now();
              bufferQueue.current.push(msg);

              if (!playing.current && bufferQueue.current.length > 0) {
                handlePlay(da);
              }
              break;
            case "info":
              toast.error(msg);
              break;
            case "ready":
              // Server pipeline is up — begin streaming microphone audio.
              isgrpc.current = true;
              setisgrpc(true);
              await startAudioStream();
              break;
            case "pause":
              // Barge-in: halt current playback but keep the connection.
              if (sourceRef.current) {
                sourceRef.current.onended = null;
                sourceRef.current.stop();
                sourceRef.current.disconnect();
                sourceRef.current = null;
              }
              playing.current = false;
              setgenerating(true);
              setisplaying(false);
              break;
            case "continue":
              // Resume: requeue the chunk that was interrupted mid-play,
              // then restart the playback chain.
              if (lastshifted.current) {
                bufferQueue.current.unshift(lastshifted.current);
                lastshifted.current = null;
              }
              setgenerating(false);
              const daa = Date.now();
              handlePlay(daa);
              if (socket.interval) {
                clearInterval(socket.interval);
                socket.interval = null;
              }
              break;
            case "clear":
              // Discard everything buffered and stop playback outright.
              bufferQueue.current = [];
              playing.current = false;
              setgenerating(false);
              setisplaying(false);

              if (sourceRef.current) {
                sourceRef.current.onended = null;
                sourceRef.current.stop();
                sourceRef.current.disconnect();
                sourceRef.current = null;
              }
              break;
            case "end":
              // Server ended the session: close playback, stop capture.
              try {
                audioContext
                  .close()
                  .then(() => {
                    setaudioContextState(false);
                    // NOTE(review): `ismicopen` here is the value captured
                    // when this handler was created (before the
                    // setismicopen(true) re-render), so this toast may
                    // never fire — confirm intent.
                    if (ismicopen) {
                      toast.error("Please restart the conversation.");
                    }
                  })
                  .catch((err) => {
                    if (ismicopen) {
                      toast.error("Please restart the conversation.");
                    }
                  });
                await stopAudioStream();
                setismicopen(false);
              } catch (error) {
                // Best-effort teardown; failures are ignored.
              }
              break;
            case "chathistory":
              setchathistory(msg);
              break;
            default:
              break;
          }
        } catch (error) {
          // Non-JSON or malformed frames are ignored.
        }
      };

      ws.onclose = async () => {
        try {
          // NOTE(review): `audioStream` is the state captured at render time
          // (typically null when this handler was attached), so this track
          // cleanup may never run; also `await` on forEach (which returns
          // undefined) is a no-op — the tracks stop synchronously.
          await audioStream.getTracks().forEach((track) => track.stop());
          setAudioStream(null);
          setElapsedTime(0);
        } catch (err) {
          // audioStream was null/already stopped — nothing to clean up.
        }
      };

      ws.onerror = (err) => {
        // NOTE(review): errors are silently swallowed; consider surfacing
        // them to the user (e.g. via toast).
      };
    } catch (err) {
      console.error("Error in making websocket connection.");
      reject(null);
    }
  });
};
|
|
|
// Convert float PCM samples (nominally in [-1, 1]) to 16-bit signed PCM.
// Values outside the range are clamped. Returns the ArrayBuffer backing the
// converted Int16 samples.
function floatTo16BitPCM(input) {
  const pcm = Int16Array.from(input, (value) => {
    const clamped = Math.min(1, Math.max(-1, value));
    // Negative full-scale is -0x8000, positive full-scale is 0x7fff.
    return clamped < 0 ? clamped * 0x8000 : clamped * 0x7fff;
  });
  return pcm.buffer;
}
|
|
|
// Begin capturing microphone audio and streaming it to the server as raw
// 16-bit PCM frames. Also starts the session elapsed-time ticker and resets
// the transcript.
const startAudioStream = async () => {
  try {
    const startTime = Date.now();
    setchathistory([]);
    // Session clock ticker.
    // NOTE(review): `interval` is never cleared anywhere in this function —
    // it keeps firing after stopAudioStream; confirm whether cleanup was
    // intended.
    const interval = setInterval(() => {
      setElapsedTime(Date.now() - startTime);
    }, 1000);
    if (!socket.current) {
      toast.error("Please try again.. Socket");
      return;
    }
    // Playback context. NOTE(review): connectToRealtimeTTS already assigned
    // audioContextRef.current; this replaces it without closing the old one.
    audioContextRef.current = new (window.AudioContext ||
      window.webkitAudioContext)();
    setismicopen(true);
    const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
    setAudioStream(stream);

    // Capture context pinned to 16 kHz to match the server's expected rate.
    const audioContext = new AudioContext({
      sampleRate: 16000,
    });

    setAudioContext(audioContext);
    const audioInput = audioContext.createMediaStreamSource(stream);
    const bufferSize = 2048;
    // NOTE(review): ScriptProcessorNode is deprecated; AudioWorklet is the
    // modern replacement.
    const scriptProcessorNode = audioContext.createScriptProcessor(
      bufferSize,
      1,
      1
    );

    // Convert each capture buffer to 16-bit PCM and send it to the server.
    scriptProcessorNode.onaudioprocess = async (e) => {
      const inputData = e.inputBuffer.getChannelData(0);
      const l16Data = floatTo16BitPCM(inputData);
      try {
        // Server no longer ready: tear the capture pipeline down.
        // NOTE(review): `audioContextState` here is the value captured at
        // render time and may be stale.
        if (!isgrpc.current) {
          try {
            if (audioContextState) {
              audioContext
                .close()
                .then(() => {
                  setaudioContextState(false);
                  toast.error("Please restart the conversation.");
                })
                .catch((err) => {
                  toast.error("Please restart the conversation.");

                });
              await stopAudioStream();
              toast.error("Please start again.");
              setismicopen(false);
            }
          } catch (error) {
            // Best-effort teardown; failures are ignored.
          }
        }
        if (
          isgrpc.current &&
          socket.current &&
          socket.current.readyState === WebSocket.OPEN
        ) {
          // Binary PCM frames go straight over the socket.
          socket.current.send(l16Data);
        }
      } catch (err) {
        // Drop this frame on any send error.
      }
    };

    audioInput.connect(scriptProcessorNode);
    scriptProcessorNode.connect(audioContext.destination);
  } catch (error) {
    // getUserMedia denial or context failure silently aborts the stream.
    // NOTE(review): consider surfacing this to the user.
  }
};
|
|
|
// Tear down the streaming session: reset UI state, flush the audio queue,
// close the socket, stop the latency ticker, and release microphone tracks.
const stopAudioStream = async () => {
  setgenerating(false);
  setismicopen(false);
  setElapsedTime(0);
  setisgrpc(false);
  bufferQueue.current = [];
  // BUGFIX: guard against a null socket — this function can run when
  // socket.current was never set or was already cleared (e.g. stop invoked
  // twice), and the unguarded readyState read threw a TypeError.
  if (socket.current && socket.current.readyState === WebSocket.OPEN) {
    socket.current.close();
  }
  // Stop the latency ticker.
  // BUGFIX: this branch previously assigned `socket.current = null` —
  // evidently a typo for `socket.interval = null` (the matching resets in
  // handlePlay and the "continue" handler null socket.interval), and it made
  // the cleanup block below unreachable whenever a ticker was active.
  if (socket.interval) {
    clearInterval(socket.interval);
    socket.interval = null;
  }
  if (
    socket.current &&
    audioStream &&
    socket.current.readyState === WebSocket.OPEN
  ) {
    try {
      if (audioStream) {
        // NOTE(review): forEach returns undefined, so this `await` is a
        // no-op — the tracks still stop synchronously.
        await audioStream.getTracks().forEach((track) => track.stop());
        setAudioStream(null);
      }
    } catch (err) {
      // Best-effort: track teardown failures are non-fatal.
    }
    try {
      if (socket.current.readyState === WebSocket.OPEN) {
        isgrpc.current = false;
        socket.current.send(JSON.stringify({ type: "stop", msg: "stop" }));
      }
      audioContext
        .close()
        .then(() => {
          // Capture context closed.
        })
        .catch((err) => {
          // Already closed or never started — ignore.
        });
    } catch (err) {
      // Ignore teardown races.
    }
  }
};
|
// Label for the start/stop button, derived from mic state and server
// readiness. (The mic is "Starting" between opening and the server's
// "ready" message.)
const getButtonText = () => {
  if (!ismicopen) {
    return "Start";
  }
  return isgrpcs ? "Stop" : "Starting";
};
|
|
|
// Render a millisecond duration as a zero-padded "HH:MM:SS" string.
const formatTime = (milliseconds) => {
  const totalSeconds = Math.floor(milliseconds / 1000);
  const parts = [
    Math.floor(totalSeconds / 3600), // hours
    Math.floor(totalSeconds / 60) % 60, // minutes
    totalSeconds % 60, // seconds
  ];
  return parts.map((part) => String(part).padStart(2, "0")).join(":");
};
|
|
|
return ( |
|
<div className={css.main}> |
|
<div className={css.container}> |
|
<div className={css.realtime}> |
|
<div className={`${css.clarbtn} ${!mobileView && css.marginclear}`}> |
|
<button |
|
onClick={() => { |
|
sessionStorage.removeItem("sessionId"); |
|
const sessionId = uuidv4(); |
|
setchathistory([]); |
|
sessionStorage.setItem("sessionId", sessionId); |
|
setsession(sessionStorage.getItem("sessionId")); |
|
toast.success("New Session Started."); |
|
}} |
|
> |
|
<GiBroom className={css.broom} /> |
|
clear |
|
</button> |
|
</div> |
|
<div className={css.conversation}> |
|
{chathistory.length > 0 && |
|
chathistory.map((item, index) => { |
|
return ( |
|
<> |
|
<div className={css.message} key={index}> |
|
{/* <span>{item.timestamp}</span> */} |
|
<div className={css.msg}> |
|
<div className={css.speaker}>{item.speaker}</div> |
|
<div>{item.content}</div> |
|
{/* <Markdown remarkPlugins={[remarkGfm]} className={css.markdown}>{item.content}</Markdown> */} |
|
</div> |
|
</div> |
|
<div ref={msgref}></div> |
|
</> |
|
); |
|
})} |
|
</div> |
|
<div className={`${css.speakNow}`}> |
|
{socketConnected || !socketConnected ? ( |
|
<div> |
|
<div className={`${css.startstop}`}> |
|
<div className={`${css.totaltime}`}> |
|
{/* {!ismicopen ? "00:00:00" : formatTime(elapsedTime)} */} |
|
{`${latency} ms`} |
|
</div> |
|
<button onClick={handlemicchange}>{getButtonText()}</button> |
|
</div> |
|
</div> |
|
) : ( |
|
<div>Connecting...</div> |
|
)} |
|
</div> |
|
</div> |
|
</div> |
|
</div> |
|
); |
|
}; |
|
|
|
export default Playground; |
|
|