import React, { useEffect, useRef, useState } from "react";
import css from "./Playground.module.scss";
import toast from "react-hot-toast";
import { v4 as uuidv4 } from "uuid";
import { useNavigate } from "react-router-dom";
import { GiBroom } from "react-icons/gi";
import { RiAiGenerate } from "react-icons/ri";
const voices = [
{
name: "JILL",
value: "jill",
},
{
name: "JACK",
value: "jack",
},
];
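// Playground: realtime voice-to-voice demo. It captures microphone audio, streams
// 16-bit PCM frames to the backend over a WebSocket, and plays back the base64
// audio chunks the server returns while keeping a rolling chat transcript.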
const Playground = () => {
const navigate = useNavigate();
const [temperature, setTemperature] = useState(0.7);
const [threshold, setThreshold] = useState(0.5);
const [prefixPadding, setPrefixPadding] = useState(300);
const [silenceDuration, setSilenceDuration] = useState(100);
const [mobileView, setmobileView] = useState(false);
const [generating, setgenerating] = useState(false);
const [isVoices, setisVoices] = useState(false);
const [selectedVoice, setselectedVoice] = useState(voices[0]);
const [ismicopen, setismicopen] = useState(false);
const [ismic, setismic] = useState(false);
const [audioStream, setAudioStream] = useState(null);
const [audioContext, setAudioContext] = useState(null);
const sourceRef = useRef(null);
const audioContextRef = useRef(null);
const playing = useRef(false);
const [isplaying, setisplaying] = useState(false);
const lastshifted = useRef(null);
const bufferQueue = useRef([]);
const socket = useRef(null);
const isgrpc = useRef(null);
const [isgrpcs, setisgrpc] = useState(false);
const [socketConnected, setsocketConnected] = useState(false);
const [audioContextState, setaudioContextState] = useState(false);
if (!sessionStorage.getItem("sessionId")) {
const sessionId = uuidv4();
sessionStorage.setItem("sessionId", sessionId);
}
const [session, setsession] = useState(sessionStorage.getItem("sessionId"));
const [system_prompt, setSystemPrompt] = useState("");
const [elapsedTime, setElapsedTime] = useState(0);
const [latency, setlatency] = useState(0);
const [chathistory, setchathistory] = useState([]);
const msgref = useRef(null);
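// Auto-scroll the conversation panel to the latest message whenever the chat history changes.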
useEffect(() => {
if (chathistory) {
msgref?.current?.scrollIntoView({ behavior: "smooth" });
}
}, [chathistory]);
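// handlePlay: shift the next base64 chunk off bufferQueue, split off the JSON metadata
// header (terminated by a NUL byte), decode the remaining bytes with the Web Audio API,
// and play them; onended chains the next chunk until the queue drains. A sequence_id of
// "-2" marks the end of a server turn and starts the latency timer stored on socket.interval.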
const handlePlay = async (da) => {
try {
if (!isgrpc.current) {
return;
}
playing.current = true;
setisplaying(true);
const base64Data = bufferQueue.current.shift();
lastshifted.current = base64Data;
const bytes = new Uint8Array(
atob(base64Data)
.split("")
.map((char) => char.charCodeAt(0))
);
let arrayBuffer = bytes.buffer;
const metadataEndIndex = bytes.indexOf(0);
let metadata = new TextDecoder().decode(bytes.slice(0, metadataEndIndex));
metadata = JSON.parse(metadata);
const { session_id, sequence_id, transcript } = metadata;
if (sequence_id !== "-2") {
if (socket.interval) {
clearInterval(socket.interval);
socket.interval = null;
setlatency((prev) => prev + 150);
}
arrayBuffer = arrayBuffer.slice(
metadataEndIndex + 1,
arrayBuffer.length
);
try {
if (audioContextRef.current.state === "suspended") {
await audioContextRef.current.resume();
}
const audioBuffer = await audioContextRef.current.decodeAudioData(
arrayBuffer
);
if (sourceRef.current) {
sourceRef.current.disconnect();
}
const source = audioContextRef.current.createBufferSource();
source.buffer = audioBuffer;
source.connect(audioContextRef.current.destination);
source.start(0);
sourceRef.current = source;
sourceRef.current.session_id = session_id;
sourceRef.current.sequence_id = sequence_id;
sourceRef.current.transcript = transcript;
sourceRef.current.onended = () => {
lastshifted.current = null;
if (
socket.current &&
socket.current.readyState === WebSocket.OPEN &&
sourceRef?.current?.sequence_id
) {
socket.current.send(
JSON.stringify({
type: "status",
msg: {
session_id: sourceRef?.current?.session_id,
sequence_id: sourceRef?.current?.sequence_id,
transcript: sourceRef?.current?.transcript,
},
})
);
}
if (bufferQueue.current.length > 0) {
playing.current = true;
setisplaying(true);
const da = Date.now();
handlePlay(da);
} else {
playing.current = false;
setisplaying(false);
}
};
} catch (error) {
console.error("Error decoding audio data:", error);
}
} else {
const startTime = Date.now();
const intervall = setInterval(() => {
setlatency(Date.now() - startTime);
}, 10);
socket.interval = intervall;
if (bufferQueue.current.length > 0) {
playing.current = true;
setisplaying(true);
const da = Date.now();
handlePlay(da);
} else {
playing.current = false;
setisplaying(false);
}
}
} catch (error) {
console.error("Error in handlePlay: ", error);
}
};
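// Toggle the conversation: on start, open the WebSocket and send the session
// configuration (voice, temperature, threshold, prefix padding, silence duration,
// system prompt); on stop, halt playback and tear the stream down.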
const handlemicchange = async () => {
if (ismicopen && sourceRef.current) {
sourceRef.current.onended = null;
sourceRef.current.stop();
sourceRef.current.disconnect();
sourceRef.current = null;
bufferQueue.current = [];
}
if (!ismicopen) {
setlatency(0);
setismicopen(true);
await connectToRealtimeTTS();
socket.current.send(
JSON.stringify({
type: "start",
msg: JSON.stringify({
temperature: temperature,
prefixPadding: prefixPadding,
silenceDuration: silenceDuration,
voice: selectedVoice?.value,
threshold: threshold,
system_prompt: system_prompt,
sessionId: session,
}),
})
);
} else {
stopAudioStream();
}
setismic((prev) => !prev);
};
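// connectToRealtimeTTS: open the /v2v WebSocket and a fresh AudioContext. Resolves
// once the server sends its "initial" message; rejects if the connection cannot be created.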
const connectToRealtimeTTS = async () => {
return new Promise((resolve, reject) => {
try {
const audioContext = new (window.AudioContext ||
window.webkitAudioContext)();
audioContextRef.current = audioContext;
setaudioContextState(true);
const websocketURL = process.env.REACT_APP_WEBSOCKET_URL.includes(
"localhost"
)
? `ws://${process.env.REACT_APP_WEBSOCKET_URL}/v2v`
: `wss://${process.env.REACT_APP_WEBSOCKET_URL}/v2v`;
const ws = new WebSocket(websocketURL);
ws.onopen = () => {
// console.log("WebSocket connected");
// socket.current = ws;
// setsocketConnected(true);
// toast.success("Now You can speak...");
};
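// Server messages drive the client state machine:
//   "initial"                  -> socket is ready, resolve the connect promise
//   "media"                    -> queue a base64 audio chunk and start playback if idle
//   "ready"                    -> server pipeline is up, start streaming microphone audio
//   "pause" / "continue" / "clear" -> interrupt, resume, or flush playback
//   "end"                      -> close the AudioContext and stop the mic stream
//   "chathistory"              -> replace the displayed transcript
//   "info"                     -> surface a server notice to the user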
ws.onmessage = async (event) => {
try {
const data = JSON.parse(event.data);
const { type, msg } = data;
switch (type) {
case "initial":
socket.current = ws;
setsocketConnected(true);
resolve(null);
break;
case "media":
const da = Date.now();
bufferQueue.current.push(msg);
if (!playing.current && bufferQueue.current.length > 0) {
handlePlay(da);
}
break;
case "info":
toast.error(msg);
break;
case "ready":
isgrpc.current = true;
setisgrpc(true);
await startAudioStream();
break;
case "pause":
if (sourceRef.current) {
sourceRef.current.onended = null;
sourceRef.current.stop();
sourceRef.current.disconnect();
sourceRef.current = null;
}
playing.current = false;
setgenerating(true);
setisplaying(false);
break;
case "continue":
if (lastshifted.current) {
bufferQueue.current.unshift(lastshifted.current);
lastshifted.current = null;
}
setgenerating(false);
const daa = Date.now();
handlePlay(daa);
if (socket.interval) {
clearInterval(socket.interval);
socket.interval = null;
}
break;
case "clear":
bufferQueue.current = [];
playing.current = false;
setgenerating(false);
setisplaying(false);
if (sourceRef.current) {
sourceRef.current.onended = null;
sourceRef.current.stop();
sourceRef.current.disconnect();
sourceRef.current = null;
}
break;
case "end":
try {
audioContext
.close()
.then(() => {
setaudioContextState(false);
if (ismicopen) {
toast.error("Please restart the conversation.");
}
})
.catch((err) => {
if (ismicopen) {
toast.error("Please restart the conversation.");
}
});
await stopAudioStream();
setismicopen(false);
} catch (error) {
// console.error("Error in closing audioContext.");
}
break;
case "chathistory":
setchathistory(msg);
break;
default:
break;
}
} catch (error) {
// console.error("Error in websocket media.");
}
};
ws.onclose = async () => {
try {
audioStream?.getTracks().forEach((track) => track.stop());
setAudioStream(null);
setElapsedTime(0);
} catch (err) {
// console.error(err);
}
};
ws.onerror = (err) => {
// console.error("Websocket Error", err);
};
} catch (err) {
console.error("Error in making websocket connection.");
reject(null);
}
});
};
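// Convert Float32 samples in [-1, 1] from the Web Audio API into 16-bit signed PCM,
// which is what gets sent to the server as raw microphone audio.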
function floatTo16BitPCM(input) {
const output = new Int16Array(input.length);
for (let i = 0; i < input.length; i++) {
const sample = Math.max(-1, Math.min(1, input[i]));
output[i] = sample < 0 ? sample * 0x8000 : sample * 0x7fff;
}
return output.buffer;
}
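// startAudioStream: request microphone access, run it through a 16 kHz AudioContext
// and a ScriptProcessorNode (deprecated in favor of AudioWorklet, but still widely
// supported), and forward each captured buffer to the server as 16-bit PCM.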
const startAudioStream = async () => {
try {
if (!socket.current) {
toast.error("Socket not connected. Please try again.");
return;
}
const startTime = Date.now();
setchathistory([]);
const interval = setInterval(() => {
setElapsedTime(Date.now() - startTime);
}, 1000);
audioContextRef.current = new (window.AudioContext ||
window.webkitAudioContext)();
setismicopen(true);
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
setAudioStream(stream);
const audioContext = new AudioContext({
sampleRate: 16000,
});
setAudioContext(audioContext);
const audioInput = audioContext.createMediaStreamSource(stream);
const bufferSize = 2048;
const scriptProcessorNode = audioContext.createScriptProcessor(
bufferSize,
1,
1
);
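// For every captured buffer: if the server pipeline has dropped, shut the stream down
// and ask the user to restart; otherwise send the converted 16-bit PCM frame over the
// open WebSocket.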
scriptProcessorNode.onaudioprocess = async (e) => {
const inputData = e.inputBuffer.getChannelData(0);
const l16Data = floatTo16BitPCM(inputData);
try {
if (!isgrpc.current) {
try {
if (audioContextState) {
audioContext
.close()
.then(() => {
setaudioContextState(false);
toast.error("Please restart the conversation.");
})
.catch((err) => {
toast.error("Please restart the conversation.");
// console.error(err);
});
await stopAudioStream();
toast.error("Please start again.");
setismicopen(false);
}
} catch (error) {
// console.error("Error in closing audioContext.");
}
}
if (
isgrpc.current &&
socket.current &&
socket.current.readyState === WebSocket.OPEN
) {
// if (socket.current.grpc) {
socket.current.send(l16Data);
}
} catch (err) {
// console.error("Error in sending buffer.");
}
};
audioInput.connect(scriptProcessorNode);
scriptProcessorNode.connect(audioContext.destination);
} catch (error) {
// console.error("Error accessing microphone:", error);
}
};
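// stopAudioStream: reset UI state, clear timers and the playback queue, stop the
// microphone tracks, tell the server to stop, and close the socket and AudioContext.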
const stopAudioStream = async () => {
setgenerating(false);
setismicopen(false);
setElapsedTime(0);
isgrpc.current = false;
setisgrpc(false);
bufferQueue.current = [];
if (socket.interval) {
clearInterval(socket.interval);
socket.interval = null;
}
try {
if (audioStream) {
audioStream.getTracks().forEach((track) => track.stop());
setAudioStream(null);
}
} catch (err) {
// console.error(err);
}
try {
if (socket.current && socket.current.readyState === WebSocket.OPEN) {
socket.current.send(JSON.stringify({ type: "stop", msg: "stop" }));
socket.current.close();
}
socket.current = null;
audioContext
?.close()
.then(() => {
// console.log("AudioContext closed.");
})
.catch((err) => {
// console.error("Error in closing the audioContext.", err);
});
} catch (err) {
// console.error("Error in closing the audioContext.");
}
};
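// Label for the Start/Stop button based on the current connection state.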
const getButtonText = () => {
if (!ismicopen) {
return "Start";
}
if (ismicopen && !isgrpcs) {
return "Starting";
}
if (isgrpcs) {
return "Stop";
}
return "";
};
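// Format a millisecond duration as HH:MM:SS.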
const formatTime = (milliseconds) => {
let totalSeconds = Math.floor(milliseconds / 1000);
let hours = Math.floor(totalSeconds / 3600);
let minutes = Math.floor((totalSeconds % 3600) / 60);
let seconds = totalSeconds % 60;
return `${String(hours).padStart(2, "0")}:${String(minutes).padStart(
2,
"0"
)}:${String(seconds).padStart(2, "0")}`;
};
return (
<div className={css.main}>
<div className={css.container}>
<div className={css.realtime}>
<div className={`${css.clarbtn} ${!mobileView && css.marginclear}`}>
<button
onClick={() => {
sessionStorage.removeItem("sessionId");
const sessionId = uuidv4();
setchathistory([]);
sessionStorage.setItem("sessionId", sessionId);
setsession(sessionStorage.getItem("sessionId"));
toast.success("New Session Started.");
}}
>
<GiBroom className={css.broom} />
clear
</button>
</div>
<div className={css.conversation}>
{chathistory.length > 0 &&
chathistory.map((item, index) => {
return (
<React.Fragment key={index}>
<div className={css.message}>
{/* <span>{item.timestamp}</span> */}
<div className={css.msg}>
<div className={css.speaker}>{item.speaker}</div>
<div>{item.content}</div>
{/* <Markdown remarkPlugins={[remarkGfm]} className={css.markdown}>{item.content}</Markdown> */}
</div>
</div>
<div ref={msgref}></div>
</React.Fragment>
);
})}
</div>
<div className={`${css.speakNow}`}>
{socketConnected || !socketConnected ? (
<div>
<div className={`${css.startstop}`}>
<div className={`${css.totaltime}`}>
{/* {!ismicopen ? "00:00:00" : formatTime(elapsedTime)} */}
{`${latency} ms`}
</div>
<button onClick={handlemicchange}>{getButtonText()}</button>
</div>
</div>
) : (
<div>Connecting...</div>
)}
</div>
</div>
</div>
</div>
);
};
export default Playground;