Spaces:
Paused
Paused
Update script1.js
Browse files- script1.js +34 -22
script1.js
CHANGED
|
@@ -1,12 +1,12 @@
|
|
| 1 |
// Constants and Configuration
|
| 2 |
const USER_SPEECH_INTERRUPT_DELAY = 500;
|
| 3 |
-
const TEXT_TO_SPEECH_API_ENDPOINT = "https://api.streamelements.com/kappa/v2/speech";
|
| 4 |
const CHUNK_SIZE = 300;
|
| 5 |
-
const MAX_PREFETCH_REQUESTS = 10;
|
| 6 |
const PREFETCH_CACHE_EXPIRATION = 60000; // 1 minute
|
| 7 |
const AUDIO_CACHE_EXPIRATION = 3600000; // 1 hour
|
| 8 |
-
const WEBCAM_INTERVAL = 5000;
|
| 9 |
-
const MAX_HISTORY_LENGTH = 6;
|
| 10 |
|
| 11 |
// DOM Elements
|
| 12 |
const startStopButton = document.getElementById('startStopButton');
|
|
@@ -50,7 +50,7 @@ const audioCache = new Map();
|
|
| 50 |
|
| 51 |
// Image Captioning State
|
| 52 |
let isCaptioningEnabled = false;
|
| 53 |
-
let lastCaption = "";
|
| 54 |
|
| 55 |
// Webcam Integration
|
| 56 |
import { client, handle_file } from 'https://cdn.jsdelivr.net/npm/@gradio/client/+esm';
|
|
@@ -155,8 +155,12 @@ const interruptAudioPlayback = (reason = 'unknown') => {
|
|
| 155 |
requestAbortController = null;
|
| 156 |
}
|
| 157 |
|
| 158 |
-
|
| 159 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
updateActivityIndicators();
|
| 161 |
};
|
| 162 |
|
|
@@ -165,12 +169,16 @@ const interruptAudioPlayback = (reason = 'unknown') => {
|
|
| 165 |
|
| 166 |
// Prefetch and cache the first TTS audio chunk
|
| 167 |
const prefetchFirstAudioChunk = (query, voice) => {
|
| 168 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 169 |
const cacheKey = generateCacheKey(normalizedQuery, voice, conversationHistory, modelSelectionDropdown.value);
|
| 170 |
|
| 171 |
if (pendingPrefetchRequests.has(cacheKey) || prefetchCache.has(cacheKey)) return;
|
| 172 |
|
| 173 |
-
prefetchQueue.push({ query:
|
| 174 |
processPrefetchQueue();
|
| 175 |
};
|
| 176 |
|
|
@@ -197,7 +205,11 @@ const processPrefetchQueue = async () => {
|
|
| 197 |
|
| 198 |
// Cancel pending prefetch requests
|
| 199 |
const cancelPrefetchRequests = (query) => {
|
| 200 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 201 |
|
| 202 |
for (const [cacheKey, abortController] of pendingPrefetchRequests) {
|
| 203 |
if (cacheKey.startsWith(normalizedQuery)) {
|
|
@@ -224,7 +236,7 @@ async function sendQueryToAI(query) {
|
|
| 224 |
try {
|
| 225 |
let combinedQuery = `{USER: "${query}"}`;
|
| 226 |
if (lastCaption !== "") {
|
| 227 |
-
combinedQuery += `, ${lastCaption} , {USER: "${query}"}`;
|
| 228 |
}
|
| 229 |
|
| 230 |
await streamAndHandleAudioResponse(combinedQuery, voiceSelectionDropdown.value, requestAbortController.signal);
|
|
@@ -243,8 +255,8 @@ const processSpeechTranscript = (transcript) => {
|
|
| 243 |
const trimmedTranscript = transcript.trimStart();
|
| 244 |
if (trimmedTranscript !== '' && !isRequestInProgress) {
|
| 245 |
activeQuery = trimmedTranscript;
|
| 246 |
-
addToConversationHistory('user', activeQuery);
|
| 247 |
-
sendQueryToAI(activeQuery);
|
| 248 |
}
|
| 249 |
};
|
| 250 |
|
|
@@ -368,7 +380,7 @@ const handleStreamingResponse = async (responseStream, voice, abortSignal) => {
|
|
| 368 |
if (textContent) {
|
| 369 |
if (!firstResponseTextTimestamp) firstResponseTextTimestamp = Date.now();
|
| 370 |
|
| 371 |
-
fullResponseText += textContent + " ";
|
| 372 |
fullResponseText2 += textContent + " ";
|
| 373 |
textChunk += textContent + " ";
|
| 374 |
transcriptDiv.textContent = fullResponseText2;
|
|
@@ -380,13 +392,13 @@ const handleStreamingResponse = async (responseStream, voice, abortSignal) => {
|
|
| 380 |
audioPlaybackQueue.push({ url: audioUrl });
|
| 381 |
if (!currentAudio) playNextAudio();
|
| 382 |
}
|
| 383 |
-
textChunk = "";
|
| 384 |
}
|
| 385 |
}
|
| 386 |
}
|
| 387 |
}
|
| 388 |
|
| 389 |
-
buffer = lines[lines.length - 1];
|
| 390 |
}
|
| 391 |
} catch (error) {
|
| 392 |
console.error("Error in handleStreamingResponse:", error);
|
|
@@ -402,8 +414,8 @@ const handleStreamingResponse = async (responseStream, voice, abortSignal) => {
|
|
| 402 |
}
|
| 403 |
|
| 404 |
addToConversationHistory('assistant', fullResponseText2);
|
| 405 |
-
fullResponseText = "";
|
| 406 |
-
fullResponseText2 = "";
|
| 407 |
}
|
| 408 |
};
|
| 409 |
|
|
@@ -502,13 +514,13 @@ if ('webkitSpeechRecognition' in window) {
|
|
| 502 |
isSpeechRecognitionActive = false;
|
| 503 |
startStopButton.innerHTML = '<svg id="microphoneIcon" xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"></path><path d="M19 10v2a7 7 0 0 1-14 0v-2"></path><line x1="12" y1="19" x2="12" y2="23"></line><line x1="8" y1="23" x2="16" y2="23"></line></svg> Start Listening';
|
| 504 |
clearInterval(webcamInterval);
|
| 505 |
-
video.srcObject = null;
|
| 506 |
lastCaption = "";
|
| 507 |
} else {
|
| 508 |
speechRecognizer.start();
|
| 509 |
isSpeechRecognitionActive = true;
|
| 510 |
startStopButton.innerHTML = '<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M9 9h6v6h-6z"></path><path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"></path><path d="M19 10v2a7 7 0 0 1-14 0v-2"></path><line x1="12" y1="19" x2="12" y2="23"></line><line x1="8" y1="23" x2="16" y2="23"></line></svg> Stop Listening';
|
| 511 |
-
isCaptioningEnabled = true;
|
| 512 |
startWebcam();
|
| 513 |
}
|
| 514 |
});
|
|
@@ -525,7 +537,7 @@ async function startWebcam() {
|
|
| 525 |
try {
|
| 526 |
const stream = await navigator.mediaDevices.getUserMedia({ video: true });
|
| 527 |
video.srcObject = stream;
|
| 528 |
-
webcamInterval = setInterval(captureAndProcessImage, WEBCAM_INTERVAL);
|
| 529 |
} catch (error) {
|
| 530 |
console.error("Error accessing webcam: ", error);
|
| 531 |
}
|
|
@@ -551,7 +563,7 @@ async function processWithGradio(imageBlob) {
|
|
| 551 |
const result = await app.predict("/process_image", [handledFile, "More Detailed Caption"]);
|
| 552 |
|
| 553 |
const dataString = result.data[0];
|
| 554 |
-
lastCaption = dataString || lastCaption;
|
| 555 |
} catch (error) {
|
| 556 |
console.error("Error processing with Gradio:", error);
|
| 557 |
}
|
|
|
|
| 1 |
// Constants and Configuration
|
| 2 |
const USER_SPEECH_INTERRUPT_DELAY = 500;
|
| 3 |
+
const TEXT_TO_SPEECH_API_ENDPOINT = "https://api.streamelements.com/kappa/v2/speech";
|
| 4 |
const CHUNK_SIZE = 300;
|
| 5 |
+
const MAX_PREFETCH_REQUESTS = 10;
|
| 6 |
const PREFETCH_CACHE_EXPIRATION = 60000; // 1 minute
|
| 7 |
const AUDIO_CACHE_EXPIRATION = 3600000; // 1 hour
|
| 8 |
+
const WEBCAM_INTERVAL = 5000;
|
| 9 |
+
const MAX_HISTORY_LENGTH = 6;
|
| 10 |
|
| 11 |
// DOM Elements
|
| 12 |
const startStopButton = document.getElementById('startStopButton');
|
|
|
|
| 50 |
|
| 51 |
// Image Captioning State
|
| 52 |
let isCaptioningEnabled = false;
|
| 53 |
+
let lastCaption = "";
|
| 54 |
|
| 55 |
// Webcam Integration
|
| 56 |
import { client, handle_file } from 'https://cdn.jsdelivr.net/npm/@gradio/client/+esm';
|
|
|
|
| 155 |
requestAbortController = null;
|
| 156 |
}
|
| 157 |
|
| 158 |
+
// Clear prefetch cache and queue only if the interruption is due to user speech
|
| 159 |
+
if (reason === 'user is speaking' || reason === 'interim') {
|
| 160 |
+
prefetchCache.clear();
|
| 161 |
+
prefetchQueue.length = 0;
|
| 162 |
+
}
|
| 163 |
+
|
| 164 |
updateActivityIndicators();
|
| 165 |
};
|
| 166 |
|
|
|
|
| 169 |
|
| 170 |
// Prefetch and cache the first TTS audio chunk
|
| 171 |
const prefetchFirstAudioChunk = (query, voice) => {
|
| 172 |
+
let combinedQuery = `{USER: "${query}"}`;
|
| 173 |
+
if (lastCaption !== "") {
|
| 174 |
+
combinedQuery += `, ${lastCaption} , {USER: "${query}"}`;
|
| 175 |
+
}
|
| 176 |
+
const normalizedQuery = normalizeQueryText(combinedQuery); // Normalize combined query
|
| 177 |
const cacheKey = generateCacheKey(normalizedQuery, voice, conversationHistory, modelSelectionDropdown.value);
|
| 178 |
|
| 179 |
if (pendingPrefetchRequests.has(cacheKey) || prefetchCache.has(cacheKey)) return;
|
| 180 |
|
| 181 |
+
prefetchQueue.push({ query: combinedQuery.trim(), voice, cacheKey }); // Use combined query
|
| 182 |
processPrefetchQueue();
|
| 183 |
};
|
| 184 |
|
|
|
|
| 205 |
|
| 206 |
// Cancel pending prefetch requests
|
| 207 |
const cancelPrefetchRequests = (query) => {
|
| 208 |
+
let combinedQuery = `{USER: "${query}"}`;
|
| 209 |
+
if (lastCaption !== "") {
|
| 210 |
+
combinedQuery += `, ${lastCaption} , {USER: "${query}"}`;
|
| 211 |
+
}
|
| 212 |
+
const normalizedQuery = normalizeQueryText(combinedQuery); // Normalize combined query
|
| 213 |
|
| 214 |
for (const [cacheKey, abortController] of pendingPrefetchRequests) {
|
| 215 |
if (cacheKey.startsWith(normalizedQuery)) {
|
|
|
|
| 236 |
try {
|
| 237 |
let combinedQuery = `{USER: "${query}"}`;
|
| 238 |
if (lastCaption !== "") {
|
| 239 |
+
combinedQuery += `, ${lastCaption} , {USER: "${query}"}`;
|
| 240 |
}
|
| 241 |
|
| 242 |
await streamAndHandleAudioResponse(combinedQuery, voiceSelectionDropdown.value, requestAbortController.signal);
|
|
|
|
| 255 |
const trimmedTranscript = transcript.trimStart();
|
| 256 |
if (trimmedTranscript !== '' && !isRequestInProgress) {
|
| 257 |
activeQuery = trimmedTranscript;
|
| 258 |
+
addToConversationHistory('user', activeQuery);
|
| 259 |
+
sendQueryToAI(activeQuery);
|
| 260 |
}
|
| 261 |
};
|
| 262 |
|
|
|
|
| 380 |
if (textContent) {
|
| 381 |
if (!firstResponseTextTimestamp) firstResponseTextTimestamp = Date.now();
|
| 382 |
|
| 383 |
+
fullResponseText += textContent + " ";
|
| 384 |
fullResponseText2 += textContent + " ";
|
| 385 |
textChunk += textContent + " ";
|
| 386 |
transcriptDiv.textContent = fullResponseText2;
|
|
|
|
| 392 |
audioPlaybackQueue.push({ url: audioUrl });
|
| 393 |
if (!currentAudio) playNextAudio();
|
| 394 |
}
|
| 395 |
+
textChunk = "";
|
| 396 |
}
|
| 397 |
}
|
| 398 |
}
|
| 399 |
}
|
| 400 |
|
| 401 |
+
buffer = lines[lines.length - 1];
|
| 402 |
}
|
| 403 |
} catch (error) {
|
| 404 |
console.error("Error in handleStreamingResponse:", error);
|
|
|
|
| 414 |
}
|
| 415 |
|
| 416 |
addToConversationHistory('assistant', fullResponseText2);
|
| 417 |
+
fullResponseText = "";
|
| 418 |
+
fullResponseText2 = "";
|
| 419 |
}
|
| 420 |
};
|
| 421 |
|
|
|
|
| 514 |
isSpeechRecognitionActive = false;
|
| 515 |
startStopButton.innerHTML = '<svg id="microphoneIcon" xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"></path><path d="M19 10v2a7 7 0 0 1-14 0v-2"></path><line x1="12" y1="19" x2="12" y2="23"></line><line x1="8" y1="23" x2="16" y2="23"></line></svg> Start Listening';
|
| 516 |
clearInterval(webcamInterval);
|
| 517 |
+
video.srcObject = null;
|
| 518 |
lastCaption = "";
|
| 519 |
} else {
|
| 520 |
speechRecognizer.start();
|
| 521 |
isSpeechRecognitionActive = true;
|
| 522 |
startStopButton.innerHTML = '<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M9 9h6v6h-6z"></path><path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"></path><path d="M19 10v2a7 7 0 0 1-14 0v-2"></path><line x1="12" y1="19" x2="12" y2="23"></line><line x1="8" y1="23" x2="16" y2="23"></line></svg> Stop Listening';
|
| 523 |
+
isCaptioningEnabled = true;
|
| 524 |
startWebcam();
|
| 525 |
}
|
| 526 |
});
|
|
|
|
| 537 |
try {
|
| 538 |
const stream = await navigator.mediaDevices.getUserMedia({ video: true });
|
| 539 |
video.srcObject = stream;
|
| 540 |
+
webcamInterval = setInterval(captureAndProcessImage, WEBCAM_INTERVAL);
|
| 541 |
} catch (error) {
|
| 542 |
console.error("Error accessing webcam: ", error);
|
| 543 |
}
|
|
|
|
| 563 |
const result = await app.predict("/process_image", [handledFile, "More Detailed Caption"]);
|
| 564 |
|
| 565 |
const dataString = result.data[0];
|
| 566 |
+
lastCaption = dataString || lastCaption;
|
| 567 |
} catch (error) {
|
| 568 |
console.error("Error processing with Gradio:", error);
|
| 569 |
}
|