<script lang="ts">
	import { toast } from 'svelte-sonner';
	import { createEventDispatcher, tick, getContext } from 'svelte';

	import { config, settings } from '$lib/stores';
	import { blobToFile } from '$lib/utils';
	import { transcribeAudio } from '$lib/apis/audio';

	const i18n = getContext('i18n');
	const dispatch = createEventDispatcher();

	export let recording = false;

	let loading = false;
	let confirmed = false;

	let durationSeconds = 0;
	let durationCounter = null;

	let transcription = '';

	const startDurationCounter = () => {
		durationCounter = setInterval(() => {
			durationSeconds++;
		}, 1000);
	};

	const stopDurationCounter = () => {
		clearInterval(durationCounter);
		durationSeconds = 0;
	};

	$: if (recording) {
		startRecording();
	} else {
		stopRecording();
	}

	// Format a duration in whole seconds as `m:ss` for the timer display
	const formatSeconds = (seconds) => {
		const minutes = Math.floor(seconds / 60);
		const remainingSeconds = seconds % 60;
		const formattedSeconds = remainingSeconds < 10 ? `0${remainingSeconds}` : remainingSeconds;
		return `${minutes}:${formattedSeconds}`;
	};

	let speechRecognition;

	let mediaRecorder;
	let audioChunks = [];

	const MIN_DECIBELS = -45;
	const VISUALIZER_BUFFER_LENGTH = 300;

	let visualizerData = Array(VISUALIZER_BUFFER_LENGTH).fill(0);

	// Calculate the RMS level from time-domain data
	const calculateRMS = (data: Uint8Array) => {
		let sumSquares = 0;
		for (let i = 0; i < data.length; i++) {
			const normalizedValue = (data[i] - 128) / 128; // Normalize the data
			sumSquares += normalizedValue * normalizedValue;
		}
		return Math.sqrt(sumSquares / data.length);
	};

	const normalizeRMS = (rms) => {
		rms = rms * 10;
		const exp = 1.5; // Exponents greater than 1 expand larger values and compress smaller ones
		const scaledRMS = Math.pow(rms, exp);

		// Clamp between 0.01 (1%) and 1.0 (100%)
		return Math.min(1.0, Math.max(0.01, scaledRMS));
	};
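	// Worked example for the two helpers above (the numbers are illustrative, not
	// from the source): a byte sample of 192 normalizes to (192 - 128) / 128 = 0.5,
	// so a frame of such samples has an RMS of 0.5. normalizeRMS then computes
	// min(1.0, max(0.01, (0.5 * 10) ** 1.5)) = min(1.0, 11.18...) = 1.0, i.e. a
	// full-height visualizer bar; quieter frames land proportionally lower.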
	const analyseAudio = (stream) => {
		const audioContext = new AudioContext();
		const audioStreamSource = audioContext.createMediaStreamSource(stream);

		const analyser = audioContext.createAnalyser();
		analyser.minDecibels = MIN_DECIBELS;
		audioStreamSource.connect(analyser);

		const bufferLength = analyser.frequencyBinCount;

		const domainData = new Uint8Array(bufferLength);
		const timeDomainData = new Uint8Array(analyser.fftSize);

		let lastSoundTime = Date.now();

		const detectSound = () => {
			const processFrame = () => {
				// Stop sampling once the recording ends or transcription starts
				if (!recording || loading) {
					return;
				}

				analyser.getByteTimeDomainData(timeDomainData);
				analyser.getByteFrequencyData(domainData);

				// Calculate the RMS level from the time-domain data
				const rmsLevel = calculateRMS(timeDomainData);

				// Push the normalized level to visualizerData
				visualizerData.push(normalizeRMS(rmsLevel));

				// Keep visualizerData within the buffer length
				if (visualizerData.length >= VISUALIZER_BUFFER_LENGTH) {
					visualizerData.shift();
				}
				visualizerData = visualizerData; // Reassign to trigger Svelte reactivity

				// if (domainData.some((value) => value > 0)) {
				// 	lastSoundTime = Date.now();
				// }

				// if (recording && Date.now() - lastSoundTime > 3000) {
				// 	if ($settings?.speechAutoSend ?? false) {
				// 		confirmRecording();
				// 	}
				// }

				window.requestAnimationFrame(processFrame);
			};

			window.requestAnimationFrame(processFrame);
		};

		detectSound();
	};

	const transcribeHandler = async (audioBlob) => {
		// Create a file from the recorded audio and send it for transcription
		await tick();
		const file = blobToFile(audioBlob, 'recording.wav');

		const res = await transcribeAudio(localStorage.token, file).catch((error) => {
			toast.error(error);
			return null;
		});

		if (res) {
			console.log(res.text);
			dispatch('confirm', res.text);
		}
	};

	const saveRecording = (blob) => {
		const url = URL.createObjectURL(blob);
		const a = document.createElement('a');
		document.body.appendChild(a);
		a.style.display = 'none';
		a.href = url;
		a.download = 'recording.wav';
		a.click();
		window.URL.revokeObjectURL(url);
	};

	const startRecording = async () => {
		startDurationCounter();

		const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
		mediaRecorder = new MediaRecorder(stream);

		mediaRecorder.onstart = () => {
			console.log('Recording started');
			audioChunks = [];
			analyseAudio(stream);
		};

		mediaRecorder.ondataavailable = (event) => audioChunks.push(event.data);

		mediaRecorder.onstop = async () => {
			console.log('Recording stopped');

			if (($settings?.audio?.stt?.engine ?? '') === 'web') {
				// The Web Speech API handles transcription; discard the recorded chunks
				audioChunks = [];
			} else {
				if (confirmed) {
					const audioBlob = new Blob(audioChunks, { type: 'audio/wav' });
					await transcribeHandler(audioBlob);

					confirmed = false;
					loading = false;
				}

				audioChunks = [];
				recording = false;
			}
		};

		mediaRecorder.start();

		if (
			$config?.audio?.stt?.engine === 'web' ||
			($settings?.audio?.stt?.engine ?? '') === 'web'
		) {
			if ('SpeechRecognition' in window || 'webkitSpeechRecognition' in window) {
				// Create a SpeechRecognition object
				speechRecognition = new (window.SpeechRecognition || window.webkitSpeechRecognition)();

				// Set continuous to true for continuous recognition
				speechRecognition.continuous = true;

				// Timeout for turning off recognition after inactivity (in milliseconds)
				const inactivityTimeout = 2000; // 2 seconds

				let timeoutId;

				// Start recognition
				speechRecognition.start();

				// Event triggered when speech is recognized
				speechRecognition.onresult = async (event) => {
					// Clear the inactivity timeout
					clearTimeout(timeoutId);

					// Handle recognized speech
					console.log(event);
					const transcript = event.results[Object.keys(event.results).length - 1][0].transcript;

					transcription = `${transcription}${transcript}`;

					await tick();
					document.getElementById('chat-textarea')?.focus();

					// Restart the inactivity timeout
					timeoutId = setTimeout(() => {
						console.log('Speech recognition turned off due to inactivity.');
						speechRecognition.stop();
					}, inactivityTimeout);
				};

				// Event triggered when recognition ends (e.g. stopped after inactivity);
				// finalize the recording and confirm the accumulated transcription
				speechRecognition.onend = function () {
					console.log('recognition ended');
					confirmRecording();
					dispatch('confirm', transcription);

					confirmed = false;
					loading = false;
				};

				// Event triggered when an error occurs
				speechRecognition.onerror = function (event) {
					console.log(event);
					toast.error($i18n.t(`Speech recognition error: {{error}}`, { error: event.error }));

					dispatch('cancel');
					stopRecording();
				};
			}
		}
	};

	const stopRecording = async () => {
		if (recording && mediaRecorder) {
			await mediaRecorder.stop();
		}

		stopDurationCounter();
		audioChunks = [];
	};

	const confirmRecording = async () => {
		loading = true;
		confirmed = true;

		if (recording && mediaRecorder) {
			await mediaRecorder.stop();
		}

		clearInterval(durationCounter);
	};
</script>
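<!--
	Usage sketch: assuming this file is imported as `VoiceRecording`, a parent
	component might wire it up as below. The parent markup and `submitPrompt`
	are illustrative assumptions; only `recording`, `cancel`, and `confirm`
	are defined by this component.

	<VoiceRecording
		bind:recording
		on:cancel={() => (recording = false)}
		on:confirm={(e) => submitPrompt(e.detail)}
	/>

	`confirm` fires with the transcribed text, produced either by the
	server-side transcribeAudio call or by the browser's Web Speech API.
-->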
<div
	class="{loading
		? ' bg-gray-100/50 dark:bg-gray-850/50'
		: 'bg-indigo-300/10 dark:bg-indigo-500/10 '} rounded-full flex p-2.5"
>
	<div class="flex items-center mr-1">
		<button
			type="button"
			class="p-1.5 {loading
				? ' bg-gray-200 dark:bg-gray-700/50'
				: 'bg-indigo-400/20 text-indigo-600 dark:text-indigo-300 '} rounded-full"
			on:click={async () => {
				dispatch('cancel');
				stopRecording();
			}}
		>
			<svg
				xmlns="http://www.w3.org/2000/svg"
				fill="none"
				viewBox="0 0 24 24"
				stroke-width="3"
				stroke="currentColor"
				class="size-4"
			>
				<path stroke-linecap="round" stroke-linejoin="round" d="M6 18 18 6M6 6l12 12" />
			</svg>
		</button>
	</div>

	<div
		class="flex flex-1 self-center items-center justify-between ml-2 mx-1 overflow-hidden h-6"
		dir="rtl"
	>
		<div class="flex-1 flex items-center gap-0.5 h-6">
			{#each visualizerData.slice().reverse() as rms}
				<div
					class="w-[2px] {loading
						? ' bg-gray-500 dark:bg-gray-400 '
						: 'bg-indigo-500 dark:bg-indigo-400 '} inline-block h-full"
					style="height: {Math.min(100, Math.max(14, rms * 100))}%;"
				/>
			{/each}
		</div>
	</div>

	<div class=" mx-1.5 pr-1 flex justify-center items-center">
		<div
			class="text-sm {loading
				? ' text-gray-500 dark:text-gray-400 '
				: ' text-indigo-400 '} font-medium flex-1 mx-auto text-center"
		>
			{formatSeconds(durationSeconds)}
		</div>
	</div>

	<div class="flex items-center mr-1">
		{#if loading}
			<div class=" text-gray-500 rounded-full cursor-not-allowed">
				<svg
					width="24"
					height="24"
					viewBox="0 0 24 24"
					xmlns="http://www.w3.org/2000/svg"
					fill="currentColor"
				>
					<style>
						.spinner_OSmW {
							transform-origin: center;
							animation: spinner_T6mA 0.75s step-end infinite;
						}
						@keyframes spinner_T6mA {
							8.3% {
								transform: rotate(30deg);
							}
							16.6% {
								transform: rotate(60deg);
							}
							25% {
								transform: rotate(90deg);
							}
							33.3% {
								transform: rotate(120deg);
							}
							41.6% {
								transform: rotate(150deg);
							}
							50% {
								transform: rotate(180deg);
							}
							58.3% {
								transform: rotate(210deg);
							}
							66.6% {
								transform: rotate(240deg);
							}
							75% {
								transform: rotate(270deg);
							}
							83.3% {
								transform: rotate(300deg);
							}
							91.6% {
								transform: rotate(330deg);
							}
							100% {
								transform: rotate(360deg);
							}
						}
					</style>
					<g class="spinner_OSmW">
						<rect x="11" y="1" width="2" height="5" opacity=".14" />
						<rect x="11" y="1" width="2" height="5" transform="rotate(30 12 12)" opacity=".29" />
						<rect x="11" y="1" width="2" height="5" transform="rotate(60 12 12)" opacity=".43" />
						<rect x="11" y="1" width="2" height="5" transform="rotate(90 12 12)" opacity=".57" />
						<rect x="11" y="1" width="2" height="5" transform="rotate(120 12 12)" opacity=".71" />
						<rect x="11" y="1" width="2" height="5" transform="rotate(150 12 12)" opacity=".86" />
						<rect x="11" y="1" width="2" height="5" transform="rotate(180 12 12)" />
					</g>
				</svg>
			</div>
		{:else}
			<button
				type="button"
				class="p-1.5 bg-indigo-500 text-white dark:bg-indigo-500 dark:text-blue-950 rounded-full"
				on:click={async () => {
					await confirmRecording();
				}}
			>
				<svg
					xmlns="http://www.w3.org/2000/svg"
					fill="none"
					viewBox="0 0 24 24"
					stroke-width="2.5"
					stroke="currentColor"
					class="size-4"
				>
					<path stroke-linecap="round" stroke-linejoin="round" d="m4.5 12.75 6 6 9-13.5" />
				</svg>
			</button>
		{/if}
	</div>
</div>

<style>
	.visualizer {
		display: flex;
		height: 100%;
	}

	.visualizer-bar {
		width: 2px;
		background-color: #4a5aba; /* or whatever color you need */
	}
</style>