Spaces:

salomonsky
/

chatbot-web-app

Runtime error

App Files Files Community

salomonsky committed 29 days ago

Commit

504ccf4

verified ·

1 Parent(s): d2c7775

Upload vad.py with huggingface_hub

Browse files

Files changed (1) hide show

vad.py +195 -0

vad.py ADDED Viewed

	@@ -0,0 +1,195 @@

+import speech_recognition as sr
+import threading
+import time
+import pygame
+from response_handler import ResponseHandler
+class VoiceDetector:
+    """Background microphone listener that turns recognized speech into callbacks.
+
+    A worker thread records phrases with ``speech_recognition`` and transcribes
+    them with ``recognize_google`` (language ``es-ES``). While
+    ``waiting_for_activation`` is true, only a phrase accepted by
+    ``ResponseHandler.is_activation_phrase`` is acted on; afterwards every
+    transcription is forwarded to ``on_speech``. While the attached
+    ``audio_utils`` reports ``is_speaking``, captured audio is only examined as
+    a possible request to stop playback.
+    """
+
+    def __init__(self, on_activation=None, on_speech=None, on_timeout=None):
+        """Configure the recognizer and store the callbacks.
+
+        Args:
+            on_activation: Called with no arguments when the activation phrase
+                is recognized.
+            on_speech: Called with the lower-cased transcription once the
+                detector has been activated.
+            on_timeout: Stored on the instance. NOTE(review): no method shown
+                here ever calls it - confirm whether a caller does.
+        """
+        self.recognizer = sr.Recognizer()
+        self.is_active = True
+        self.is_listening = True
+        # NOTE(review): last_interaction / TIMEOUT_SECONDS are never read by
+        # the methods shown here, so no inactivity timeout is enforced.
+        self.last_interaction = time.time()
+        self.TIMEOUT_SECONDS = 20
+        self.clock = pygame.time.Clock()
+        self.waiting_for_activation = True
+        self.audio_utils = None
+        # NOTE(review): last_interrupt_time is written on every interruption
+        # but INTERRUPT_COOLDOWN is never compared against it here.
+        self.last_interrupt_time = 0
+        self.INTERRUPT_COOLDOWN = 1.0
+        # Energy thresholds (normal listening vs. listening during playback).
+        self.BASE_ENERGY_THRESHOLD = 300
+        self.HIGH_ENERGY_THRESHOLD = 600
+        self.current_energy_threshold = self.BASE_ENERGY_THRESHOLD
+        # Recognizer tuning.
+        self.recognizer.energy_threshold = self.current_energy_threshold
+        self.recognizer.dynamic_energy_threshold = True
+        self.recognizer.dynamic_energy_adjustment_damping = 0.15
+        self.recognizer.dynamic_energy_ratio = 1.5
+        self.recognizer.pause_threshold = 0.8
+        self.recognizer.non_speaking_duration = 0.5
+        self.recognizer.phrase_threshold = 0.3
+        # Interruption thresholds. Only INTERRUPT_ENERGY_MULTIPLIER is used by
+        # check_for_interruption; the other three are not read in this class.
+        self.INTERRUPT_ENERGY_MULTIPLIER = 2.0
+        self.INTERRUPT_DURATION = 0.3
+        self.INTERRUPT_SAMPLES = 3
+        self.INTERRUPT_SUCCESS_THRESHOLD = 2
+        self.on_activation = on_activation
+        self.on_speech = on_speech
+        self.on_timeout = on_timeout
+        # Echo / self-activation control.
+        self.last_audio_output_time = 0
+        self.AUDIO_OUTPUT_COOLDOWN = 0.3  # seconds
+        self.is_high_threshold_mode = False
+        # Most recent spoken texts, used by is_echo (bounded by BUFFER_SIZE).
+        self.audio_buffer = []
+        self.BUFFER_SIZE = 5
+        self.last_played_audio = None
+        # Worker thread created by start(); lets start() avoid duplicates.
+        self._listener_thread = None
+
+    def set_audio_utils(self, audio_utils):
+        """Attach the playback helper whose ``is_speaking`` / ``stop_speaking`` are used."""
+        self.audio_utils = audio_utils
+
+    def set_high_threshold_mode(self, enabled):
+        """Switch between the high (playback) and base energy thresholds.
+
+        Args:
+            enabled: Truthy selects ``HIGH_ENERGY_THRESHOLD``; falsy selects
+                ``BASE_ENERGY_THRESHOLD``. The value is also pushed to the
+                recognizer.
+        """
+        self.is_high_threshold_mode = enabled
+        self.current_energy_threshold = self.HIGH_ENERGY_THRESHOLD if enabled else self.BASE_ENERGY_THRESHOLD
+        self.recognizer.energy_threshold = self.current_energy_threshold
+        print(f"Umbral de energía ajustado a: {self.current_energy_threshold}")
+
+    def start(self):
+        """Enable listening and make sure exactly one listener thread is running."""
+        self.is_active = True
+        self.is_listening = True
+        # A previous worker may still be blocked inside listen() after stop();
+        # it resumes now that the flags are set again, so spawning a second
+        # thread would make every phrase fire the callbacks twice.
+        worker = getattr(self, "_listener_thread", None)
+        if worker is not None and worker.is_alive():
+            return
+        self._listener_thread = threading.Thread(target=self.listen_continuously, daemon=True)
+        self._listener_thread.start()
+
+    def stop(self):
+        """Ask the listener loop to exit; it stops after its current listen call returns."""
+        self.is_active = False
+        self.is_listening = False
+
+    def listen_continuously(self):
+        """Run the capture/recognition loop until ``stop()`` clears the flags.
+
+        The microphone is opened once and reused for successive phrases; it is
+        only re-opened after an error, following a one second pause.
+        """
+        while self.is_active and self.is_listening:
+            try:
+                with sr.Microphone() as source:
+                    while self.is_active and self.is_listening:
+                        self._listen_once(source)
+                        self.clock.tick(30)
+            except Exception as e:
+                # Best-effort loop: report the problem and retry with a fresh device.
+                print(f"Error en reconocimiento continuo: {e}")
+                time.sleep(1)
+
+    def _listen_once(self, source):
+        """Capture one phrase from ``source`` and route it to the right handler."""
+        # Recalibrate for ambient noise only when not in high-threshold mode.
+        if not self.is_high_threshold_mode:
+            self.recognizer.adjust_for_ambient_noise(source, duration=0.2)
+        try:
+            audio = self.recognizer.listen(
+                source,
+                timeout=1,
+                phrase_time_limit=5
+            )
+            if not self.is_active or not self.is_listening:
+                return
+            # While audio is being played back, treat input as a possible interruption.
+            if self.audio_utils and self.audio_utils.is_speaking:
+                self._handle_playback_audio(audio)
+                return
+            # Normal (non-interruption) processing.
+            if not self.audio_utils or not self.audio_utils.is_speaking:
+                self._handle_user_speech(audio)
+        except sr.WaitTimeoutError:
+            return
+        except sr.UnknownValueError:
+            return
+
+    def _handle_playback_audio(self, audio):
+        """Stop playback if ``audio`` captured during playback is a valid interruption."""
+        current_time = time.time()
+        # Ignore input that arrives right after our own audio output (echo cooldown).
+        if current_time - self.last_audio_output_time < self.AUDIO_OUTPUT_COOLDOWN:
+            return
+        if not self.check_for_interruption(audio.frame_data):
+            return
+        try:
+            text = self.recognizer.recognize_google(
+                audio,
+                language="es-ES"
+            ).lower()
+            # Only honor the command when it is not an echo of recent output.
+            if not self.is_echo(text):
+                if ResponseHandler.is_stop_command(text):
+                    print(f"Comando de interrupción detectado: {text}")
+                    self.audio_utils.stop_speaking()
+                    self.last_interrupt_time = current_time
+        except sr.UnknownValueError:
+            # Loud but unintelligible input still interrupts in high-threshold mode.
+            if self.is_high_threshold_mode:
+                self.audio_utils.stop_speaking()
+                self.last_interrupt_time = current_time
+
+    def _handle_user_speech(self, audio):
+        """Transcribe ``audio`` and fire ``on_activation`` or ``on_speech``."""
+        text = self.recognizer.recognize_google(
+            audio,
+            language="es-ES"
+        ).lower()
+        if self.is_echo(text):
+            return
+        if self.waiting_for_activation:
+            if ResponseHandler.is_activation_phrase(text):
+                self.waiting_for_activation = False
+                if self.on_activation:
+                    self.on_activation()
+        elif self.on_speech:
+            self.on_speech(text)
+
+    def is_echo(self, text):
+        """Return True when ``text`` overlaps a recently spoken output.
+
+        The comparison is case-insensitive and matches when either string
+        contains the other.
+        """
+        heard = text.lower()
+        for recent_audio in self.audio_buffer:
+            spoken = recent_audio.lower()
+            if heard in spoken or spoken in heard:
+                print("Eco detectado y filtrado")
+                return True
+        return False
+
+    def update_last_audio_output(self, text=None):
+        """Record that audio was just played and remember ``text`` for echo checks.
+
+        Args:
+            text: Spoken text to append to the echo buffer; falsy values only
+                refresh the timestamp.
+        """
+        self.last_audio_output_time = time.time()
+        if text:
+            self.audio_buffer.append(text)
+            if len(self.audio_buffer) > self.BUFFER_SIZE:
+                self.audio_buffer.pop(0)
+
+    def check_for_interruption(self, audio_data):
+        """Return True when ``audio_data`` is loud enough to count as an interruption.
+
+        The bytes are read as little-endian signed 16-bit samples and split
+        into 500-byte windows. The result is True when at least 70% of the
+        windows have a mean absolute amplitude above
+        ``current_energy_threshold * INTERRUPT_ENERGY_MULTIPLIER``. Inputs
+        shorter than 1000 bytes are rejected.
+        """
+        if not audio_data or len(audio_data) < 1000:
+            return False
+        window_size = 500
+        energies = []
+        for start in range(0, len(audio_data), window_size):
+            window = audio_data[start:start + window_size]
+            # Count complete 2-byte samples only; a dangling odd byte is not a
+            # sample and must not contribute to (or dilute) the average.
+            sample_count = len(window) // 2
+            if sample_count == 0:
+                continue
+            total = sum(abs(int.from_bytes(window[i:i + 2], 'little', signed=True))
+                        for i in range(0, sample_count * 2, 2))
+            energies.append(total / sample_count)
+        if not energies:
+            return False
+        # Threshold follows the current mode (base or high).
+        threshold = self.current_energy_threshold * self.INTERRUPT_ENERGY_MULTIPLIER
+        high_energy_windows = sum(1 for e in energies if e > threshold)
+        return high_energy_windows >= len(energies) * 0.7
+
+    def is_speaking_check(self):
+        """Return True when ``audio_utils`` is attached and reports ``is_speaking``."""
+        return bool(self.audio_utils and self.audio_utils.is_speaking)