Spaces:
Runtime error
Runtime error
Upload tts_utils.py with huggingface_hub
Browse files- tts_utils.py +468 -0
tts_utils.py
ADDED
@@ -0,0 +1,468 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pygame
|
2 |
+
import tempfile
|
3 |
+
import uuid
|
4 |
+
import threading
|
5 |
+
import asyncio
|
6 |
+
from pathlib import Path
|
7 |
+
from TTS.api import TTS
|
8 |
+
from gtts import gTTS
|
9 |
+
import edge_tts
|
10 |
+
import logging
|
11 |
+
import time
|
12 |
+
import os
|
13 |
+
|
14 |
+
class TTSUtils:
    """Text-to-speech helper with Edge TTS, Coqui VITS and gTTS back ends.

    Audio is synthesised into files under a per-user temporary directory and
    can be played back through ``pygame.mixer``.  Synthesis degrades in this
    order: selected model -> Edge TTS -> gTTS.
    """

    # Catalogue of selectable models, keyed by the name accepted by
    # ``__init__`` and ``change_model``.
    AVAILABLE_MODELS = {
        'EDGE': {
            'name': "es-MX-JorgeNeural",
            'description': "Voz masculina de Microsoft Edge (MX)",
            'type': 'edge',
            'rate': '+25%'
        },
        'EDGE_ES': {
            'name': "es-ES-AlvaroNeural",
            'description': "Voz masculina de Microsoft Edge (ES)",
            'type': 'edge',
            'rate': '+25%'
        },
        'VITS': {
            'name': "tts_models/es/css10/vits",
            'description': "Voz masculina de VITS (ES)",
            'type': 'local',
            'config': {
                'speed': 1.25,
                'model_path': "tts_models/es/css10/vits"
            }
        }
    }

    # Rate used for Edge TTS when the active model entry defines none.
    DEFAULT_EDGE_RATE = '+0%'
    # Number of Edge TTS attempts before giving up and using gTTS.
    EDGE_MAX_ATTEMPTS = 3
    # Tempo factor applied to gTTS output so it matches the other voices.
    GTTS_SPEED = 1.25

    # Spanish number words for 0-99.
    _UNITS = ('', 'uno', 'dos', 'tres', 'cuatro', 'cinco', 'seis', 'siete',
              'ocho', 'nueve')
    _TEENS = ('diez', 'once', 'doce', 'trece', 'catorce', 'quince',
              'dieciséis', 'diecisiete', 'dieciocho', 'diecinueve')
    _TWENTIES = ('veinte', 'veintiuno', 'veintidós', 'veintitrés',
                 'veinticuatro', 'veinticinco', 'veintiséis', 'veintisiete',
                 'veintiocho', 'veintinueve')
    _TENS = ('', 'diez', 'veinte', 'treinta', 'cuarenta', 'cincuenta',
             'sesenta', 'setenta', 'ochenta', 'noventa')

    def __init__(self, model_name='EDGE', elevenlabs_api_key=None):
        """Initialise the TTS engine and the pygame mixer.

        Args:
            model_name: Key of ``AVAILABLE_MODELS`` to start with.
            elevenlabs_api_key: Accepted for interface compatibility; unused.
        """
        self.is_speaking = False
        self.should_stop = False
        self.temp_dir = Path(tempfile.gettempdir()) / "chatbot_audio"
        self.temp_dir.mkdir(exist_ok=True)
        self.tts = None
        self.audio_initialized = False
        self.current_model = model_name
        print(f"Inicializando TTS con modelo: {model_name}")

        try:
            if pygame.mixer.get_init():
                pygame.mixer.quit()
            pygame.mixer.init(frequency=16000, size=-16, channels=1, buffer=2048)
            pygame.mixer.music.set_volume(0.8)
            self.audio_initialized = True
            print("Audio inicializado correctamente")
        except Exception as e:
            # A missing audio device must not prevent construction; playback
            # retries the initialisation through _verify_audio_system().
            print(f"Error inicializando audio: {str(e)}")
            self.audio_initialized = False

        self.play_lock = threading.Lock()
        self.clock = pygame.time.Clock()
        self.init_audio()

        # Remove stale temporary files left by earlier runs.
        self._cleanup_old_files()

    def _cleanup_old_files(self, max_age_hours=1):
        """Delete files in the temp directory older than ``max_age_hours``.

        Best effort: files that vanish or cannot be removed are skipped so
        one failure does not abort the rest of the sweep.
        """
        cutoff = time.time() - max_age_hours * 3600
        try:
            candidates = list(self.temp_dir.glob("*"))
        except OSError as e:
            print(f"Error limpiando archivos temporales: {e}")
            return

        for file in candidates:
            try:
                if file.is_file() and file.stat().st_mtime < cutoff:
                    file.unlink()
            except OSError:
                # Removed concurrently or still in use; try again next sweep.
                pass

    def _verify_audio_system(self):
        """Ensure the pygame mixer is usable, re-initialising it if needed.

        Returns:
            True when the mixer is ready, False when it could not be started.
        """
        if self.audio_initialized and pygame.mixer.get_init():
            return True

        try:
            if pygame.mixer.get_init():
                pygame.mixer.quit()
            # NOTE(review): recovery uses 44.1 kHz stereo at full volume while
            # __init__ uses 16 kHz mono at 0.8 -- confirm which is intended.
            pygame.mixer.init(frequency=44100, size=-16, channels=2, buffer=4096)
            pygame.mixer.music.set_volume(1.0)
            self.audio_initialized = True
            return True
        except Exception as e:
            print(f"Error reinicializando audio: {e}")
            return False

    def init_audio(self):
        """Load the local model when the current model is ``VITS``.

        Edge models need no local initialisation.  When loading VITS fails
        the instance falls back to the ``EDGE`` model.

        Returns:
            True when the current model is ready, False after a fallback.
        """
        if self.current_model != 'VITS':
            return True

        model_info = self.AVAILABLE_MODELS['VITS']
        print(f"Cargando modelo VITS: {model_info['name']}")
        try:
            self.tts = TTS(model_name=model_info['name'])
            synthesizer = getattr(self.tts, 'synthesizer', None)
            if synthesizer is not None and hasattr(synthesizer, 'tts_config'):
                # NOTE(review): presumably the config object accepts these
                # keys; if it rejects unknown keys this raises and VITS is
                # never used -- confirm against the installed TTS version.
                synthesizer.tts_config.update(model_info['config'])
                print("Modelo VITS cargado correctamente")
                return True
            print("Error: El modelo VITS no tiene la estructura esperada")
        except Exception as vits_error:
            print(f"Error cargando modelo VITS: {vits_error}")

        # Fall back to Edge and drop the unusable model object so a later
        # switch back to VITS triggers a fresh load.
        self.tts = None
        self.current_model = 'EDGE'
        return False

    def _number_to_words(self, number):
        """Spell an integer between -99 and 99 in Spanish.

        Args:
            number: An int or a string holding an integer.

        Returns:
            The Spanish words; the decimal digits as a string for magnitudes
            above 99; or ``number`` unchanged when it is not an integer.
        """
        try:
            num = int(number)
        except (TypeError, ValueError):
            return number

        if num < 0:
            return f"menos {self._number_to_words(-num)}"
        if num == 0:
            return 'cero'
        if num < 10:
            return self._UNITS[num]
        if num < 20:
            return self._TEENS[num - 10]
        if num < 30:
            # 21-29 are single words in Spanish ("veintiuno"), not "veinte y uno".
            return self._TWENTIES[num - 20]
        if num < 100:
            tens, units = divmod(num, 10)
            if units == 0:
                return self._TENS[tens]
            return f"{self._TENS[tens]} y {self._UNITS[units]}"
        # Larger numbers are left as digits for the synthesiser to read.
        return str(num)

    def _spell_number_token(self, token):
        """Spell one whitespace-delimited token if it is a plain number.

        Accepts an optional leading ``-``, one optional decimal point and
        trailing sentence punctuation (kept as is).  Anything else, such as
        ``1.2.3`` or ``2020-2021``, is returned unchanged.
        """
        core = token.rstrip('.,;:!?')
        suffix = token[len(core):]

        negative = core.startswith('-')
        digits = core[1:] if negative else core
        integer_part, dot, fraction = digits.partition('.')

        if not (integer_part.isascii() and integer_part.isdigit()):
            return token
        if dot and not (fraction.isascii() and fraction.isdigit()):
            return token

        spoken = self._number_to_words(integer_part)
        if dot:
            # Leading zeros are significant in a decimal part: read them out.
            significant = fraction.lstrip('0')
            pieces = ['cero'] * (len(fraction) - len(significant))
            if significant:
                pieces.append(self._number_to_words(significant))
            spoken = f"{spoken} punto {' '.join(pieces)}"

        # Keep the sign unless the whole value is zero ("-0" reads "cero").
        if negative and (int(integer_part) != 0 or fraction.strip('0')):
            spoken = f"menos {spoken}"
        return f"{spoken}{suffix}"

    def _clean_text(self, text):
        """Prepare text for synthesis.

        Strips markdown-style symbols, spells out common symbols in Spanish
        and converts small numbers to words.

        Returns:
            The cleaned text, or ``text`` itself when it is empty or None.
        """
        if not text:
            return text

        import re  # local import: the module header does not import re

        # "$50" must read "50 pesos"; the plain replacement below would glue
        # the word to the number ("pesos50").
        cleaned_text = re.sub(r'\$\s*(\d+(?:\.\d+)?)', r'\1 pesos', text)

        # Order matters: longer runs ('...') must be replaced before shorter
        # ones ('..').
        replacements = {
            '*': '',
            '#': '',
            '`': '',
            '~': '',
            '|': '',
            '>': '',
            '<': '',
            '\\': '',
            '&': 'y',
            '_': ' ',
            '...': ',',
            '..': ',',
            '---': ',',
            '--': ',',
            '%': ' por ciento',
            '$': ' pesos',
            '=': ' igual a ',
            '+': ' más ',
            '@': ' arroba ',
        }
        for char, replacement in replacements.items():
            cleaned_text = cleaned_text.replace(char, replacement)

        words = [self._spell_number_token(word) for word in cleaned_text.split()]

        # Joining then re-splitting collapses any repeated whitespace.
        return ' '.join(' '.join(words).split())

    def text_to_speech(self, text, save_path=None):
        """Synthesise ``text`` into an audio file.

        Args:
            text: Text to speak; it is cleaned with ``_clean_text`` first.
            save_path: Destination file.  A unique file in the temp directory
                is used when omitted.

        Returns:
            Path of the generated file, or None when there is nothing to say,
            the audio system is unavailable or every back end failed.
        """
        if not text:
            return None

        text = self._clean_text(text)
        if not text:
            return None

        if not self._verify_audio_system():
            print("Sistema de audio no disponible")
            return None

        try:
            temp_file = save_path or str(self.temp_dir / f"{uuid.uuid4()}.mp3")
            print(f"Generando audio para modelo: {self.current_model}")

            try:
                if self.current_model == 'VITS':
                    return self._synthesize_with_vits(text, temp_file)
                if self.current_model == 'gTTS':
                    return self.fallback_to_gtts(text, temp_file)
                # Edge models, and any unrecognised model name, go through
                # Edge so a file is always produced before a path is returned.
                return self.fallback_to_edge(text, temp_file)
            except Exception as primary_error:
                print(f"Error con el modelo primario {self.current_model}: {primary_error}")
                return self.fallback_to_gtts(text, temp_file)
        except Exception as e:
            print(f"Error en text_to_speech: {e}")
            return None
        finally:
            self._cleanup_old_files()

    def _synthesize_with_vits(self, text, temp_file):
        """Generate audio with the local VITS model, falling back to Edge."""
        print("Usando modelo VITS")
        if not self.tts:
            print("Inicializando modelo VITS...")
            if not self.init_audio():
                print("Fallback a Edge debido a error en inicialización de VITS")
                return self.fallback_to_edge(text, temp_file)

        try:
            self.tts.tts_to_file(
                text=text,
                file_path=temp_file,
                speed=self.AVAILABLE_MODELS['VITS']['config']['speed']
            )
            if os.path.exists(temp_file) and os.path.getsize(temp_file) > 0:
                print(f"Audio generado correctamente con VITS: {os.path.getsize(temp_file)} bytes")
                return temp_file
            raise Exception("Archivo de audio VITS inválido")
        except Exception as vits_error:
            print(f"Error generando audio con VITS: {vits_error}")
            return self.fallback_to_edge(text, temp_file)

    def _edge_settings(self):
        """Return ``(voice, rate)`` for Edge TTS.

        Uses the current model when it is an Edge model, otherwise the
        default ``EDGE`` entry, so Edge also works as a fallback while
        another model is selected.
        """
        model = self.AVAILABLE_MODELS.get(self.current_model) or {}
        if model.get('type') != 'edge':
            model = self.AVAILABLE_MODELS['EDGE']
        return model['name'], model.get('rate', self.DEFAULT_EDGE_RATE)

    def _run_edge_tts(self, text, voice, output_file, timeout):
        """Run ``edge_tts_speak`` to completion with a timeout in seconds.

        Uses ``asyncio.run``, so it must not be called from a thread that
        already has a running event loop.
        """
        async def tts_with_timeout():
            return await asyncio.wait_for(
                self.edge_tts_speak(text, voice, output_file),
                timeout=timeout
            )
        return asyncio.run(tts_with_timeout())

    def fallback_to_edge(self, text, temp_file):
        """Generate audio with Edge TTS, retrying before falling back to gTTS.

        Returns:
            ``temp_file`` on success, otherwise the result of
            ``fallback_to_gtts``.
        """
        try:
            voice, _ = self._edge_settings()
            print(f"Usando voz Edge como respaldo: {voice}")
            last_attempt = self.EDGE_MAX_ATTEMPTS - 1
            for attempt in range(self.EDGE_MAX_ATTEMPTS):
                try:
                    self._run_edge_tts(text, voice, temp_file, timeout=15.0)
                    if os.path.exists(temp_file) and os.path.getsize(temp_file) > 0:
                        print(f"Audio generado correctamente con Edge: {os.path.getsize(temp_file)} bytes")
                        return temp_file
                    raise Exception("Archivo de audio Edge inválido")
                except Exception as e:
                    print(f"Intento {attempt + 1} fallido con Edge: {e}")
                    if attempt == last_attempt:
                        return self.fallback_to_gtts(text, temp_file)
                    # Exponential back-off between attempts: 1 s, 2 s, ...
                    time.sleep(2 ** attempt)
        except Exception as edge_error:
            print(f"Error con Edge TTS: {edge_error}")
            return self.fallback_to_gtts(text, temp_file)

    def fallback_to_gtts(self, text, temp_file):
        """Last-resort synthesis with gTTS, sped up through ffmpeg.

        When the ffmpeg step fails (for example ffmpeg is not installed) the
        normal-speed gTTS audio is used instead of discarding it.

        Returns:
            ``temp_file`` on success, None on failure.
        """
        print("Usando gTTS como último respaldo")
        temp_normal = str(self.temp_dir / f"temp_normal_{uuid.uuid4()}.mp3")
        try:
            tts = gTTS(text=text, lang='es', slow=False)
            tts.save(temp_normal)

            try:
                import ffmpeg
                # atempo is an audio *filter*; passing it as an output keyword
                # would render the invalid CLI option "-atempo".
                stream = ffmpeg.input(temp_normal).audio.filter('atempo', self.GTTS_SPEED)
                stream = ffmpeg.output(stream, str(temp_file), acodec='libmp3lame')
                ffmpeg.run(stream, overwrite_output=True, capture_stdout=True, capture_stderr=True)
            except Exception as ffmpeg_error:
                print(f"No se pudo acelerar el audio de gTTS, usando velocidad normal: {ffmpeg_error}")
                Path(temp_file).write_bytes(Path(temp_normal).read_bytes())

            if os.path.exists(temp_file) and os.path.getsize(temp_file) > 0:
                print(f"Audio generado correctamente con gTTS: {os.path.getsize(temp_file)} bytes")
                return temp_file
            raise Exception("Archivo de audio gTTS inválido")
        except Exception as gtts_error:
            print(f"Error con gTTS: {gtts_error}")
            return None
        finally:
            # The intermediate file is never needed once we get here.
            try:
                os.remove(temp_normal)
            except OSError:
                pass

    async def edge_tts_speak(self, text, voice, output_file):
        """Synthesise ``text`` with edge-tts into ``output_file``.

        Returns:
            True on success.

        Raises:
            Exception: Whatever edge-tts raised, after logging it.
        """
        try:
            print(f"Generando audio con voz: {voice}")
            # Looked up defensively: the current model may be a non-Edge model
            # (no 'rate' key) when Edge is being used as a fallback.
            _, rate = self._edge_settings()
            print(f"Usando rate: {rate}")
            communicate = edge_tts.Communicate(text, voice, rate=rate)
            await communicate.save(str(output_file))
            print(f"Audio generado correctamente con {voice}")
            return True
        except Exception as e:
            print(f"Error generando audio con edge-tts: {e}")
            raise

    def stop_speaking(self):
        """Stop the current playback, if any."""
        if not self.is_speaking:
            return

        try:
            # Raised first so a waiting play_audio() loop exits promptly.
            self.should_stop = True
            pygame.mixer.music.stop()
            pygame.mixer.music.unload()
            print("Reproducción detenida por interrupción")
        except Exception as e:
            print(f"Error al detener el audio: {e}")
        finally:
            self.is_speaking = False
            self.should_stop = False

    def change_model(self, model_name):
        """Switch to another entry of ``AVAILABLE_MODELS``.

        Returns:
            True when ``model_name`` is now active; False when it is unknown
            or could not be initialised (the instance then uses ``EDGE``).
        """
        if model_name not in self.AVAILABLE_MODELS:
            print(f"Modelo {model_name} no disponible")
            return False

        try:
            print(f"Cambiando a modelo {model_name}...")
            self.current_model = model_name
            # init_audio() reports False when it had to fall back to EDGE.
            return bool(self.init_audio())
        except Exception as e:
            print(f"Error cambiando modelo: {e}")
            return False

    def is_currently_speaking(self):
        """Return True while audio is being played."""
        return self.is_speaking

    def create_audio_file(self, text, output_file):
        """Synthesise ``text`` straight into ``output_file``.

        Unlike ``text_to_speech`` this neither cleans the text nor falls
        back to another back end.

        Returns:
            ``str(output_file)`` on success, None on failure.
        """
        try:
            if 'EDGE' in self.current_model:
                voice, _ = self._edge_settings()
                self._run_edge_tts(text, voice, output_file, timeout=5.0)
            elif self.current_model == 'gTTS':
                tts = gTTS(text=text, lang='es', slow=False)
                tts.save(str(output_file))
            else:  # VITS
                self.tts.tts_to_file(
                    text=text,
                    file_path=str(output_file),
                    speaker_wav=None,
                    split_sentences=False
                )
            return str(output_file)
        except Exception as e:
            print(f"Error creando archivo de audio: {e}")
            return None

    def play_audio(self, file_path):
        """Play an audio file and block until it ends or is interrupted.

        While playing, the attached voice detector (if any) is switched to
        its high-threshold mode and restored afterwards.

        Raises:
            FileNotFoundError: ``file_path`` does not exist.
            ValueError: The file is empty.
            Exception: The audio system is unavailable or playback failed.
        """
        if not self._verify_audio_system():
            raise Exception("Sistema de audio no disponible")

        # Validate before the playback try-block: a bad path is the caller's
        # problem and must not mark the mixer as broken.
        audio_path = Path(file_path)
        if not audio_path.exists():
            raise FileNotFoundError(f"Archivo no encontrado: {file_path}")
        if not audio_path.stat().st_size > 0:
            raise ValueError("Archivo de audio vacío o corrupto")

        detector = getattr(self, 'voice_detector', None)
        try:
            # NOTE(review): the lock is held for the whole playback, which
            # serialises concurrent play_audio() calls -- confirm intended.
            with self.play_lock:
                if self.is_speaking:
                    self.stop_speaking()

                self.is_speaking = True
                pygame.mixer.music.load(str(audio_path))
                pygame.mixer.music.play()

                if detector is not None:
                    detector.update_last_audio_output()
                    # Keep listening, but with a higher threshold.
                    detector.set_high_threshold_mode(True)

                # Poll the mixer directly.  The pygame event queue cannot be
                # used here because only the mixer subsystem is initialised.
                while pygame.mixer.music.get_busy() and not self.should_stop:
                    self.clock.tick(30)

                if self.should_stop:
                    self.stop_speaking()
        except Exception as e:
            print(f"Error reproduciendo audio: {e}")
            # Force a mixer re-initialisation on the next call.
            self.audio_initialized = False
            raise
        finally:
            self.is_speaking = False
            self.should_stop = False
            if detector is not None:
                detector.set_high_threshold_mode(False)

    def set_voice_detector(self, voice_detector):
        """Attach the voice detector used to coordinate interruptions."""
        self.voice_detector = voice_detector

    def __del__(self):
        """Best-effort shutdown: stop the mixer and empty the temp directory."""
        try:
            pygame.mixer.quit()
            if self.temp_dir.exists():
                for file in self.temp_dir.glob("*"):
                    try:
                        file.unlink()
                    except OSError:
                        pass
                self.temp_dir.rmdir()
        except Exception:
            # Finalisers may run during interpreter shutdown; never raise.
            pass
|