salomonsky committed on
Commit 35d0fde · verified · 1 Parent(s): 57a1742

Update app.py

Files changed (1)
  1. app.py +7 -8
app.py CHANGED
@@ -3,18 +3,18 @@ import base64
 import io
 from huggingface_hub import InferenceClient
 from gtts import gTTS
-from audiorecorder import audiorecorder
 import speech_recognition as sr
 from pydub import AudioSegment
+from audiorecorder import audiorecorder
 
 if "history" not in st.session_state:
     st.session_state.history = []
 
 def recognize_speech(audio_data, show_messages=True):
     recognizer = sr.Recognizer()
-
+
     try:
-        audio_data.seek(0)  # make sure the pointer is at the start of the file
+        audio_data.seek(0)
         audio = gTTS(audio_data.read(), lang='es')
         audio_recording = sr.AudioFile(io.BytesIO(audio_data.read()))
 
@@ -68,7 +68,7 @@ def generate(audio_text, history, temperature=None, max_new_tokens=256, top_p=0.
 
     for response_token in stream:
         response += response_token.token.text
-
+
     response = ' '.join(response.split()).replace('</s>', '')
     audio_file = text_to_speech(response)
     return response, audio_file
@@ -99,11 +99,10 @@ def main():
     audio_text = recognize_speech(audio_data)
 
     if audio_text:
-        output, audio_file = generate(audio_text, history=st.session_state.history)
+        output, audio_file = generate(audio_text, history=st.session_state.history)
 
-        if audio_text:
-            st.session_state.history.append((audio_text, output))
-        # voice response
+        if audio_text:
+            st.session_state.history.append((audio_text, output))
         if audio_file is not None:
             play_audio(audio_file)
 
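As committed, the recognize_speech hunk above reads the uploaded buffer twice (the second read() returns empty bytes) and passes the raw audio to gTTS, which is a text-to-speech client rather than a recognizer, so no transcription is produced. A minimal sketch of a working version of this flow, assuming audio_data is a file-like WAV/MP3 buffer from the audiorecorder component and that Google's free web recognizer is acceptable for Spanish; this is an illustration, not the committed code:

import io

import speech_recognition as sr
from pydub import AudioSegment

def recognize_speech(audio_data, show_messages=True):
    # show_messages is kept only to match the signature used in app.py.
    recognizer = sr.Recognizer()
    try:
        audio_data.seek(0)                                  # rewind the uploaded buffer
        raw = audio_data.read()                             # read it exactly once
        segment = AudioSegment.from_file(io.BytesIO(raw))   # let pydub detect the container
        wav_buffer = io.BytesIO()
        segment.export(wav_buffer, format="wav")            # SpeechRecognition expects WAV/AIFF/FLAC
        wav_buffer.seek(0)
        with sr.AudioFile(wav_buffer) as source:
            audio = recognizer.record(source)               # capture the whole clip
        return recognizer.recognize_google(audio, language="es-ES")
    except (sr.UnknownValueError, sr.RequestError):
        return ""                                           # nothing recognized or service unreachable

The returned string feeds straight into generate(audio_text, ...) as in the diff.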
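The diff also calls text_to_speech(response) and play_audio(audio_file) without showing their bodies. A possible shape for those helpers, using only libraries the app already imports (gTTS, base64, and Streamlit as st); the function names match the calls in the diff, but the bodies below are assumptions rather than the committed implementation:

import base64
import io

import streamlit as st
from gtts import gTTS

def text_to_speech(text, lang="es"):
    buffer = io.BytesIO()
    gTTS(text=text, lang=lang).write_to_fp(buffer)   # synthesize MP3 into memory
    buffer.seek(0)
    return buffer

def play_audio(audio_file):
    audio_bytes = audio_file.read()
    b64 = base64.b64encode(audio_bytes).decode()
    # Embed an autoplaying <audio> tag; st.audio(audio_bytes, format="audio/mp3") is a simpler alternative.
    st.markdown(
        f'<audio autoplay src="data:audio/mp3;base64,{b64}"></audio>',
        unsafe_allow_html=True,
    )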