freddyaboulton HF staff committed on
Commit
0eb6af0
·
verified ·
1 Parent(s): b1ea566

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +27 -34
app.py CHANGED
@@ -10,7 +10,6 @@ from fastrtc import (
10
  AdditionalOutputs,
11
  ReplyOnPause,
12
  Stream,
13
- WebRTCError,
14
  get_stt_model,
15
  get_twilio_turn_credentials,
16
  )
@@ -30,42 +29,36 @@ def response(
30
  audio: tuple[int, NDArray[np.int16 | np.float32]],
31
  chatbot: list[dict] | None = None,
32
  ):
33
- try:
34
- chatbot = chatbot or []
35
- messages = [{"role": d["role"], "content": d["content"]} for d in chatbot]
36
- start = time.time()
37
- text = stt_model.stt(audio)
38
- print("transcription", time.time() - start)
39
- print("prompt", text)
40
- chatbot.append({"role": "user", "content": text})
41
- yield AdditionalOutputs(chatbot)
42
- messages.append({"role": "user", "content": text})
43
- response_text = (
44
- groq_client.chat.completions.create(
45
- model="llama-3.1-8b-instant",
46
- max_tokens=512,
47
- messages=messages, # type: ignore
48
- )
49
- .choices[0]
50
- .message.content
51
  )
 
 
 
52
 
53
- chatbot.append({"role": "assistant", "content": response_text})
54
 
55
- for chunk in tts_client.text_to_speech.convert_as_stream(
56
- text=response_text, # type: ignore
57
- voice_id="JBFqnCBsd6RMkjVDRZzb",
58
- model_id="eleven_multilingual_v2",
59
- output_format="pcm_24000",
60
- ):
61
- audio_array = np.frombuffer(chunk, dtype=np.int16).reshape(1, -1)
62
- yield (24000, audio_array)
63
- yield AdditionalOutputs(chatbot)
64
- except Exception:
65
- import traceback
66
-
67
- traceback.print_exc()
68
- raise WebRTCError(traceback.format_exc())
69
 
70
 
71
  chatbot = gr.Chatbot(type="messages")
 
10
  AdditionalOutputs,
11
  ReplyOnPause,
12
  Stream,
 
13
  get_stt_model,
14
  get_twilio_turn_credentials,
15
  )
 
29
  audio: tuple[int, NDArray[np.int16 | np.float32]],
30
  chatbot: list[dict] | None = None,
31
  ):
32
+ chatbot = chatbot or []
33
+ messages = [{"role": d["role"], "content": d["content"]} for d in chatbot]
34
+ start = time.time()
35
+ text = stt_model.stt(audio)
36
+ print("transcription", time.time() - start)
37
+ print("prompt", text)
38
+ chatbot.append({"role": "user", "content": text})
39
+ yield AdditionalOutputs(chatbot)
40
+ messages.append({"role": "user", "content": text})
41
+ response_text = (
42
+ groq_client.chat.completions.create(
43
+ model="llama-3.1-8b-instant",
44
+ max_tokens=512,
45
+ messages=messages, # type: ignore
 
 
 
 
46
  )
47
+ .choices[0]
48
+ .message.content
49
+ )
50
 
51
+ chatbot.append({"role": "assistant", "content": response_text})
52
 
53
+ for chunk in tts_client.text_to_speech.convert_as_stream(
54
+ text=response_text, # type: ignore
55
+ voice_id="JBFqnCBsd6RMkjVDRZzb",
56
+ model_id="eleven_multilingual_v2",
57
+ output_format="pcm_24000",
58
+ ):
59
+ audio_array = np.frombuffer(chunk, dtype=np.int16).reshape(1, -1)
60
+ yield (24000, audio_array)
61
+ yield AdditionalOutputs(chatbot)
 
 
 
 
 
62
 
63
 
64
  chatbot = gr.Chatbot(type="messages")