freddyaboulton HF staff committed on
Commit
adc5c1c
·
verified ·
1 Parent(s): 134efe1

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +19 -25
app.py CHANGED
@@ -13,7 +13,6 @@ from fastapi.responses import HTMLResponse
13
  from fastrtc import (
14
  AsyncStreamHandler,
15
  Stream,
16
- async_aggregate_bytes_to_16bit,
17
  get_twilio_turn_credentials,
18
  )
19
  from google import genai
@@ -62,16 +61,9 @@ class GeminiHandler(AsyncStreamHandler):
62
  output_frame_size=self.output_frame_size,
63
  )
64
 
65
- async def stream(self) -> AsyncGenerator[bytes, None]:
66
- while not self.quit.is_set():
67
- audio = await self.input_queue.get()
68
- yield audio
69
- return
70
-
71
- async def connect(
72
- self, api_key: str | None = None, voice_name: str | None = "Kore"
73
- ) -> AsyncGenerator[bytes, None]:
74
- """Connect to to genai server and start the stream"""
75
  client = genai.Client(
76
  api_key=api_key or os.getenv("GEMINI_API_KEY"),
77
  http_options={"api_version": "v1alpha"},
@@ -93,7 +85,16 @@ class GeminiHandler(AsyncStreamHandler):
93
  stream=self.stream(), mime_type="audio/pcm"
94
  ):
95
  if audio.data:
96
- yield audio.data
 
 
 
 
 
 
 
 
 
97
 
98
  async def receive(self, frame: tuple[int, np.ndarray]) -> None:
99
  _, array = frame
@@ -101,24 +102,13 @@ class GeminiHandler(AsyncStreamHandler):
101
  audio_message = encode_audio(array)
102
  self.input_queue.put_nowait(audio_message)
103
 
104
- async def generator(self) -> None:
105
- async for audio_response in async_aggregate_bytes_to_16bit(
106
- self.connect(*self.latest_args[1:])
107
- ):
108
- self.output_queue.put_nowait(audio_response)
109
-
110
  async def emit(self) -> tuple[int, np.ndarray]:
111
- if not self.args_set.is_set():
112
- await self.wait_for_args()
113
- asyncio.create_task(self.generator())
114
-
115
  array = await self.output_queue.get()
116
  return (self.output_sample_rate, array)
117
 
118
  def shutdown(self) -> None:
119
  self.quit.set()
120
  self.args_set.clear()
121
- self.quit.clear()
122
 
123
 
124
  stream = Stream(
@@ -128,7 +118,11 @@ stream = Stream(
128
  rtc_configuration=get_twilio_turn_credentials() if get_space() else None,
129
  concurrency_limit=20 if get_space() else None,
130
  additional_inputs=[
131
- gr.Textbox(label="API Key", type="password", value=os.getenv("GEMINI_API_KEY")),
 
 
 
 
132
  gr.Dropdown(
133
  label="Voice",
134
  choices=[
@@ -173,7 +167,7 @@ if __name__ == "__main__":
173
  import os
174
 
175
  if (mode := os.getenv("MODE")) == "UI":
176
- stream.ui.launch(server_port=7860, server_name="0.0.0.0")
177
  elif mode == "PHONE":
178
  stream.fastphone(host="0.0.0.0", port=7860)
179
  else:
 
13
  from fastrtc import (
14
  AsyncStreamHandler,
15
  Stream,
 
16
  get_twilio_turn_credentials,
17
  )
18
  from google import genai
 
61
  output_frame_size=self.output_frame_size,
62
  )
63
 
64
+ async def start_up(self):
65
+ await self.wait_for_args()
66
+ api_key, voice_name = self.latest_args[1:]
 
 
 
 
 
 
 
67
  client = genai.Client(
68
  api_key=api_key or os.getenv("GEMINI_API_KEY"),
69
  http_options={"api_version": "v1alpha"},
 
85
  stream=self.stream(), mime_type="audio/pcm"
86
  ):
87
  if audio.data:
88
+ array = np.frombuffer(audio.data, dtype=np.int16)
89
+ self.output_queue.put_nowait(array)
90
+
91
+ async def stream(self) -> AsyncGenerator[bytes, None]:
92
+ while not self.quit.is_set():
93
+ try:
94
+ audio = await asyncio.wait_for(self.input_queue.get(), 0.1)
95
+ yield audio
96
+ except (asyncio.TimeoutError, TimeoutError):
97
+ pass
98
 
99
  async def receive(self, frame: tuple[int, np.ndarray]) -> None:
100
  _, array = frame
 
102
  audio_message = encode_audio(array)
103
  self.input_queue.put_nowait(audio_message)
104
 
 
 
 
 
 
 
105
  async def emit(self) -> tuple[int, np.ndarray]:
 
 
 
 
106
  array = await self.output_queue.get()
107
  return (self.output_sample_rate, array)
108
 
109
  def shutdown(self) -> None:
110
  self.quit.set()
111
  self.args_set.clear()
 
112
 
113
 
114
  stream = Stream(
 
118
  rtc_configuration=get_twilio_turn_credentials() if get_space() else None,
119
  concurrency_limit=20 if get_space() else None,
120
  additional_inputs=[
121
+ gr.Textbox(
122
+ label="API Key",
123
+ type="password",
124
+ value=os.getenv("GEMINI_API_KEY") if not get_space() else "",
125
+ ),
126
  gr.Dropdown(
127
  label="Voice",
128
  choices=[
 
167
  import os
168
 
169
  if (mode := os.getenv("MODE")) == "UI":
170
+ stream.ui.launch(server_port=7860)
171
  elif mode == "PHONE":
172
  stream.fastphone(host="0.0.0.0", port=7860)
173
  else: