Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Upload folder using huggingface_hub
Browse files
app.py
CHANGED
@@ -13,7 +13,6 @@ from fastapi.responses import HTMLResponse
|
|
13 |
from fastrtc import (
|
14 |
AsyncStreamHandler,
|
15 |
Stream,
|
16 |
-
async_aggregate_bytes_to_16bit,
|
17 |
get_twilio_turn_credentials,
|
18 |
)
|
19 |
from google import genai
|
@@ -62,16 +61,9 @@ class GeminiHandler(AsyncStreamHandler):
|
|
62 |
output_frame_size=self.output_frame_size,
|
63 |
)
|
64 |
|
65 |
-
async def
|
66 |
-
|
67 |
-
|
68 |
-
yield audio
|
69 |
-
return
|
70 |
-
|
71 |
-
async def connect(
|
72 |
-
self, api_key: str | None = None, voice_name: str | None = "Kore"
|
73 |
-
) -> AsyncGenerator[bytes, None]:
|
74 |
-
"""Connect to to genai server and start the stream"""
|
75 |
client = genai.Client(
|
76 |
api_key=api_key or os.getenv("GEMINI_API_KEY"),
|
77 |
http_options={"api_version": "v1alpha"},
|
@@ -93,7 +85,16 @@ class GeminiHandler(AsyncStreamHandler):
|
|
93 |
stream=self.stream(), mime_type="audio/pcm"
|
94 |
):
|
95 |
if audio.data:
|
96 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
97 |
|
98 |
async def receive(self, frame: tuple[int, np.ndarray]) -> None:
|
99 |
_, array = frame
|
@@ -101,24 +102,13 @@ class GeminiHandler(AsyncStreamHandler):
|
|
101 |
audio_message = encode_audio(array)
|
102 |
self.input_queue.put_nowait(audio_message)
|
103 |
|
104 |
-
async def generator(self) -> None:
|
105 |
-
async for audio_response in async_aggregate_bytes_to_16bit(
|
106 |
-
self.connect(*self.latest_args[1:])
|
107 |
-
):
|
108 |
-
self.output_queue.put_nowait(audio_response)
|
109 |
-
|
110 |
async def emit(self) -> tuple[int, np.ndarray]:
|
111 |
-
if not self.args_set.is_set():
|
112 |
-
await self.wait_for_args()
|
113 |
-
asyncio.create_task(self.generator())
|
114 |
-
|
115 |
array = await self.output_queue.get()
|
116 |
return (self.output_sample_rate, array)
|
117 |
|
118 |
def shutdown(self) -> None:
|
119 |
self.quit.set()
|
120 |
self.args_set.clear()
|
121 |
-
self.quit.clear()
|
122 |
|
123 |
|
124 |
stream = Stream(
|
@@ -128,7 +118,11 @@ stream = Stream(
|
|
128 |
rtc_configuration=get_twilio_turn_credentials() if get_space() else None,
|
129 |
concurrency_limit=20 if get_space() else None,
|
130 |
additional_inputs=[
|
131 |
-
gr.Textbox(
|
|
|
|
|
|
|
|
|
132 |
gr.Dropdown(
|
133 |
label="Voice",
|
134 |
choices=[
|
@@ -173,7 +167,7 @@ if __name__ == "__main__":
|
|
173 |
import os
|
174 |
|
175 |
if (mode := os.getenv("MODE")) == "UI":
|
176 |
-
stream.ui.launch(server_port=7860
|
177 |
elif mode == "PHONE":
|
178 |
stream.fastphone(host="0.0.0.0", port=7860)
|
179 |
else:
|
|
|
13 |
from fastrtc import (
|
14 |
AsyncStreamHandler,
|
15 |
Stream,
|
|
|
16 |
get_twilio_turn_credentials,
|
17 |
)
|
18 |
from google import genai
|
|
|
61 |
output_frame_size=self.output_frame_size,
|
62 |
)
|
63 |
|
64 |
+
async def start_up(self):
|
65 |
+
await self.wait_for_args()
|
66 |
+
api_key, voice_name = self.latest_args[1:]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
client = genai.Client(
|
68 |
api_key=api_key or os.getenv("GEMINI_API_KEY"),
|
69 |
http_options={"api_version": "v1alpha"},
|
|
|
85 |
stream=self.stream(), mime_type="audio/pcm"
|
86 |
):
|
87 |
if audio.data:
|
88 |
+
array = np.frombuffer(audio.data, dtype=np.int16)
|
89 |
+
self.output_queue.put_nowait(array)
|
90 |
+
|
91 |
+
async def stream(self) -> AsyncGenerator[bytes, None]:
|
92 |
+
while not self.quit.is_set():
|
93 |
+
try:
|
94 |
+
audio = await asyncio.wait_for(self.input_queue.get(), 0.1)
|
95 |
+
yield audio
|
96 |
+
except (asyncio.TimeoutError, TimeoutError):
|
97 |
+
pass
|
98 |
|
99 |
async def receive(self, frame: tuple[int, np.ndarray]) -> None:
|
100 |
_, array = frame
|
|
|
102 |
audio_message = encode_audio(array)
|
103 |
self.input_queue.put_nowait(audio_message)
|
104 |
|
|
|
|
|
|
|
|
|
|
|
|
|
105 |
async def emit(self) -> tuple[int, np.ndarray]:
|
|
|
|
|
|
|
|
|
106 |
array = await self.output_queue.get()
|
107 |
return (self.output_sample_rate, array)
|
108 |
|
109 |
def shutdown(self) -> None:
|
110 |
self.quit.set()
|
111 |
self.args_set.clear()
|
|
|
112 |
|
113 |
|
114 |
stream = Stream(
|
|
|
118 |
rtc_configuration=get_twilio_turn_credentials() if get_space() else None,
|
119 |
concurrency_limit=20 if get_space() else None,
|
120 |
additional_inputs=[
|
121 |
+
gr.Textbox(
|
122 |
+
label="API Key",
|
123 |
+
type="password",
|
124 |
+
value=os.getenv("GEMINI_API_KEY") if not get_space() else "",
|
125 |
+
),
|
126 |
gr.Dropdown(
|
127 |
label="Voice",
|
128 |
choices=[
|
|
|
167 |
import os
|
168 |
|
169 |
if (mode := os.getenv("MODE")) == "UI":
|
170 |
+
stream.ui.launch(server_port=7860)
|
171 |
elif mode == "PHONE":
|
172 |
stream.fastphone(host="0.0.0.0", port=7860)
|
173 |
else:
|