Update app.py
app.py CHANGED
```diff
@@ -259,7 +259,8 @@ phi4_model = AutoModelForCausalLM.from_pretrained(
 # ------------------------------------------------------------------------------
 
 DESCRIPTION = """
-# Agent Dino 🌠
+# Agent Dino 🌠
+"""
 
 css = '''
 h1 {
@@ -468,7 +469,7 @@ def generate(
     - "@web": triggers a web search or webpage visit.
     - "@rAgent": initiates a reasoning chain using Llama mode.
     - "@yolo": triggers object detection using YOLO.
-    - **"@phi4": triggers multimodal (image/audio) processing using the Phi-4 model.**
+    - **"@phi4": triggers multimodal (image/audio) processing using the Phi-4 model with streaming output.**
     """
     text = input_dict["text"]
     files = input_dict.get("files", [])
@@ -564,7 +565,7 @@ def generate(
         yield gr.Image(result_img)
         return
 
-    # --- Phi-4 Multimodal branch (Image/Audio) ---
+    # --- Phi-4 Multimodal branch (Image/Audio) with streaming ---
     if text.strip().lower().startswith("@phi4"):
        question = text[len("@phi4"):].strip()
        if not files:
@@ -603,18 +604,17 @@ def generate(
             yield "Invalid file type for @phi4 multimodal processing."
             return
 
-
-
-
-
-
-
-
-
-
-
-
-        yield response
+        # Set up a streamer for the phi4 model
+        streamer_phi4 = TextIteratorStreamer(phi4_processor, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
+        generation_kwargs_phi4 = {**inputs, "streamer": streamer_phi4, "max_new_tokens": 200}
+        thread_phi4 = Thread(target=phi4_model.generate, kwargs=generation_kwargs_phi4)
+        thread_phi4.start()
+
+        outputs_phi4 = []
+        yield "🤔 Thinking..."
+        for new_text in streamer_phi4:
+            outputs_phi4.append(new_text)
+            yield "".join(outputs_phi4)
         return
 
     # --- Text and TTS branch ---
```
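The substance of this commit is the last hunk: the previous blocking Phi-4 generate-and-decode step (ending in a single `yield response`) is replaced with the threaded `TextIteratorStreamer` pattern, so partial output reaches the Gradio chat as tokens are produced. Below is a minimal, self-contained sketch of that pattern. It uses a small stand-in text model ("gpt2") and a hypothetical `stream_reply` helper rather than the Space's actual `phi4_model`/`phi4_processor` multimodal setup, which this diff does not show in full.

```python
# Sketch of the threaded streaming pattern used by the new @phi4 branch.
# "gpt2" and stream_reply() are illustrative stand-ins, not the Phi-4 setup.
from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

tokenizer = AutoTokenizer.from_pretrained("gpt2")      # stand-in for phi4_processor
model = AutoModelForCausalLM.from_pretrained("gpt2")   # stand-in for phi4_model


def stream_reply(prompt: str, max_new_tokens: int = 200):
    inputs = tokenizer(prompt, return_tensors="pt")
    # The streamer yields decoded text chunks as generate() produces tokens.
    streamer = TextIteratorStreamer(
        tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True
    )
    generation_kwargs = {**inputs, "streamer": streamer, "max_new_tokens": max_new_tokens}

    # generate() blocks, so it runs on a background thread while we consume the streamer.
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    chunks = []
    for new_text in streamer:        # iterates until generation finishes (or the timeout fires)
        chunks.append(new_text)
        yield "".join(chunks)        # yield the accumulated text so far, as the handler does
    thread.join()


for partial in stream_reply("Streaming lets the UI update as tokens arrive because"):
    print(partial)
```

Running `generate()` on a worker thread is what lets the handler keep yielding: iterating the streamer blocks only until the next chunk arrives (or the 20-second timeout), mirroring the `yield "".join(outputs_phi4)` accumulation in the new branch.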