set token 64
agent.py
CHANGED
@@ -52,7 +52,7 @@ class NewsReporterAgent:
             return_tensors="pt"
         ).to(self.device, dtype=self.model.dtype)
 
-        outputs = self.model.generate(**inputs, max_new_tokens=
+        outputs = self.model.generate(**inputs, max_new_tokens=64, disable_compile=True)  # 64 tokens for faster inference
         text = self.processor.decode(outputs[0][inputs["input_ids"].shape[-1]:])
 
         del inputs
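For context, here is a minimal sketch of the generation pattern the new line uses, shown with a plain text model; the checkpoint, tokenizer, and prompt are placeholders rather than this Space's own multimodal processor, and `disable_compile` is only accepted by recent transformers releases.

```python
# Illustrative sketch only; the checkpoint and prompt are placeholders, not this Space's.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "your-org/your-checkpoint"   # placeholder, not the model used here
device = "cpu"                          # the demo notes say it runs on CPU only

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float32).to(device)

inputs = tokenizer("Write a one-sentence news blurb.", return_tensors="pt").to(device)

# Cap output at 64 new tokens and skip torch.compile so a one-off CPU call
# does not pay compilation overhead.
outputs = model.generate(**inputs, max_new_tokens=64, disable_compile=True)

# Slice off the prompt so only the newly generated tokens are decoded,
# mirroring the processor.decode(...) call in agent.py.
text = tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True)
print(text)
```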
app.py
CHANGED
@@ -116,9 +116,13 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Multimodal News Reporter") as demo
     agent_state = gr.State(value=None)
 
     gr.Markdown("# 📰 Multimodal News Reporter AI")
-    gr.Markdown(
-
-
+    gr.Markdown(
+        "- Upload an audio recording and/or a relevant image; the AI will generate a news report you can revise and save.\n"
+        "- Token output is set to 64 only for faster inference.\n"
+        "- Note: This demo currently runs on CPU only.\n"
+        "- Sample audio is trimmed to 10 seconds for faster inference.\n"
+        "- Combined audio + image inference takes ~500-900 seconds; audio-only or image-only is much faster."
+    )
 
     with gr.Row():
         with gr.Column(scale=1):
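The note about trimming sample audio to 10 seconds is not implemented in this hunk; a helper along these lines could do it. The function name, the hard-coded 16 kHz example, and the use of a NumPy waveform are assumptions for illustration, not code from this repo.

```python
# Hypothetical helper, not taken from this repo: keeps only the first
# `max_seconds` of a mono waveform before it is passed to the model.
import numpy as np

def trim_audio(waveform: np.ndarray, sample_rate: int, max_seconds: float = 10.0) -> np.ndarray:
    """Keep at most the first `max_seconds` of audio to bound inference time."""
    max_samples = int(sample_rate * max_seconds)
    return waveform[:max_samples]

# Example: a Gradio Audio component with type="numpy" yields (sample_rate, waveform).
sample_rate, waveform = 16_000, np.zeros(16_000 * 30, dtype=np.float32)  # 30 s of silence
short = trim_audio(waveform, sample_rate)  # first 10 s, i.e. 160_000 samples
```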