set token 128
Browse files
agent.py
CHANGED
|
@@ -52,7 +52,7 @@ class NewsReporterAgent:
|
|
| 52 |
return_tensors="pt"
|
| 53 |
).to(self.device, dtype=self.model.dtype)
|
| 54 |
|
| 55 |
-
outputs = self.model.generate(**inputs, max_new_tokens=
|
| 56 |
text = self.processor.decode(outputs[0][inputs["input_ids"].shape[-1]:])
|
| 57 |
|
| 58 |
del inputs
|
|
|
|
| 52 |
return_tensors="pt"
|
| 53 |
).to(self.device, dtype=self.model.dtype)
|
| 54 |
|
| 55 |
+
outputs = self.model.generate(**inputs, max_new_tokens=128, disable_compile=True) # capped at 128 new tokens for faster inference
|
| 56 |
text = self.processor.decode(outputs[0][inputs["input_ids"].shape[-1]:])
|
| 57 |
|
| 58 |
del inputs
|
app.py
CHANGED
|
@@ -118,7 +118,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Multimodal News Reporter") as demo
|
|
| 118 |
gr.Markdown("# 📰 Multimodal News Reporter AI")
|
| 119 |
gr.Markdown(
|
| 120 |
"- Upload an audio recording and/or a relevant image; the AI will generate a news report you can revise and save.\n"
|
| 121 |
-
"- Token output is set to
|
| 122 |
"- Note: This demo currently runs on CPU only.\n"
|
| 123 |
"- Sample audio is trimmed to 10 seconds for faster inference.\n"
|
| 124 |
"- Combined audio + image inference takes ~500-900 seconds; audio-only or image-only is much faster."
|
|
|
|
| 118 |
gr.Markdown("# 📰 Multimodal News Reporter AI")
|
| 119 |
gr.Markdown(
|
| 120 |
"- Upload an audio recording and/or a relevant image; the AI will generate a news report you can revise and save.\n"
|
| 121 |
+
"- Token output is set to 128 only for faster inference. \n"
|
| 122 |
"- Note: This demo currently runs on CPU only.\n"
|
| 123 |
"- Sample audio is trimmed to 10 seconds for faster inference.\n"
|
| 124 |
"- Combined audio + image inference takes ~500-900 seconds; audio-only or image-only is much faster."
|