Update app.py
Browse files
app.py
CHANGED
|
@@ -28,6 +28,22 @@ from streaming_stt_nemo import Model
|
|
| 28 |
from huggingface_hub import InferenceClient
|
| 29 |
import edge_tts
|
| 30 |
import asyncio
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
theme = gr.themes.Base(
|
| 33 |
font=[gr.themes.GoogleFont('Libre Franklin'), gr.themes.GoogleFont('Public Sans'), 'system-ui', 'sans-serif'],
|
|
@@ -519,7 +535,13 @@ with gr.Blocks() as voice2:
|
|
| 519 |
outputs=[output], live=True)
|
| 520 |
|
| 521 |
with gr.Blocks() as video:
|
| 522 |
-
gr.Markdown("
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 523 |
|
| 524 |
with gr.Blocks(theme=theme, css="footer {visibility: hidden}textbox{resize:none}", title="GPT 4o DEMO") as demo:
|
| 525 |
gr.Markdown("# OpenGPT 4o")
|
|
|
|
| 28 |
from huggingface_hub import InferenceClient
|
| 29 |
import edge_tts
|
| 30 |
import asyncio
|
| 31 |
+
from transformers import pipeline
|
| 32 |
+
|
| 33 |
+
oracle = pipeline(model="dandelin/vilt-b32-finetuned-vqa")
|
| 34 |
+
|
| 35 |
+
async def answer_question(image, question):
    """Answer a question about an image and yield a spoken version of the answer.

    Passes ``image`` and ``question`` to the module-level ``oracle`` pipeline,
    takes the ``'answer'`` field of the first result, synthesises it with
    ``edge_tts`` and yields the path of the audio file that was written.

    Args:
        image: Image handed unchanged to ``oracle`` (presumably a file path,
            since the UI declares the input with ``type="filepath"`` -- confirm
            against the caller).
        question: Question text handed unchanged to ``oracle``.

    Yields:
        str: Path of the temporary file holding the synthesised speech. The
        file is created with ``delete=False``, so it is not removed here.
    """
    response = oracle(question=question, image=image)
    # The first entry of the pipeline result is used as the answer.
    answer_text = str(response[0]['answer'])
    communicate = edge_tts.Communicate(answer_text)

    # The temporary file is only used to reserve a unique path. Close our own
    # handle *before* edge_tts writes to that path, so the file is never open
    # twice (which fails on Windows and needlessly holds a descriptor).
    # NOTE(review): edge_tts may emit MP3 data by default -- confirm that the
    # ".wav" suffix is intended.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
        tmp_path = tmp_file.name

    await communicate.save(tmp_path)
    yield tmp_path
|
| 44 |
+
|
| 45 |
+
from gradio import Image, Textbox
|
| 46 |
+
|
| 47 |
|
| 48 |
theme = gr.themes.Base(
|
| 49 |
font=[gr.themes.GoogleFont('Libre Franklin'), gr.themes.GoogleFont('Public Sans'), 'system-ui', 'sans-serif'],
|
|
|
|
| 535 |
outputs=[output], live=True)
|
| 536 |
|
| 537 |
with gr.Blocks() as video:
    # Headings displayed above the interface.
    gr.Markdown(" ## Video Chat Beta")
    gr.Markdown("### Click camera option to update image")

    # A webcam image (delivered as a file path) and a text box go in;
    # the audio produced by answer_question comes out and autoplays.
    gr.Interface(
        fn=answer_question,
        inputs=[
            Image(type="filepath", sources="webcam", streaming=False),
            Textbox(),
        ],
        outputs=[gr.Audio(autoplay=True)],
    )
|
| 545 |
|
| 546 |
with gr.Blocks(theme=theme, css="footer {visibility: hidden}textbox{resize:none}", title="GPT 4o DEMO") as demo:
|
| 547 |
gr.Markdown("# OpenGPT 4o")
|