AiCoderv2 committed
Commit 455e799 · verified · 1 Parent(s): 98617f0

Update app.py

Files changed (1):
  1. app.py +29 -70
app.py CHANGED
@@ -1,77 +1,36 @@
 import gradio as gr
-import torch
-import soundfile as sf
-from transformers import Qwen2_5OmniForConditionalGeneration, Qwen2_5OmniProcessor
-from qwen_omni_utils import process_mm_info
-
-MODEL_ID = "Qwen/Qwen2.5-Omni-7B"
-
-# Load model & processor
-model = Qwen2_5OmniForConditionalGeneration.from_pretrained(
-    MODEL_ID, torch_dtype="auto", device_map="auto"
-)
-processor = Qwen2_5OmniProcessor.from_pretrained(MODEL_ID)
-
-SYSTEM_MESSAGE = {
-    "role": "system",
-    "content": [
-        {
-            "type": "text",
-            "text": "You are Qwen, a virtual human capable of understanding text, image, audio, and video, and responding with text and natural speech."
-        }
-    ],
-}
-
-def infer(conversation, use_audio=True):
-    # Apply template and extract modalities
-    text = processor.apply_chat_template(
-        conversation + [SYSTEM_MESSAGE],
-        add_generation_prompt=True,
-        tokenize=False,
-        return_dict=True,
-        use_audio_in_video=use_audio,
-    )
-    audios, images, videos = process_mm_info(conversation, use_audio_in_video=use_audio)
-    inputs = processor(text=text, audios=audios, images=images, videos=videos, return_tensors="pt", padding=True).to(model.device)
-
-    text_ids, audio = model.generate(**inputs, use_audio_in_video=use_audio)
-    reply_text = processor.batch_decode(text_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
-
-    audio_path = None
-    if audio is not None and use_audio:
-        audio_np = audio.reshape(-1).detach().cpu().numpy()
-        audio_path = "output.wav"
-        sf.write(audio_path, audio_np, samplerate=24000)
-
-    return reply_text, audio_path
-
-def chat_interface(input_text, uploaded_image=None, uploaded_audio=None, uploaded_video=None):
-    conv = [SYSTEM_MESSAGE]
-    user_message = []
-    if input_text:
-        user_message.append({"type": "text", "text": input_text})
-    if uploaded_image:
-        user_message.append({"type": "image", "image": uploaded_image.name})
-    if uploaded_audio:
-        user_message.append({"type": "audio", "audio": uploaded_audio.name})
-    if uploaded_video:
-        user_message.append({"type": "video", "video": uploaded_video.name})
-    conv.append({"role": "user", "content": user_message})
-
-    reply, audio_file = infer(conv)
-    return reply, audio_file
-
-# Gradio interface
-iface = gr.Interface(
-    fn=chat_interface,
-    inputs=[
-        gr.Textbox(label="Enter text"),
-        gr.File(label="Upload image"),
-        gr.File(label="Upload audio"),
-        gr.File(label="Upload video"),
-    ],
-    outputs=[gr.Textbox(label="Response"), gr.Audio(label="Speech Output")],
-    title="Qwen2.5-Omni Multimodal Assistant",
-    description="Upload any image/audio/video + text prompt and get back text + speech",
-)
-iface.launch()
+from train_and_serve import generate_video_with_audio
+import threading
+
+MAX_CPU = 16  # assume availability
+
+chat_history = []
+
+def chatbot_response(user_msg):
+    # Simple canned reply with accumulated history
+    resp = f"I heard: '{user_msg}'. Ask me about video generation!"
+    chat_history.append((user_msg, resp))
+    # Return the full history: gr.Chatbot renders a list of (user, bot) pairs
+    return chat_history
+
+def handle_prompt(prompt, song_text):
+    vpath, apath = generate_video_with_audio(prompt, song_text=song_text)
+    return vpath, apath
+
+with gr.Blocks() as demo:
+    gr.Markdown("# 🎬 FineToon Video Chat & Generator")
+    with gr.Row():
+        with gr.Column(scale=2):
+            prompt = gr.Textbox(label="Video Prompt Text")
+            song = gr.Textbox(label="Optional Song Lyrics / Voice Text")
+            gen_btn = gr.Button("Generate Video")
+            video_out = gr.Video(label="Generated Video")
+            audio_out = gr.Audio(label="Generated Audio (Song / TTS)")
+        with gr.Column(scale=1):
+            chat_in = gr.Textbox(label="Chat with Assistant")
+            chat_out = gr.Chatbot(label="Conversation")
+
+    gen_btn.click(handle_prompt, inputs=[prompt, song], outputs=[video_out, audio_out])
+    chat_in.submit(chatbot_response, inputs=chat_in, outputs=chat_out)
+
+demo.queue(concurrency_count=1, max_size=4).launch()
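
The rewritten app.py calls generate_video_with_audio from a train_and_serve module that is not touched by this commit, so the Space will only run if that module exists alongside it. A minimal stub of the signature the call site assumes (the module name comes from the import above; the parameter types and body below are inferred placeholders, not the actual implementation):

    # train_and_serve.py -- hypothetical stub; only the signature is implied
    # by the call in app.py: generate_video_with_audio(prompt, song_text=...)
    def generate_video_with_audio(prompt: str, song_text: str = "") -> tuple[str, str]:
        """Return (video_path, audio_path) for a prompt and optional lyrics."""
        # Placeholder: the real generation pipeline lives in train_and_serve.
        raise NotImplementedError("provided by the real train_and_serve module")

One compatibility caveat: concurrency_count is a Gradio 3.x queue() argument that was removed in Gradio 4, so the final line assumes the Space pins gradio<4; on Gradio 4 the equivalent would be queue(max_size=4) with a per-event concurrency_limit.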