beyoru committed
Commit 7c5c8ba · verified · 1 Parent(s): dcf00ef

Update app.py

Files changed (1)
  1. app.py +91 -148
app.py CHANGED
@@ -1,149 +1,92 @@
- import gradio as gr
- import spaces
- from transformers import Qwen2VLForConditionalGeneration, AutoProcessor, TextIteratorStreamer
- from qwen_vl_utils import process_vision_info
- import torch
- from PIL import Image
- import subprocess
- import numpy as np
  import os
- from threading import Thread
- import uuid
- import io
-
- # Model and Processor Loading
- MODEL_ID = "Qwen/Qwen2-VL-2B-Instruct"
- model = Qwen2VLForConditionalGeneration.from_pretrained(
-     MODEL_ID,
-     trust_remote_code=True,
-     torch_dtype=torch.float16
- ).eval()
- processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
-
- DESCRIPTION = "[Qwen2-VL-2B Demo](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct)"
-
- image_extensions = Image.registered_extensions()
- video_extensions = ("avi", "mp4", "mov", "mkv", "flv", "wmv", "mjpeg", "wav", "gif", "webm", "m4v", "3gp")
-
-
- def identify_and_save_blob(blob_path):
-     """Identifies if the blob is an image or video and saves it accordingly."""
-     try:
-         with open(blob_path, 'rb') as file:
-             blob_content = file.read()
-
-         try:
-             Image.open(io.BytesIO(blob_content)).verify()  # Check if it's a valid image
-             extension = ".png"  # Default to PNG for saving
-             media_type = "image"
-         except (IOError, SyntaxError):
-             extension = ".mp4"  # Default to MP4 for saving
-             media_type = "video"
-
-         filename = f"temp_{uuid.uuid4()}_media{extension}"
-         with open(filename, "wb") as f:
-             f.write(blob_content)
-         return filename, media_type
-
-     except Exception as e:
-         raise ValueError(f"Error processing the file: {e}")
-
-
- @spaces.GPU
- def qwen_inference(media_input, text_input=None, system_prompt=None, max_tokens=1024):
-     try:
-         media_type = None  # Initialize media_type variable
-
-         if isinstance(media_input, str):
-             media_path = media_input
-             if media_path.endswith(tuple([i for i, f in image_extensions.items()])):
-                 media_type = "image"
-             elif media_path.endswith(video_extensions):
-                 media_type = "video"
-             else:
-                 # Handle the case where file format is unknown
-                 media_path, media_type = identify_and_save_blob(media_input)
-
-         if not media_type:  # Check if media_type was assigned properly
-             raise ValueError("Unsupported media type. Please upload an image or video.")
-
-         # Default system prompt if none is provided
-         system_prompt = system_prompt or "You are a helpful assistant. Answer questions based on the image or video provided, and explain your reasoning clearly."
-
-         messages = [
-             {
-                 "role": "system",
-                 "content": system_prompt
-             },
-             {
-                 "role": "user",
-                 "content": [
-                     {
-                         "type": media_type,
-                         media_type: media_path,
-                         **({"fps": 8.0} if media_type == "video" else {}),
-                     },
-                     {"type": "text", "text": text_input},
-                 ],
-             }
-         ]
-
-         text = processor.apply_chat_template(
-             messages, tokenize=False, add_generation_prompt=True
-         )
-         image_inputs, video_inputs = process_vision_info(messages)
-         inputs = processor(
-             text=[text],
-             images=image_inputs,
-             videos=video_inputs,
-             padding=True,
-             return_tensors="pt",
-         )
-         streamer = TextIteratorStreamer(
-             processor, skip_prompt=True, **{"skip_special_tokens": True}
-         )
-         generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=max_tokens)
-
-         thread = Thread(target=model.generate, kwargs=generation_kwargs)
-         thread.start()
-
-         buffer = ""
-         for new_text in streamer:
-             buffer += new_text
-             yield buffer
-
-     except Exception as e:
-         yield f"Error during inference: {e}"
-
-
- css = """
- #output {
-     height: 500px;
-     overflow: auto;
-     border: 1px solid #ccc;
- }
- """
-
- with gr.Blocks(css=css) as demo:
-     gr.Markdown(DESCRIPTION)
-
-     with gr.Tab(label="Image/Video Input"):
-         with gr.Row():
-             with gr.Column():
-                 input_media = gr.File(label="Upload Image or Video", type="filepath")
-                 system_prompt = gr.Textbox(
-                     label="System Prompt",
-                     value="You are a helpful assistant. Answer questions based on the image or video provided, and explain your reasoning clearly.",
-                     lines=3
-                 )
-                 text_input = gr.Textbox(label="Question")
-                 max_tokens = gr.Slider(label="Max New Tokens", minimum=16, maximum=2048, value=1024, step=16)
-                 submit_btn = gr.Button(value="Submit")
-             with gr.Column():
-                 output_text = gr.Textbox(label="Output Text", elem_id="output")
-
-         submit_btn.click(
-             qwen_inference, [input_media, text_input, system_prompt, max_tokens], [output_text]
-         )
-
- demo.launch(debug=True)
  import os
+ import copy
+ from argparse import ArgumentParser
+ from http import HTTPStatus
+
+ import gradio as gr
+ import dashscope
+ from dashscope import MultiModalConversation
+
+ # Read the DashScope API key from the environment and register it with the SDK
+ # (startup raises a KeyError if API_KEY is not set)
+ API_KEY = os.environ['API_KEY']
+ dashscope.api_key = API_KEY
+
+ # Define constants
+ MODEL_NAME = 'Qwen2-VL-2B-Instruct'
+
+ # Get arguments
+ def _get_args():
+     parser = ArgumentParser()
+     parser.add_argument("--share", action="store_true", default=False, help="Create a publicly shareable link.")
+     parser.add_argument("--server-port", type=int, default=7860, help="Server port.")
+     parser.add_argument("--server-name", type=str, default="127.0.0.1", help="Server name.")
+     return parser.parse_args()
+
+ # Stream a chat reply from the DashScope API
+ def predict(_chatbot, task_history, system_prompt):
+     chat_query = _chatbot[-1][0]
+     query = task_history[-1][0]
+     if not chat_query:
+         _chatbot.pop()
+         task_history.pop()
+         yield _chatbot
+         return
+     print("User:", query)
+     history_cp = copy.deepcopy(task_history)
+     # Rebuild the full conversation, keeping earlier assistant replies
+     messages = [{'role': 'system', 'content': [{'text': system_prompt}]}] if system_prompt else []
+     for q, a in history_cp:
+         messages.append({'role': 'user', 'content': [{'text': q}]})
+         if a is not None:
+             messages.append({'role': 'assistant', 'content': [{'text': a}]})
+     responses = MultiModalConversation.call(
+         model=MODEL_NAME, messages=messages, stream=True,
+     )
+     for response in responses:
+         if response.status_code != HTTPStatus.OK:
+             raise Exception(f'Error: {response.message}')
+         # Streamed chunks are cumulative, so each one carries the full text so far
+         response_text = ''.join(ele['text'] for ele in response.output.choices[0].message.content)
+         _chatbot[-1] = (chat_query, response_text)
+         task_history[-1] = (query, response_text)  # record the reply so later turns see it
+         yield _chatbot
+
+ # Add text to history
+ def add_text(history, task_history, text):
+     history.append((text, None))
+     task_history.append((text, None))
+     return history, task_history
+
+ # Reset input
+ def reset_user_input():
+     return gr.update(value="")
+
+ # Reset history
+ def reset_state(task_history):
+     task_history.clear()
+     return []
+
+ # Build the UI and launch the demo
+ def _launch_demo(args):
+     with gr.Blocks() as demo:
+         chatbot = gr.Chatbot(label='Qwen2-VL-2B-Instruct', height=500)
+         query = gr.Textbox(lines=2, label='Input')
+         system_prompt = gr.Textbox(lines=2, label='System Prompt', placeholder="Modify system prompt here...")
+         task_history = gr.State([])
+
+         with gr.Row():
+             submit_btn = gr.Button("🚀 Submit")
+             regen_btn = gr.Button("🤔️ Regenerate")
+             empty_bin = gr.Button("🧹 Clear History")
+
+         submit_btn.click(add_text, [chatbot, task_history, query], [chatbot, task_history]).then(
+             predict, [chatbot, task_history, system_prompt], [chatbot], show_progress=True
+         )
+         submit_btn.click(reset_user_input, [], [query])
+         empty_bin.click(reset_state, [task_history], [chatbot], show_progress=True)
+         regen_btn.click(predict, [chatbot, task_history, system_prompt], [chatbot], show_progress=True)
+
+         gr.Markdown("""<center><font size=3>Qwen2-VL-2B-Instruct Demo</center>""")
+         gr.Markdown("""<center><font size=2>Note: This demo uses the Qwen2-VL-2B-Instruct model. Please be mindful of ethical content creation.</center>""")
+
+     demo.queue().launch(share=args.share, server_port=args.server_port, server_name=args.server_name)
+
+ # Main function
+ def main():
+     args = _get_args()
+     _launch_demo(args)
+
+ if __name__ == '__main__':
+     main()
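
For quick sanity checks outside Gradio, a minimal non-streaming sketch of the DashScope call the new app relies on is shown below. This is an illustrative sketch, not part of the commit: it assumes dashscope is installed, the API_KEY environment variable is set, the model name is one that key can access, and the image URL is a hypothetical placeholder.

    import os
    import dashscope
    from http import HTTPStatus
    from dashscope import MultiModalConversation

    dashscope.api_key = os.environ['API_KEY']  # same environment variable the app reads

    messages = [{
        'role': 'user',
        'content': [
            {'image': 'https://example.com/sample.jpg'},  # hypothetical URL, for illustration only
            {'text': 'Describe this image.'},
        ],
    }]

    response = MultiModalConversation.call(model='Qwen2-VL-2B-Instruct', messages=messages)
    if response.status_code == HTTPStatus.OK:
        # message.content is a list of parts, e.g. [{'text': '...'}]
        print(''.join(part['text'] for part in response.output.choices[0].message.content))
    else:
        print(response.code, response.message)

The app itself starts with "python app.py"; the --share, --server-port, and --server-name flags behave as defined in _get_args().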