Update app.py
app.py CHANGED
@@ -1,92 +1,84 @@
-import os
-import gradio as gr
-from …
-import copy
-import …
-import requests
-from http import HTTPStatus
-from dashscope import MultiModalConversation
-
…
-
-# Simplify chat prediction
-def predict(_chatbot, task_history, system_prompt):
-    chat_query = _chatbot[-1][0]
-    query = task_history[-1][0]
-    if not chat_query:
-        _chatbot.pop()
-        task_history.pop()
-        return _chatbot
-    print("User:", query)
-    history_cp = copy.deepcopy(task_history)
-    messages = [{'role': 'user', 'content': [{'text': q}]} for q, _ in history_cp]
-    responses = MultiModalConversation.call(
-        model=MODEL_NAME, messages=messages, stream=True,
-    )
-    for response in responses:
-        if not response.status_code == HTTPStatus.OK:
-            raise Exception(f'Error: {response.message}')
-        response_text = ''.join([ele['text'] for ele in response.output.choices[0].message.content])
-        _chatbot[-1] = (chat_query, response_text)
-        yield _chatbot
-
-# Add text to history
-def add_text(history, task_history, text):
-    task_text = text
-    history.append((_parse_text(text), None))
-    task_history.append((task_text, None))
-    return history, task_history, ""
-
-# Reset input
-def reset_user_input():
-    return gr.update(value="")
-
…
-    )
-    submit_btn.click(reset_user_input, [], [query])
-    empty_bin.click(reset_state, [task_history], [chatbot], show_progress=True)
-    regen_btn.click(predict, [chatbot, task_history, system_prompt], [chatbot], show_progress=True)
-
-    gr.Markdown("""<center><font size=3>Qwen2-VL-2B-Instruct Demo</center>""")
-    gr.Markdown("""<center><font size=2>Note: This demo uses Qwen2-VL-2B-Instruct model. Please be mindful of ethical content creation.</center>""")
-
-    demo.queue().launch(share=args.share, server_port=args.server_port, server_name=args.server_name)
-
-
-def main():
-    args = _get_args()
-    _launch_demo(args)
-
-
-if __name__ == '__main__':
-    main()
+import gradio as gr
+from PIL import Image
+import torch
+from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
+import requests
+
+# Load the model and processor
+model = Qwen2VLForConditionalGeneration.from_pretrained(
+    "Qwen/Qwen2-VL-2B-Instruct", torch_dtype="auto",
+)
+processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct")
+
+# Process text and images for inference
+def generate_response(messages: list):
+    # Render the conversation into a single prompt string
+    text_prompt = processor.apply_chat_template(
+        messages, tokenize=False, add_generation_prompt=True
+    )
+
+    # Collect images from each message's "content" list (they are nested
+    # there, not stored on the message dict itself)
+    images = [
+        item["image"]
+        for msg in messages
+        for item in msg["content"]
+        if item.get("type") == "image"
+    ]
+
+    # Prepare input tensors; pass images=None for text-only conversations
+    inputs = processor(
+        text=[text_prompt],
+        images=images if images else None,
+        padding=True,
+        return_tensors="pt",
+    )
+
+    # Inference: generate the output and strip the prompt tokens from it
+    output_ids = model.generate(**inputs, max_new_tokens=128)
+    generated_ids = [
+        out_ids[len(in_ids):]
+        for in_ids, out_ids in zip(inputs.input_ids, output_ids)
+    ]
+    output_text = processor.batch_decode(
+        generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True
+    )
+
+    return output_text[0]
+
+# Gradio chat interface function
+def chat_interface(user_input, image: Image.Image = None, history=[]):
+    # Note: the mutable default keeps a single process-wide history shared
+    # across sessions; use gr.State for per-session history instead
+    if image:
+        message = {
+            "role": "user",
+            "content": [{"type": "image", "image": image}, {"type": "text", "text": user_input}],
+        }
+    else:
+        message = {"role": "user", "content": [{"type": "text", "text": user_input}]}
+    history.append(message)
+
+    # Get the model response and add it to the history
+    response = generate_response(history)
+    history.append({"role": "assistant", "content": [{"type": "text", "text": response}]})
+
+    # Convert the message history into (user, assistant) pairs, the format
+    # gr.Chatbot expects
+    pairs = []
+    for msg in history:
+        text = " ".join(item["text"] for item in msg["content"] if item.get("type") == "text")
+        if msg["role"] == "user":
+            pairs.append((text, None))
+        elif pairs and pairs[-1][1] is None:
+            pairs[-1] = (pairs[-1][0], text)
+
+    return pairs, response
+
+# Gradio chat interface setup
+def create_gradio_interface():
+    # Chat interface with image upload and text input
+    interface = gr.Interface(
+        fn=chat_interface,
+        inputs=[
+            gr.Textbox(type="text", label="Your Message"),
+            gr.Image(type="pil", label="Upload an Image"),  # may be left empty
+        ],
+        outputs=[
+            gr.Chatbot(label="Chatbot"),
+            gr.Textbox(label="Model's Response"),
+        ],
+        title="Chat with Vision Model",
+        description="A multimodal demo: chat with the model using text and, optionally, an image; the response takes both into account.",
+        allow_flagging="never",
+    )
+
+    return interface
+
+# Run the Gradio app
+if __name__ == "__main__":
+    interface = create_gradio_interface()
+    interface.launch()
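
The new inference path can be smoke-tested outside Gradio. A minimal sketch, assuming the file above is importable as app and substituting any reachable image URL for the placeholder (the module name and URL are illustrative, not part of the commit):

import requests
from PIL import Image

from app import generate_response  # module name is an assumption

# Placeholder URL; replace with any reachable image
url = "https://example.com/demo.jpg"
image = Image.open(requests.get(url, stream=True).raw)

messages = [{
    "role": "user",
    "content": [
        {"type": "image", "image": image},
        {"type": "text", "text": "Describe this image."},
    ],
}]

print(generate_response(messages))

Because the launch is wrapped in the __main__ guard, importing app loads the model weights but does not start the web UI.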