Spaces:

OP7
/

Test_nvidia_4xL40S

Paused

App Files Files Community

OP7 committed on Jan 27

Commit

a329065

verified ·

1 Parent(s): 9c4039d

Create app.py

Browse files

Files changed (1) hide show

app.py +107 -0

app.py ADDED Viewed

	@@ -0,0 +1,107 @@

+# ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+#
+# This Space was created by SANJOG GHONGE for testing and learning purposes.
+#
+# If you want this Space or its credits removed, please contact me at my email address [[email protected]].
+#
+# Citation : @misc{qvq-72b-preview,
+#               title = {QVQ: To See the World with Wisdom},
+#               url = {https://qwenlm.github.io/blog/qvq-72b-preview/},
+#               author = {Qwen Team},
+#               month = {December},
+#               year = {2024}
+#                  }
+#           @article{Qwen2VL,
+#               title={Qwen2-VL: Enhancing Vision-Language Model's Perception of the World at Any Resolution},
+#               author={Wang, Peng and Bai, Shuai and Tan, Sinan and Wang, Shijie and Fan, Zhihao and Bai,
+#               Jinze and Chen, Keqin and Liu, Xuejing and Wang, Jialin and Ge, Wenbin and Fan, Yang and Dang,
+#               Kai and Du, Mengfei and Ren, Xuancheng and Men, Rui and Liu, Dayiheng and Zhou, Chang and Zhou,
+#               Jingren and Lin, Junyang},
+#               journal={arXiv preprint arXiv:2409.12191},
+#               year={2024}
+#                   }
+#
+# -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
+from qwen_vl_utils import process_vision_info
+import gradio as gr
+from PIL import Image
+# Load the model and processor
+model = Qwen2VLForConditionalGeneration.from_pretrained(
+    "Qwen/QVQ-72B-Preview", torch_dtype="auto", device_map="auto"
+)
+processor = AutoProcessor.from_pretrained("Qwen/QVQ-72B-Preview")
+# Function to process the image and question
+def process_image_and_question(image, question):
+    if image is None or question.strip() == "":
+        return "Please provide both an image and a question."
+    # Prepare the input message
+    messages = [
+        {
+            "role": "system",
+            "content": [
+                {"type": "text", "text": "You are a helpful and harmless assistant. You are Qwen developed by Alibaba. You should think step-by-step."}
+            ],
+        },
+        {
+            "role": "user",
+            "content": [
+                {"type": "image", "image": image},
+                {"type": "text", "text": question},
+            ],
+        }
+    ]
+    # Process the inputs
+    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+    image_inputs, video_inputs = process_vision_info(messages)
+    inputs = processor(
+        text=[text],
+        images=image_inputs,
+        videos=video_inputs,
+        padding=True,
+        return_tensors="pt",
+    )
+    inputs = inputs.to("cuda")
+    # Generate the output
+    generated_ids = model.generate(**inputs, max_new_tokens=8192)
+    generated_ids_trimmed = [
+        out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
+    ]
+    output_text = processor.batch_decode(
+        generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
+    )
+    return output_text[0] if output_text else "No output generated."
+# Define the Gradio interface
+with gr.Blocks() as demo:
+    gr.Markdown("# Sanjog Test : Image and Question Answering\nProvide an image (JPG/PNG) and a related question to get an answer.")
+    with gr.Row():
+        with gr.Column():
+            image_input = gr.Image(type="pil", label="Upload Image (JPG/PNG)")
+            question_input = gr.Textbox(label="Enter your question")
+        with gr.Column():
+            output_box = gr.Textbox(label="Result", interactive=False)
+    with gr.Row():
+        clear_button = gr.Button("Clear")
+        submit_button = gr.Button("Submit")
+    # Define button functionality
+    clear_button.click(lambda: (None, "", ""), inputs=[], outputs=[image_input, question_input, output_box])
+    submit_button.click(process_image_and_question, inputs=[image_input, question_input], outputs=output_box)
+# Launch the interface
+demo.launch()