Spaces:

OP7
/

SG_TestSpace

Paused

App Files Files Community

OP7 committed 16 days ago

Commit

699b814

verified ·

1 Parent(s): 9ff62b8

Update app.py

Browse files

Files changed (1) hide show

app.py +124 -124

app.py CHANGED Viewed

@@ -1,85 +1,135 @@
-# # ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-# #
-# # This space is created by SANJOG GHONGE for testing and learning purpose.
-# #
-# # If you want to remove this space or credits please contact me on my email id [[email protected]].
-# #
-# # Citation : @misc{qvq-72b-preview,
-# #               title = {QVQ: To See the World with Wisdom},
-# #               url = {https://qwenlm.github.io/blog/qvq-72b-preview/},
-# #               author = {Qwen Team},
-# #               month = {December},
-# #               year = {2024}
-# #                  }
-# #           @article{Qwen2VL,
-# #               title={Qwen2-VL: Enhancing Vision-Language Model's Perception of the World at Any Resolution},
-# #               author={Wang, Peng and Bai, Shuai and Tan, Sinan and Wang, Shijie and Fan, Zhihao and Bai,
-# #               Jinze and Chen, Keqin and Liu, Xuejing and Wang, Jialin and Ge, Wenbin and Fan, Yang and Dang,
-# #               Kai and Du, Mengfei and Ren, Xuancheng and Men, Rui and Liu, Dayiheng and Zhou, Chang and Zhou,
-# #               Jingren and Lin, Junyang},
-# #               journal={arXiv preprint arXiv:2409.12191},
-# #               year={2024}
-# #                   }
-# #
-# # -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-# from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
-# from qwen_vl_utils import process_vision_info
 # import gradio as gr
-# from PIL import Image
-# # Load the model and processor
-# model = Qwen2VLForConditionalGeneration.from_pretrained(
-#     "Qwen/QVQ-72B-Preview", torch_dtype="auto", device_map="auto"
-# )
-# processor = AutoProcessor.from_pretrained("Qwen/QVQ-72B-Preview")
-# # Function to process the image and question
 # def process_image_and_question(image, question):
-#     if image is None or question.strip() == "":
 #         return "Please provide both an image and a question."
-#     # Prepare the input message
-#     messages = [
-#         {
-#             "role": "system",
-#             "content": [
-#                 {"type": "text", "text": "You are a helpful and harmless assistant. You are Qwen developed by Alibaba. You should think step-by-step."}
-#             ],
-#         },
-#         {
-#             "role": "user",
-#             "content": [
-#                 {"type": "image", "image": image},
-#                 {"type": "text", "text": question},
-#             ],
-#         }
-#     ]
 #     # Process the inputs
-#     text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-#     image_inputs, video_inputs = process_vision_info(messages)
-#     inputs = processor(
-#         text=[text],
-#         images=image_inputs,
-#         videos=video_inputs,
-#         padding=True,
-#         return_tensors="pt",
-#     )
-#     inputs = inputs.to("cuda")
 #     # Generate the output
-#     generated_ids = model.generate(**inputs, max_new_tokens=8192)
-#     generated_ids_trimmed = [
-#         out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
-#     ]
-#     output_text = processor.batch_decode(
-#         generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
-#     )
-#     return output_text[0] if output_text else "No output generated."
 # # Define the Gradio interface
 # with gr.Blocks() as demo:
@@ -103,53 +153,3 @@
 # # Launch the interface
 # demo.launch()
-# ------------------------------------------------------------------------------------------------------------------------------------
-import gradio as gr
-from transformers import AutoProcessor, AutoModelForImageTextToText
-# Load the processor and model
-model_name = "Qwen/QVQ-72B-Preview"
-processor = AutoProcessor.from_pretrained(model_name)
-model = AutoModelForImageTextToText.from_pretrained(model_name)
-# Define the prediction function
-def process_image_and_question(image, question):
-    if image is None or not question:
-        return "Please provide both an image and a question."
-    # Process the inputs
-    inputs = processor(images=image, text=question, return_tensors="pt")
-    # Generate the output
-    outputs = model.generate(**inputs)
-    answer = processor.batch_decode(outputs, skip_special_tokens=True)[0]
-    return answer
-# Define the Gradio interface
-with gr.Blocks() as demo:
-    gr.Markdown("# Image and Question Answering\nProvide an image (JPG/PNG) and a related question to get an answer.")
-    with gr.Row():
-        with gr.Column():
-            image_input = gr.Image(type="pil", label="Upload Image (JPG/PNG)")
-            question_input = gr.Textbox(label="Enter your question")
-        with gr.Column():
-            output_box = gr.Textbox(label="Result", interactive=False)
-    with gr.Row():
-        clear_button = gr.Button("Clear")
-        submit_button = gr.Button("Submit")
-    # Define button functionality
-    clear_button.click(lambda: (None, "", ""), inputs=[], outputs=[image_input, question_input, output_box])
-    submit_button.click(process_image_and_question, inputs=[image_input, question_input], outputs=output_box)
-# Launch the interface
-demo.launch()

+# ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+#
+# This space is created by SANJOG GHONGE for testing and learning purpose.
+#
+# If you want to remove this space or credits please contact me on my email id [[email protected]].
+#
+# Citation : @misc{qvq-72b-preview,
+#               title = {QVQ: To See the World with Wisdom},
+#               url = {https://qwenlm.github.io/blog/qvq-72b-preview/},
+#               author = {Qwen Team},
+#               month = {December},
+#               year = {2024}
+#                  }
+#           @article{Qwen2VL,
+#               title={Qwen2-VL: Enhancing Vision-Language Model's Perception of the World at Any Resolution},
+#               author={Wang, Peng and Bai, Shuai and Tan, Sinan and Wang, Shijie and Fan, Zhihao and Bai,
+#               Jinze and Chen, Keqin and Liu, Xuejing and Wang, Jialin and Ge, Wenbin and Fan, Yang and Dang,
+#               Kai and Du, Mengfei and Ren, Xuancheng and Men, Rui and Liu, Dayiheng and Zhou, Chang and Zhou,
+#               Jingren and Lin, Junyang},
+#               journal={arXiv preprint arXiv:2409.12191},
+#               year={2024}
+#                   }
+#
+# -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
+from qwen_vl_utils import process_vision_info
+import gradio as gr
+from PIL import Image
+# Load the model and processor
+model = Qwen2VLForConditionalGeneration.from_pretrained(
+    "Qwen/QVQ-72B-Preview", torch_dtype="auto", device_map="auto"
+)
+processor = AutoProcessor.from_pretrained("Qwen/QVQ-72B-Preview")
+# Function to process the image and question
+def process_image_and_question(image, question):
+    if image is None or question.strip() == "":
+        return "Please provide both an image and a question."
+    # Prepare the input message
+    messages = [
+        {
+            "role": "system",
+            "content": [
+                {"type": "text", "text": "You are a helpful and harmless assistant. You are Qwen developed by Alibaba. You should think step-by-step."}
+            ],
+        },
+        {
+            "role": "user",
+            "content": [
+                {"type": "image", "image": image},
+                {"type": "text", "text": question},
+            ],
+        }
+    ]
+    # Process the inputs
+    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+    image_inputs, video_inputs = process_vision_info(messages)
+    inputs = processor(
+        text=[text],
+        images=image_inputs,
+        videos=video_inputs,
+        padding=True,
+        return_tensors="pt",
+    )
+    inputs = inputs.to("cuda")
+    # Generate the output
+    generated_ids = model.generate(**inputs, max_new_tokens=8192)
+    generated_ids_trimmed = [
+        out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
+    ]
+    output_text = processor.batch_decode(
+        generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
+    )
+    return output_text[0] if output_text else "No output generated."
+# Define the Gradio interface
+with gr.Blocks() as demo:
+    gr.Markdown("# Sanjog Image and Question Answering\nProvide an image (JPG/PNG) and a related question to get an answer.")
+    with gr.Row():
+        with gr.Column():
+            image_input = gr.Image(type="pil", label="Upload Image (JPG/PNG)")
+            question_input = gr.Textbox(label="Enter your question")
+        with gr.Column():
+            output_box = gr.Textbox(label="Result", interactive=False)
+    with gr.Row():
+        clear_button = gr.Button("Clear")
+        submit_button = gr.Button("Submit")
+    # Define button functionality
+    clear_button.click(lambda: (None, "", ""), inputs=[], outputs=[image_input, question_input, output_box])
+    submit_button.click(process_image_and_question, inputs=[image_input, question_input], outputs=output_box)
+# Launch the interface
+demo.launch()
+# ------------------------------------------------------------------------------------------------------------------------------------
 # import gradio as gr
+# from transformers import AutoProcessor, AutoModelForImageTextToText
+# # Load the processor and model
+# model_name = "Qwen/QVQ-72B-Preview"
+# processor = AutoProcessor.from_pretrained(model_name)
+# model = AutoModelForImageTextToText.from_pretrained(model_name)
+# # Define the prediction function
 # def process_image_and_question(image, question):
+#     if image is None or not question:
 #         return "Please provide both an image and a question."
 #     # Process the inputs
+#     inputs = processor(images=image, text=question, return_tensors="pt")
 #     # Generate the output
+#     outputs = model.generate(**inputs)
+#     answer = processor.batch_decode(outputs, skip_special_tokens=True)[0]
+#     return answer
 # # Define the Gradio interface
 # with gr.Blocks() as demo:
 # # Launch the interface
 # demo.launch()