Multimodal-OCR

Runtime error

App Files Community

prithivMLmods committed on Jan 28

Commit

9522057

verified ·

1 Parent(s): 91cda81

Update app.py

Browse files

Files changed (1) hide show

app.py +7 -34

app.py CHANGED Viewed

@@ -6,37 +6,16 @@ import time
 import torch
 import spaces
-# Define model options
-MODEL_OPTIONS = {
-    "Qwen2VL Base": "Qwen/Qwen2-VL-2B-Instruct",
-    "Latex OCR": "prithivMLmods/Qwen2-VL-OCR-2B-Instruct",
-    "Math Prase": "prithivMLmods/Qwen2-VL-Math-Prase-2B-Instruct",
-    "Text Analogy Ocrtest": "prithivMLmods/Qwen2-VL-Ocrtest-2B-Instruct"
-}
-# Default model setup
-current_model_id = MODEL_OPTIONS["Latex OCR"]
-processor = AutoProcessor.from_pretrained(current_model_id, trust_remote_code=True)
 model = Qwen2VLForConditionalGeneration.from_pretrained(
-    current_model_id,
     trust_remote_code=True,
     torch_dtype=torch.float16
 ).to("cuda").eval()
 @spaces.GPU
-def model_inference(input_dict, history, model_id):
-    global model, processor
-    # Reload the model and processor if the model selection changes
-    if model_id != current_model_id:
-        current_model_id = model_id
-        processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
-        model = Qwen2VLForConditionalGeneration.from_pretrained(
-            model_id,
-            trust_remote_code=True,
-            torch_dtype=torch.float16
-        ).to("cuda").eval()
     text = input_dict["text"]
     files = input_dict["files"]
@@ -102,18 +81,12 @@ examples = [
     [{"text": "Can you describe this image?", "files": ["example_images/newyork.jpg"]}],
     [{"text": "Can you describe this image?", "files": ["example_images/dogs.jpg"]}],
     [{"text": "Where do the severe droughts happen according to this diagram?", "files": ["example_images/examples_weather_events.png"]}],
-]
-# Gradio components
-model_choice = gr.Dropdown(
-    label="Model Selection",
-    choices=list(MODEL_OPTIONS.keys()),
-    value="Latex OCR"
-)
 demo = gr.ChatInterface(
-    fn=lambda inputs, history: model_inference(inputs, history, MODEL_OPTIONS[model_choice.value]),
-    description="# **Qwen2.5-VL-3B-Instruct**",
     examples=examples,
     textbox=gr.MultimodalTextbox(label="Query Input", file_types=["image"], file_count="multiple"),
     stop_btn="Stop Generation",

 import torch
 import spaces
+MODEL_ID = "prithivMLmods/Qwen2-VL-OCR-2B-Instruct"
+processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
 model = Qwen2VLForConditionalGeneration.from_pretrained(
+    MODEL_ID,
     trust_remote_code=True,
     torch_dtype=torch.float16
 ).to("cuda").eval()
 @spaces.GPU
+def model_inference(input_dict, history):
     text = input_dict["text"]
     files = input_dict["files"]
     [{"text": "Can you describe this image?", "files": ["example_images/newyork.jpg"]}],
     [{"text": "Can you describe this image?", "files": ["example_images/dogs.jpg"]}],
     [{"text": "Where do the severe droughts happen according to this diagram?", "files": ["example_images/examples_weather_events.png"]}],
+]
 demo = gr.ChatInterface(
+    fn=model_inference,
+    description="# **Multimodal OCR**",
     examples=examples,
     textbox=gr.MultimodalTextbox(label="Query Input", file_types=["image"], file_count="multiple"),
     stop_btn="Stop Generation",