Wassymk committed on
Commit 3e159b8
1 Parent(s): faea0f8
Files changed (4)
  1. app.py +38 -34
  2. db.py +10 -2
  3. ocr_models.py +33 -0
  4. ui_helpers.py +5 -4
app.py CHANGED
@@ -36,6 +36,8 @@ SUPABASE_KEY = os.getenv("SUPABASE_KEY")
 current_gemini_output = ""
 current_mistral_output = ""
 current_openai_output = ""
+current_gpt5_output = ""
+current_gpt5_output = ""
 current_image_url = ""
 current_voted_users = set() # Track users who have already voted
 current_model_a = "" # Store which model was selected as model A
@@ -74,8 +76,7 @@ def process_image(image):
             "Please upload an image.",
             "Please upload an image.",
             gr.update(visible=False), # Hide vote buttons
-            gr.update(visible=False), # Hide vote buttons
-            "Please upload an image to start (voting is optional)."
+            gr.update(visible=False) # Hide vote buttons
         )

     # Reset voted users for new image
@@ -123,8 +124,7 @@ def process_image(image):
             "Please click 'Run OCR' to start processing.",
             "Please click 'Run OCR' to start processing.",
             gr.update(visible=False), # Hide vote buttons initially
-            gr.update(visible=False), # Hide vote buttons initially
-            "Image uploaded! Two models have been randomly selected. Click 'Run OCR' to process."
+            gr.update(visible=False) # Hide vote buttons initially
         )

     except Exception as e:
@@ -133,13 +133,12 @@ def process_image(image):
             f"Error processing image: {e}",
             f"Error processing image: {e}",
             gr.update(visible=False), # Hide vote buttons
-            gr.update(visible=False), # Hide vote buttons
-            f"Error: {e}"
+            gr.update(visible=False) # Hide vote buttons
         )

 def check_ocr_completion(model_a_output, model_b_output):
     """Check if both OCR results are ready and update UI accordingly."""
-    global current_gemini_output, current_mistral_output, current_openai_output, current_model_a, current_model_b
+    global current_gemini_output, current_mistral_output, current_openai_output, current_gpt5_output, current_model_a, current_model_b

     # Check if both results are complete (not processing messages)
     model_a_ready = (model_a_output and
@@ -162,6 +161,8 @@ def check_ocr_completion(model_a_output, model_b_output):
             current_mistral_output = model_a_output
         elif current_model_a == "openai":
             current_openai_output = model_a_output
+        elif current_model_a == "gpt5":
+            current_gpt5_output = model_a_output

     if model_b_ready:
         if current_model_b == "gemini":
@@ -170,26 +171,19 @@ def check_ocr_completion(model_a_output, model_b_output):
             current_mistral_output = model_b_output
         elif current_model_b == "openai":
             current_openai_output = model_b_output
+        elif current_model_b == "gpt5":
+            current_gpt5_output = model_b_output

     # Show vote buttons only when both are ready
     if model_a_ready and model_b_ready:
         return (
             gr.update(visible=True), # Show Model A vote button
-            gr.update(visible=True), # Show Model B vote button
-            "OCR completed! You can now vote for your preferred result (optional)."
-        )
-    elif model_a_ready or model_b_ready:
-        ready_count = sum([model_a_ready, model_b_ready])
-        return (
-            gr.update(visible=False), # Hide vote buttons
-            gr.update(visible=False), # Hide vote buttons
-            f"OCR in progress... ({ready_count}/2 completed)"
+            gr.update(visible=True) # Show Model B vote button
         )
     else:
         return (
             gr.update(visible=False), # Hide vote buttons
-            gr.update(visible=False), # Hide vote buttons
-            "Processing OCR results..."
+            gr.update(visible=False) # Hide vote buttons
         )

 def load_vote_data():
@@ -321,13 +315,7 @@ with gr.Blocks(title="OCR Comparison", css="""
             mistral_vote_btn = gr.Button("B is better", variant="primary", size="sm", visible=False)
             mistral_output = gr.Markdown(label="Model B Output", elem_classes=["output-box"])

-    # Status indicator
-    status_text = gr.Textbox(
-        label="Status",
-        placeholder="Upload an image and run OCR to compare results (voting is optional)",
-        interactive=False,
-        show_label=False
-    )
+

     with gr.Row():
         process_btn = gr.Button("🔍 Run OCR", variant="primary")
@@ -362,7 +350,7 @@ with gr.Blocks(title="OCR Comparison", css="""

     # Vote functions
     def vote_model_a(profile_or_username):
-        global current_gemini_output, current_mistral_output, current_openai_output, current_image_url, current_voted_users, current_model_a, current_model_b
+        global current_gemini_output, current_mistral_output, current_openai_output, current_gpt5_output, current_image_url, current_voted_users, current_model_a, current_model_b

         # Get current username
         username = get_current_username(profile_or_username)
@@ -381,12 +369,20 @@ with gr.Blocks(title="OCR Comparison", css="""

         # Add vote to database
         logger.info(f"📊 Adding Model A vote for user: {username}")
+        def output_for(model: str) -> str:
+            return {
+                "gemini": current_gemini_output,
+                "mistral": current_mistral_output,
+                "openai": current_openai_output,
+                "gpt5": current_gpt5_output,
+            }.get(model, "")
+
         add_vote(
             username=username,
             model_a=current_model_a,
             model_b=current_model_b,
-            model_a_output=current_gemini_output if current_model_a == "gemini" else current_mistral_output if current_model_a == "mistral" else current_openai_output,
-            model_b_output=current_gemini_output if current_model_b == "gemini" else current_mistral_output if current_model_b == "mistral" else current_openai_output,
+            model_a_output=output_for(current_model_a),
+            model_b_output=output_for(current_model_b),
             vote="model_a",
             image_url=image_url
         )
@@ -408,7 +404,7 @@ with gr.Blocks(title="OCR Comparison", css="""
             gr.Info(f"Error recording vote: {e}")

     def vote_model_b(profile_or_username):
-        global current_gemini_output, current_mistral_output, current_openai_output, current_image_url, current_voted_users, current_model_a, current_model_b
+        global current_gemini_output, current_mistral_output, current_openai_output, current_gpt5_output, current_image_url, current_voted_users, current_model_a, current_model_b

         # Get current username
         username = get_current_username(profile_or_username)
@@ -427,12 +423,20 @@ with gr.Blocks(title="OCR Comparison", css="""

         # Add vote to database
         logger.info(f"📊 Adding Model B vote for user: {username}")
+        def output_for(model: str) -> str:
+            return {
+                "gemini": current_gemini_output,
+                "mistral": current_mistral_output,
+                "openai": current_openai_output,
+                "gpt5": current_gpt5_output,
+            }.get(model, "")
+
         add_vote(
             username=username,
             model_a=current_model_a,
             model_b=current_model_b,
-            model_a_output=current_gemini_output if current_model_a == "gemini" else current_mistral_output if current_model_a == "mistral" else current_openai_output,
-            model_b_output=current_gemini_output if current_model_b == "gemini" else current_mistral_output if current_model_b == "mistral" else current_openai_output,
+            model_a_output=output_for(current_model_a),
+            model_b_output=output_for(current_model_b),
             vote="model_b",
             image_url=image_url
         )
@@ -457,7 +461,7 @@ with gr.Blocks(title="OCR Comparison", css="""
     process_btn.click(
         process_image,
         inputs=[image_input],
-        outputs=[gemini_output, mistral_output, gemini_vote_btn, mistral_vote_btn, status_text],
+        outputs=[gemini_output, mistral_output, gemini_vote_btn, mistral_vote_btn],
     )

     # Process both randomly selected OCRs when the process button is clicked
@@ -485,13 +489,13 @@ with gr.Blocks(title="OCR Comparison", css="""
     gemini_output.change(
         check_ocr_completion,
         inputs=[gemini_output, mistral_output],
-        outputs=[gemini_vote_btn, mistral_vote_btn, status_text],
+        outputs=[gemini_vote_btn, mistral_vote_btn],
     )

     mistral_output.change(
         check_ocr_completion,
         inputs=[gemini_output, mistral_output],
-        outputs=[gemini_vote_btn, mistral_vote_btn, status_text],
+        outputs=[gemini_vote_btn, mistral_vote_btn],
     )

     gemini_vote_btn.click(
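
The vote handlers above replace the chained conditional expressions with a small `output_for` lookup, defined identically inside both `vote_model_a` and `vote_model_b`. A minimal, self-contained sketch of that dispatch follows; the placeholder values are illustrative and not part of the commit, and hoisting a single module-level copy would behave the same way:

# Illustrative stand-ins for the module-level globals the real helper reads.
current_gemini_output = "gemini transcription"
current_mistral_output = "mistral transcription"
current_openai_output = "gpt-4o transcription"
current_gpt5_output = "gpt-5 transcription"

def output_for(model: str) -> str:
    """Return the cached OCR output for a model key, or "" for unknown keys."""
    return {
        "gemini": current_gemini_output,
        "mistral": current_mistral_output,
        "openai": current_openai_output,
        "gpt5": current_gpt5_output,
    }.get(model, "")

assert output_for("gpt5") == "gpt-5 transcription"
assert output_for("unknown") == ""  # unrecognized keys fall back to an empty string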
db.py CHANGED
@@ -146,6 +146,7 @@ def get_vote_statistics() -> Dict[str, Any]:
         gemini_votes = 0
         mistral_votes = 0
         openai_votes = 0
+        gpt5_votes = 0
         total_votes = len(votes)

         for vote in votes:
@@ -160,6 +161,8 @@ def get_vote_statistics() -> Dict[str, Any]:
                     mistral_votes += 1
                 elif model_a == 'openai':
                     openai_votes += 1
+                elif model_a == 'gpt5':
+                    gpt5_votes += 1
             elif vote_choice == 'model_b':
                 if model_b == 'gemini':
                     gemini_votes += 1
@@ -167,15 +170,19 @@ def get_vote_statistics() -> Dict[str, Any]:
                     mistral_votes += 1
                 elif model_b == 'openai':
                     openai_votes += 1
+                elif model_b == 'gpt5':
+                    gpt5_votes += 1

         return {
             "total_votes": total_votes,
             "gemini_votes": gemini_votes,
             "mistral_votes": mistral_votes,
             "openai_votes": openai_votes,
+            "gpt5_votes": gpt5_votes,
             "gemini_percentage": (gemini_votes / total_votes * 100) if total_votes > 0 else 0,
             "mistral_percentage": (mistral_votes / total_votes * 100) if total_votes > 0 else 0,
-            "openai_percentage": (openai_votes / total_votes * 100) if total_votes > 0 else 0
+            "openai_percentage": (openai_votes / total_votes * 100) if total_votes > 0 else 0,
+            "gpt5_percentage": (gpt5_votes / total_votes * 100) if total_votes > 0 else 0
         }
     except Exception as e:
         logger.error(f"❌ Error getting vote statistics: {e}")
@@ -218,7 +225,8 @@ def calculate_elo_ratings_from_votes(votes: List[Dict[str, Any]]) -> Dict[str, f
     elo_ratings = {
         "gemini": 1500,
         "mistral": 1500,
-        "openai": 1500
+        "openai": 1500,
+        "gpt5": 1500
     }

     # Process each vote to update ELO ratings
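
`calculate_elo_ratings_from_votes` now seeds all four models at 1500. The per-vote update loop is outside this diff; for reference, a conventional Elo step consistent with that baseline looks roughly like the sketch below (the K-factor of 32 and the 400-point logistic curve are standard Elo assumptions, not necessarily the exact constants db.py uses):

def elo_update(ratings: dict, winner: str, loser: str, k: float = 32.0) -> None:
    """Apply one Elo update in place: the winner gains what the loser gives up."""
    expected_win = 1.0 / (1.0 + 10 ** ((ratings[loser] - ratings[winner]) / 400.0))
    ratings[winner] += k * (1.0 - expected_win)
    ratings[loser] -= k * (1.0 - expected_win)

ratings = {"gemini": 1500.0, "mistral": 1500.0, "openai": 1500.0, "gpt5": 1500.0}
elo_update(ratings, winner="gpt5", loser="openai")
print(ratings["gpt5"], ratings["openai"])  # 1516.0 1484.0 for evenly matched models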
ocr_models.py CHANGED
@@ -115,6 +115,37 @@ def openai_ocr(image: Image.Image):
         logger.error(f"OpenAI OCR error: {e}")
         return f"OpenAI OCR error: {e}"

+def gpt5_ocr(image: Image.Image):
+    """Process OCR using OpenAI's GPT-5 model with the same prompt."""
+    try:
+        # Convert image to base64 (PNG) and use as data URL
+        buffered = io.BytesIO()
+        image.save(buffered, format="PNG")
+        img_bytes = buffered.getvalue()
+        base64_image = base64.b64encode(img_bytes).decode('utf-8')
+        image_data_url = f"data:image/png;base64,{base64_image}"
+
+        # Use Chat Completions style content for multimodal reliability
+        response = openai.chat.completions.create(
+            model="gpt-5",
+            messages=[
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": "Extract and transcribe all text from this image. Return only the transcribed text in markdown format, preserving any formatting like headers, lists, etc."},
+                        {"type": "image_url", "image_url": {"url": image_data_url}}
+                    ]
+                }
+            ]
+        )
+
+        markdown_text = response.choices[0].message.content
+        logger.info("GPT-5 OCR completed successfully")
+        return markdown_text
+    except Exception as e:
+        logger.error(f"GPT-5 OCR error: {e}")
+        return f"GPT-5 OCR error: {e}"
+
 def process_model_ocr(image, model_name):
     """Process OCR for a specific model."""
     if model_name == "gemini":
@@ -123,6 +154,8 @@ def process_model_ocr(image, model_name):
         return mistral_ocr(image)
     elif model_name == "openai":
         return openai_ocr(image)
+    elif model_name == "gpt5":
+        return gpt5_ocr(image)
     else:
         return f"Unknown model: {model_name}"

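The new `gpt5_ocr` path reuses the existing base64 data-URL flow and the same transcription prompt, so it can be exercised through `process_model_ocr` like the other backends. A quick smoke test, assuming the repo modules are importable, a valid OPENAI_API_KEY is configured, and the image path is only an example:

from PIL import Image
from ocr_models import process_model_ocr

image = Image.open("sample_page.png")  # any local test image (illustrative path)
markdown = process_model_ocr(image, "gpt5")  # dispatches to gpt5_ocr
print(markdown)  # transcribed markdown, or a "GPT-5 OCR error: ..." string on failure
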
ui_helpers.py CHANGED
@@ -16,13 +16,14 @@ def get_model_display_name(model_name: str) -> str:
     model_names = {
         "gemini": "Gemini 2.0 Flash",
         "mistral": "Mistral OCR",
-        "openai": "OpenAI GPT-4o"
+        "openai": "OpenAI GPT-4o",
+        "gpt5": "OpenAI GPT-5"
     }
     return model_names.get(model_name, model_name)

 def select_random_models() -> tuple[str, str]:
-    """Randomly select two models from the three available: gemini, mistral, openai."""
-    models = ["gemini", "mistral", "openai"]
+    """Randomly select two models from the available list including gpt5."""
+    models = ["gemini", "mistral", "openai", "gpt5"]
     selected_models = random.sample(models, 2)
     return selected_models[0], selected_models[1]

@@ -147,7 +148,7 @@ def format_elo_leaderboard(elo_ratings: Dict[str, float], vote_counts: Dict[str,
                 <th>Rank</th>
                 <th>Model</th>
                 <th>ELO Rating</th>
-                <th>Total Votes</th>
+                <th>Total Votes</th>
             </tr>
         </thead>
         <tbody>
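
With gpt5 in the pool, `select_random_models` now draws a pair of distinct backends from four (random.sample never repeats a model within a pair), and `get_model_display_name` maps the new key to its label. A short usage sketch, assuming ui_helpers is importable from the repo:

from ui_helpers import get_model_display_name, select_random_models

model_a, model_b = select_random_models()  # e.g. ("gpt5", "mistral")
print(get_model_display_name(model_a), "vs", get_model_display_name(model_b))
# e.g. "OpenAI GPT-5 vs Mistral OCR"; unknown keys fall back to the raw key string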