OCRArena / ui_helpers.py
Wassymk's picture
GPT 5
3e159b8
"""
UI Helpers Module
Contains UI formatting and helper functions for the Gradio interface.
"""
import logging
import random
import math
from typing import Dict, Any, List
# Configure logging
logger = logging.getLogger(__name__)
def get_model_display_name(model_name: str) -> str:
    """Return the human-readable label for a model key.

    Args:
        model_name: Internal model key such as ``"gemini"`` or ``"gpt5"``.

    Returns:
        The display label for a known key; any other value is returned
        unchanged.
    """
    known_labels = dict(
        gemini="Gemini 2.0 Flash",
        mistral="Mistral OCR",
        openai="OpenAI GPT-4o",
        gpt5="OpenAI GPT-5",
    )
    try:
        return known_labels[model_name]
    except KeyError:
        # Unknown keys fall through as-is so callers always get something to show.
        return model_name
def select_random_models() -> tuple[str, str]:
    """Pick two distinct model keys at random.

    The candidates are ``gemini``, ``mistral``, ``openai`` and ``gpt5``.

    Returns:
        A ``(first, second)`` tuple of two different model keys, in the
        order they were drawn.
    """
    candidates = ["gemini", "mistral", "openai", "gpt5"]
    # random.sample draws without replacement, so the two picks never coincide.
    first_pick, second_pick = random.sample(candidates, 2)
    return first_pick, second_pick
def _vote_field_text(value: Any, default: str = 'N/A') -> str:
    """Coerce a stored vote field to text, mapping a missing/None value to *default*."""
    return default if value is None else str(value)


def _format_vote_timestamp(timestamp: Any) -> str:
    """Render a vote timestamp as ``YYYY-MM-DD HH:MM:SS`` when it can be parsed."""
    from datetime import datetime

    if timestamp is None or timestamp == 'N/A':
        return 'N/A'
    text = str(timestamp)
    # Already in the target "YYYY-MM-DD HH:MM:SS" layout: keep it verbatim.
    if len(text) == 19 and text[10] == ' ':
        return text
    try:
        # fromisoformat() on older Pythons rejects a trailing "Z", so spell
        # the UTC offset out explicitly before parsing.
        parsed = datetime.fromisoformat(text.replace('Z', '+00:00'))
        return parsed.strftime('%Y-%m-%d %H:%M:%S')
    except ValueError:
        # Best effort: show the raw value rather than dropping the row.
        return text


def _preview_text(text: str, limit: int = 80) -> str:
    """Shorten *text* to *limit* characters, appending ``...`` when truncated."""
    return text[:limit] + "..." if len(text) > limit else text


def format_votes_table(votes: List[Dict[str, Any]]) -> str:
    """Format votes data into an HTML table with OCR outputs and image thumbnails.

    Rows are ordered by the ``timestamp`` field, latest first. Every value
    taken from a vote record is HTML-escaped before being placed in the
    markup, because usernames and OCR text are free-form and may contain
    quotes or angle brackets that would otherwise break the ``title``
    attributes or inject markup.

    Args:
        votes: Vote records. The keys read from each record are
            ``timestamp``, ``username``, ``model_a``, ``model_b``, ``vote``,
            ``model_a_output``, ``model_b_output`` and ``image_url``. A
            missing key or a ``None`` value is displayed as ``N/A`` (or as
            the "No image" placeholder for ``image_url``).

    Returns:
        An HTML fragment containing the table, or a short "no votes"
        paragraph when *votes* is empty.
    """
    import os
    from html import escape

    if not votes:
        return "<p>No votes found in the database.</p>"

    cell_style = (
        "word-wrap: break-word; overflow: hidden; "
        "text-overflow: ellipsis; white-space: nowrap;"
    )
    # Loop-invariant: base URL used to expand bare storage paths.
    storage_base = (os.getenv('SUPABASE_URL') or '').rstrip('/')

    # Sort by timestamp (latest first); None/missing timestamps sort as ''
    # so a single incomplete record cannot make the comparison fail.
    sorted_votes = sorted(
        votes,
        key=lambda record: str(record.get('timestamp') or ''),
        reverse=True,
    )

    parts = [
        "\n".join([
            "",
            '<div style="overflow-x: auto; max-width: 100%;">',
            '<table class="vote-table" style="width: 100%; table-layout: fixed; font-size: 12px;">',
            "<thead>",
            "<tr>",
            '<th style="width: 12%;">Timestamp</th>',
            '<th style="width: 8%;">Username</th>',
            '<th style="width: 10%;">Models</th>',
            '<th style="width: 8%;">Vote</th>',
            '<th style="width: 25%;">Model A Output</th>',
            '<th style="width: 25%;">Model B Output</th>',
            '<th style="width: 12%;">Image</th>',
            "</tr>",
            "</thead>",
            "<tbody>",
            "",
        ])
    ]

    for vote in sorted_votes:
        formatted_time = _format_vote_timestamp(vote.get('timestamp'))
        username = _vote_field_text(vote.get('username'))
        model_a_output = _vote_field_text(vote.get('model_a_output'))
        model_b_output = _vote_field_text(vote.get('model_b_output'))
        vote_choice = vote.get('vote', 'N/A')

        # Get model display names
        model_a_name = get_model_display_name(_vote_field_text(vote.get('model_a')))
        model_b_name = get_model_display_name(_vote_field_text(vote.get('model_b')))
        models_display = f"{model_a_name} vs {model_b_name}"

        # Determine which model was voted for; anything else renders as an
        # empty gray cell.
        voted_model_name = ""
        vote_color = "gray"
        if vote_choice == "model_a":
            voted_model_name = model_a_name
            vote_color = "green"
        elif vote_choice == "model_b":
            voted_model_name = model_b_name
            vote_color = "blue"

        # Truncate OCR outputs for table display; the full text stays
        # available in the cell's title tooltip.
        model_a_preview = _preview_text(model_a_output)
        model_b_preview = _preview_text(model_b_output)

        raw_image = vote.get('image_url')
        image_url = '' if raw_image in (None, 'N/A') else str(raw_image)
        if image_url and not image_url.startswith('http'):
            # A bare storage path: expand it to the public Supabase URL.
            # Without SUPABASE_URL no valid link can be built, so fall back
            # to the placeholder instead of emitting "None/storage/...".
            if storage_base:
                image_url = f"{storage_base}/storage/v1/object/public/images/{image_url}"
            else:
                image_url = ''

        if image_url:
            # The click handler reads the URL from the element itself
            # (this.src) so the URL is never spliced into JavaScript source.
            image_html = (
                f'<img src="{escape(image_url)}" alt="OCR Image" '
                'style="width: 60px; height: 45px; object-fit: cover; '
                'border-radius: 4px; cursor: pointer;" '
                'onclick="window.open(this.src, \'_blank\')" '
                'title="Click to view full image">'
            )
        else:
            image_html = '<span style="color: #999; font-style: italic;">No image</span>'

        parts.append(
            "\n<tr>\n"
            f'<td style="{cell_style}">{escape(formatted_time)}</td>\n'
            f'<td style="{cell_style}"><strong>{escape(username)}</strong></td>\n'
            f'<td style="{cell_style}"><small>{escape(models_display)}</small></td>\n'
            f'<td style="color: {vote_color}; font-weight: bold; {cell_style}">'
            f'{escape(voted_model_name)}</td>\n'
            f'<td style="{cell_style}" title="{escape(model_a_output)}">'
            f'{escape(model_a_preview)}</td>\n'
            f'<td style="{cell_style}" title="{escape(model_b_output)}">'
            f'{escape(model_b_preview)}</td>\n'
            f'<td style="text-align: center;">{image_html}</td>\n'
            "</tr>\n"
        )

    parts.append("\n</tbody>\n</table>\n</div>\n")
    return "".join(parts)
def format_elo_leaderboard(elo_ratings: Dict[str, float], vote_counts: Dict[str, int] = None) -> str:
    """Render ELO ratings as an HTML leaderboard table.

    Args:
        elo_ratings: Mapping of model key to ELO rating.
        vote_counts: Optional mapping of model key to number of votes; a
            model absent from it (or an omitted/empty mapping) shows 0.

    Returns:
        An HTML fragment with one row per model, highest rating first,
        ranks numbered from 1 and ratings rounded to whole numbers.
    """
    # Highest rating first.
    ranking = list(elo_ratings.items())
    ranking.sort(key=lambda entry: entry[1], reverse=True)

    counts = vote_counts if vote_counts else {}

    pieces = [
        "\n".join([
            "",
            '<div style="padding: 15px; background-color: #f8f9fa; border-radius: 8px;">',
            "<h3>ELO Leaderboard</h3>",
            "<p><em>Models are ranked by their ELO rating. Higher ratings indicate better performance.</em></p>",
            '<table class="vote-table" style="margin-top: 15px;">',
            "<thead>",
            "<tr>",
            "<th>Rank</th>",
            "<th>Model</th>",
            "<th>ELO Rating</th>",
            "<th>Total Votes</th>",
            "</tr>",
            "</thead>",
            "<tbody>",
            "",
        ])
    ]

    position = 0
    for model_key, score in ranking:
        position += 1
        label = get_model_display_name(model_key)
        votes_for_model = counts.get(model_key, 0)
        pieces.append(
            "\n<tr>\n"
            f'<td style="font-weight: bold; text-align: center;">{position}</td>\n'
            f"<td><strong>{label}</strong></td>\n"
            f'<td style="font-weight: bold;">{score:.0f}</td>\n'
            f'<td style="text-align: center;">{votes_for_model}</td>\n'
            "</tr>\n"
        )

    pieces.append("\n</tbody>\n</table>\n</div>\n")
    return "".join(pieces)