# OCRArena / ui_helpers.py
# Last commit: "ui fix" (e6e2e79) by Wassymk
"""
UI Helpers Module
Contains UI formatting and helper functions for the Gradio interface.
"""
import logging
import random
import math
from typing import Dict, Any, List
# Module-level logger (no handlers or levels are configured here)
logger = logging.getLogger(__name__)
def get_model_display_name(model_name: str) -> str:
    """Translate an internal model key into its human-readable label.

    Args:
        model_name: Internal identifier such as ``"gemini"``.

    Returns:
        The display label for a known key; any other value is returned
        unchanged so callers always get something printable.
    """
    labels = {
        "gemini": "Gemini 2.0 Flash",
        "mistral": "Mistral OCR",
        "openai": "OpenAI GPT-4o",
    }
    if model_name in labels:
        return labels[model_name]
    # Unknown key: fall back to the raw identifier.
    return model_name
def select_random_models() -> tuple[str, str]:
    """Pick two distinct model keys at random.

    Returns:
        A ``(first, second)`` pair drawn without replacement from
        ``gemini``, ``mistral`` and ``openai``.
    """
    candidates = ["gemini", "mistral", "openai"]
    # random.sample draws without replacement, so the pair never repeats a model.
    first, second = random.sample(candidates, 2)
    return first, second
def _format_vote_timestamp(timestamp: Any) -> str:
    """Return *timestamp* as ``YYYY-MM-DD HH:MM:SS`` when it can be parsed."""
    from datetime import datetime

    if timestamp is None or timestamp == 'N/A':
        return 'N/A'
    text = str(timestamp)
    # Already in the display format: nothing to convert.
    if len(text) == 19 and text[10] == ' ':
        return text
    try:
        # fromisoformat() on older Pythons rejects a trailing "Z".
        parsed = datetime.fromisoformat(text.replace('Z', '+00:00'))
    except ValueError:
        # Best effort: show the raw value rather than dropping the row.
        return text
    return parsed.strftime('%Y-%m-%d %H:%M:%S')


def _vote_output_preview(text: str, limit: int = 80) -> str:
    """Shorten *text* to *limit* characters, appending ``...`` when cut."""
    return text[:limit] + "..." if len(text) > limit else text


def _vote_image_html(image_url: Any) -> str:
    """Build the thumbnail cell content for a vote, or a placeholder."""
    import os
    from html import escape

    placeholder = '<span style="color: #999; font-style: italic;">No image</span>'
    if not image_url or image_url == 'N/A':
        return placeholder

    url = str(image_url)
    if not url.startswith('http'):
        # A bare storage path: prefix it with the Supabase public-object URL.
        base_url = os.getenv('SUPABASE_URL')
        if not base_url:
            # Without the base URL the link would read "None/storage/...".
            return placeholder
        url = f"{base_url.rstrip('/')}/storage/v1/object/public/images/{url}"

    # The click handler reads this.src instead of embedding the URL in a
    # JavaScript string, so the URL only needs HTML-attribute escaping.
    return (
        f'<img src="{escape(url)}" alt="OCR Image" '
        'style="width: 60px; height: 45px; object-fit: cover; '
        'border-radius: 4px; cursor: pointer;" '
        'onclick="window.open(this.src, \'_blank\')" '
        'title="Click to view full image">'
    )


def format_votes_table(votes: List[Dict[str, Any]]) -> str:
    """Format votes data into an HTML table with OCR outputs and image thumbnails.

    Rows are ordered by their ``timestamp`` value, latest first. All
    vote-supplied text (username, OCR outputs, timestamps, image URL) is
    HTML-escaped before being placed in the markup.

    Args:
        votes: Vote records. The keys read are ``timestamp``, ``username``,
            ``model_a``, ``model_b``, ``vote``, ``model_a_output``,
            ``model_b_output`` and ``image_url``. Missing or ``None`` text
            fields are rendered as ``N/A``.

    Returns:
        An HTML fragment containing the table, or a short "no votes"
        paragraph when *votes* is empty.
    """
    from html import escape

    if not votes:
        return "<p>No votes found in the database.</p>"

    def field(vote: Dict[str, Any], key: str) -> str:
        # Stored records may hold explicit nulls; treat them like absent keys.
        value = vote.get(key)
        return 'N/A' if value is None else str(value)

    # Sort on a string key so a null timestamp cannot break the comparison.
    sorted_votes = sorted(
        votes,
        key=lambda vote: str(vote.get('timestamp') or ''),
        reverse=True,
    )

    cell_style = (
        "word-wrap: break-word; overflow: hidden; "
        "text-overflow: ellipsis; white-space: nowrap;"
    )

    parts = [
        '\n<div style="overflow-x: auto; max-width: 100%;">\n'
        '<table class="vote-table" style="width: 100%; table-layout: fixed; font-size: 12px;">\n'
        '<thead>\n'
        '<tr>\n'
        '<th style="width: 12%;">Timestamp</th>\n'
        '<th style="width: 8%;">Username</th>\n'
        '<th style="width: 10%;">Models</th>\n'
        '<th style="width: 8%;">Vote</th>\n'
        '<th style="width: 25%;">Model A Output</th>\n'
        '<th style="width: 25%;">Model B Output</th>\n'
        '<th style="width: 12%;">Image</th>\n'
        '</tr>\n'
        '</thead>\n'
        '<tbody>\n'
    ]

    for vote in sorted_votes:
        formatted_time = _format_vote_timestamp(vote.get('timestamp', 'N/A'))
        username = field(vote, 'username')

        model_a_name = get_model_display_name(field(vote, 'model_a'))
        model_b_name = get_model_display_name(field(vote, 'model_b'))
        models_display = f"{model_a_name} vs {model_b_name}"

        # Show the winning model's label; any other choice stays blank and gray.
        vote_choice = vote.get('vote', 'N/A')
        if vote_choice == "model_a":
            voted_model_name, vote_color = model_a_name, "green"
        elif vote_choice == "model_b":
            voted_model_name, vote_color = model_b_name, "blue"
        else:
            voted_model_name, vote_color = "", "gray"

        model_a_output = field(vote, 'model_a_output')
        model_b_output = field(vote, 'model_b_output')
        # The cell shows a short preview; the full text goes in the tooltip.
        model_a_preview = _vote_output_preview(model_a_output)
        model_b_preview = _vote_output_preview(model_b_output)

        image_html = _vote_image_html(vote.get('image_url', 'N/A'))

        parts.append(
            '\n<tr>\n'
            f'<td style="{cell_style}">{escape(formatted_time)}</td>\n'
            f'<td style="{cell_style}"><strong>{escape(username)}</strong></td>\n'
            f'<td style="{cell_style}"><small>{escape(models_display)}</small></td>\n'
            f'<td style="color: {vote_color}; font-weight: bold; {cell_style}">'
            f'{escape(voted_model_name)}</td>\n'
            f'<td style="{cell_style}" title="{escape(model_a_output)}">'
            f'{escape(model_a_preview)}</td>\n'
            f'<td style="{cell_style}" title="{escape(model_b_output)}">'
            f'{escape(model_b_preview)}</td>\n'
            f'<td style="text-align: center;">{image_html}</td>\n'
            '</tr>\n'
        )

    parts.append('\n</tbody>\n</table>\n</div>\n')
    # Joining once avoids repeated string concatenation inside the loop.
    return "".join(parts)
def format_elo_leaderboard(elo_ratings: Dict[str, float], vote_counts: Dict[str, int] = None) -> str:
    """Render ELO ratings as an HTML leaderboard table.

    Args:
        elo_ratings: Mapping of model key to its ELO rating.
        vote_counts: Optional mapping of model key to its vote total. A
            model missing from it, or an omitted/empty mapping, shows ``0``.

    Returns:
        An HTML fragment with one row per model, highest rating first.
        Ratings are printed with no decimal places and ranks start at 1.
    """
    counts = vote_counts if vote_counts else {}
    # Highest rating first.
    ranking = sorted(elo_ratings.items(), key=lambda entry: entry[1], reverse=True)

    header = (
        '\n<div style="padding: 15px; background-color: #f8f9fa; border-radius: 8px;">\n'
        '<h3>ELO Leaderboard</h3>\n'
        '<p><em>Models are ranked by their ELO rating. '
        'Higher ratings indicate better performance.</em></p>\n'
        '<table class="vote-table" style="margin-top: 15px;">\n'
        '<thead>\n'
        '<tr>\n'
        '<th>Rank</th>\n'
        '<th>Model</th>\n'
        '<th>ELO Rating</th>\n'
        '<th>Total Votes</th>\n'
        '</tr>\n'
        '</thead>\n'
        '<tbody>\n'
    )
    footer = '\n</tbody>\n</table>\n</div>\n'

    body_rows = [
        '\n<tr>\n'
        f'<td style="font-weight: bold; text-align: center;">{position}</td>\n'
        f'<td><strong>{get_model_display_name(name)}</strong></td>\n'
        f'<td style="font-weight: bold;">{score:.0f}</td>\n'
        f'<td style="text-align: center;">{counts.get(name, 0)}</td>\n'
        '</tr>\n'
        for position, (name, score) in enumerate(ranking, start=1)
    ]
    return header + "".join(body_rows) + footer