""" OCR Arena - Main Application A Gradio web application for comparing OCR results from different AI models. """ import gradio as gr import logging import os import datetime from dotenv import load_dotenv from storage import upload_file_to_bucket from db import add_vote, get_all_votes, calculate_elo_ratings_from_votes from ocr_models import process_model_ocr, initialize_gemini, initialize_mistral, initialize_openai from ui_helpers import ( get_model_display_name, select_random_models, format_votes_table, format_elo_leaderboard ) # Load environment variables load_dotenv() # Configure logging logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s') logger = logging.getLogger(__name__) # Initialize API keys and models initialize_gemini() initialize_mistral() initialize_openai() # Get Supabase credentials SUPABASE_URL = os.getenv("SUPABASE_URL") SUPABASE_KEY = os.getenv("SUPABASE_KEY") # Global variables to store current OCR results and image URL current_gemini_output = "" current_mistral_output = "" current_openai_output = "" current_gpt5_output = "" current_gpt5_output = "" current_image_url = "" current_voted_users = set() # Track users who have already voted current_model_a = "" # Store which model was selected as model A current_model_b = "" # Store which model was selected as model B def get_default_username(profile: gr.OAuthProfile | None) -> str: """Returns the username if the user is logged in, or an empty string if not logged in.""" if profile is None: return "" return profile.username def get_current_username(profile_or_username) -> str: """Returns the username from login or "Anonymous" if not logged in.""" # Check if profile_or_username is a profile object with username attribute if hasattr(profile_or_username, 'username') and profile_or_username.username: return profile_or_username.username # Check if profile_or_username is a direct username string elif isinstance(profile_or_username, str) and profile_or_username.strip(): # Extract username from "Logout (username)" format if profile_or_username.startswith("Logout (") and profile_or_username.endswith(")"): return profile_or_username[8:-1] # Remove "Logout (" and ")" # If it's just a username string, return it elif profile_or_username != "Sign in with Hugging Face": return profile_or_username.strip() # Return "Anonymous" if no valid username found return "Anonymous" def process_image(image): """Process uploaded image and select random models for comparison.""" global current_gemini_output, current_mistral_output, current_openai_output, current_image_url, current_voted_users, current_model_a, current_model_b if image is None: return ( "Please upload an image.", "Please upload an image.", gr.update(visible=False), # Hide vote buttons gr.update(visible=False) # Hide vote buttons ) # Reset voted users for new image current_voted_users.clear() # Select two random models model_a, model_b = select_random_models() current_model_a = model_a current_model_b = model_b logger.info(f"🎲 Randomly selected two models for comparison") try: # Save the PIL image to a temporary file temp_filename = f"temp_image_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.png" image.save(temp_filename) # Upload the temporary file to Supabase storage logger.info(f"📤 Uploading image to Supabase storage: {temp_filename}") upload_result = upload_file_to_bucket( file_path=temp_filename, bucket_name="images", storage_path=f"ocr_images/{temp_filename}", file_options={"cache-control": "3600", "upsert": "false"} ) if upload_result["success"]: logger.info(f"✅ Image uploaded successfully: {upload_result['storage_path']}") logger.info(f"🔗 Public URL: {upload_result['public_url']}") # Store the image URL for voting current_image_url = upload_result.get('public_url') or f"{SUPABASE_URL}/storage/v1/object/public/images/ocr_images/{temp_filename}" else: logger.error(f"❌ Image upload failed: {upload_result['error']}") current_image_url = "" # Clean up temporary file try: os.remove(temp_filename) logger.info(f"🗑️ Cleaned up temporary file: {temp_filename}") except Exception as e: logger.warning(f"⚠️ Could not remove temporary file {temp_filename}: {e}") # Return initial state - OCR processing will happen via separate button clicks return ( "Please click 'Run OCR' to start processing.", "Please click 'Run OCR' to start processing.", gr.update(visible=False), # Hide vote buttons initially gr.update(visible=False) # Hide vote buttons initially ) except Exception as e: logger.error(f"Error processing image: {e}") return ( f"Error processing image: {e}", f"Error processing image: {e}", gr.update(visible=False), # Hide vote buttons gr.update(visible=False) # Hide vote buttons ) def check_ocr_completion(model_a_output, model_b_output): """Check if both OCR results are ready and update UI accordingly.""" global current_gemini_output, current_mistral_output, current_openai_output, current_gpt5_output, current_model_a, current_model_b # Check if both results are complete (not processing messages) model_a_ready = (model_a_output and model_a_output != "Please upload an image." and model_a_output != "Processing OCR..." and model_a_output != "Please click 'Run OCR' to start processing." and not model_a_output.startswith("OCR error:")) model_b_ready = (model_b_output and model_b_output != "Please upload an image." and model_b_output != "Processing OCR..." and model_b_output != "Please click 'Run OCR' to start processing." and not model_b_output.startswith("OCR error:")) # Update global variables with actual results based on which models were selected if model_a_ready: if current_model_a == "gemini": current_gemini_output = model_a_output elif current_model_a == "mistral": current_mistral_output = model_a_output elif current_model_a == "openai": current_openai_output = model_a_output elif current_model_a == "gpt5": current_gpt5_output = model_a_output if model_b_ready: if current_model_b == "gemini": current_gemini_output = model_b_output elif current_model_b == "mistral": current_mistral_output = model_b_output elif current_model_b == "openai": current_openai_output = model_b_output elif current_model_b == "gpt5": current_gpt5_output = model_b_output # Show vote buttons only when both are ready if model_a_ready and model_b_ready: return ( gr.update(visible=True), # Show Model A vote button gr.update(visible=True) # Show Model B vote button ) else: return ( gr.update(visible=False), # Hide vote buttons gr.update(visible=False) # Hide vote buttons ) def load_vote_data(): """Load and format vote data for display.""" try: # Get all votes votes = get_all_votes() votes_table_html = format_votes_table(votes) return votes_table_html except Exception as e: logger.error(f"Error loading vote data: {e}") error_html = f"
Error loading data: {e}
" return error_html def load_elo_leaderboard(): """Load and format ELO leaderboard data.""" try: # Get all votes votes = get_all_votes() # Calculate ELO ratings elo_ratings = calculate_elo_ratings_from_votes(votes) # Calculate vote counts for each model vote_counts = { "gemini": 0, "mistral": 0, "openai": 0, "gpt5": 0 } for vote in votes: model_a = vote.get('model_a') model_b = vote.get('model_b') vote_choice = vote.get('vote') if vote_choice == 'model_a' and model_a in vote_counts: vote_counts[model_a] += 1 elif vote_choice == 'model_b' and model_b in vote_counts: vote_counts[model_b] += 1 # Format leaderboard with vote counts leaderboard_html = format_elo_leaderboard(elo_ratings, vote_counts) return leaderboard_html except Exception as e: logger.error(f"Error loading ELO leaderboard: {e}") error_html = f"Error loading ELO leaderboard: {e}
" return error_html # Create the Gradio interface with gr.Blocks(title="OCR Comparison", css=""" .output-box { border: 2px solid #e0e0e0; border-radius: 8px; padding: 15px; margin: 10px 0; background-color: #f9f9f9; min-height: 200px; } .output-box:hover { border-color: #007bff; box-shadow: 0 2px 8px rgba(0,123,255,0.1); } .vote-table { border-collapse: collapse; width: 100%; margin: 10px 0; min-width: 800px; } .vote-table th, .vote-table td { border: 1px solid #ddd; padding: 6px; text-align: left; vertical-align: top; } .vote-table th { background-color: #f2f2f2; font-weight: bold; position: sticky; top: 0; z-index: 10; } .vote-table tr:nth-child(even) { background-color: #f9f9f9; } .vote-table tr:hover { background-color: #f5f5f5; } .vote-table img { transition: transform 0.2s ease; max-width: 100%; height: auto; } .vote-table img:hover { transform: scale(1.1); box-shadow: 0 4px 8px rgba(0,0,0,0.2); } """) as demo: with gr.Tabs(): # Arena Tab (default) with gr.Tab("⚔️ Arena", id=0): gr.Markdown("# ⚔️ OCR Arena: Random Model Selection") gr.Markdown("Upload an image to compare two randomly selected OCR models.") # Authentication section (optional) with gr.Row(): with gr.Column(scale=3): username_display = gr.Textbox( label="Current User", placeholder="Login with Hugging Face to vote (optional) - Anonymous users welcome!", interactive=False, show_label=False ) with gr.Column(scale=1): login_button = gr.LoginButton() with gr.Row(): with gr.Column(): gemini_vote_btn = gr.Button("A is better", variant="primary", size="sm", visible=False) gemini_output = gr.Markdown(label="Model A Output", elem_classes=["output-box"]) image_input = gr.Image(type="pil", label="Upload or Paste Image") with gr.Column(): mistral_vote_btn = gr.Button("B is better", variant="primary", size="sm", visible=False) mistral_output = gr.Markdown(label="Model B Output", elem_classes=["output-box"]) with gr.Row(): process_btn = gr.Button("🔍 Run OCR", variant="primary") # Data Tab with gr.Tab("📊 Data", id=1): gr.Markdown("# 📊 Vote Data") gr.Markdown("View all votes from the OCR Arena") with gr.Row(): refresh_btn = gr.Button("🔄 Refresh Data", variant="secondary") with gr.Row(): votes_table = gr.HTML( value="Loading vote data...
", label="📋 All Votes (Latest First)" ) # Leaderboard Tab with gr.Tab("🏆 Leaderboard", id=2): gr.Markdown("# 🏆 ELO Leaderboard") gr.Markdown("See how the models rank based on their ELO ratings from head-to-head comparisons.") with gr.Row(): refresh_leaderboard_btn = gr.Button("🔄 Refresh Leaderboard", variant="secondary") with gr.Row(): leaderboard_display = gr.HTML( value="Loading ELO leaderboard...
", label="🏆 Model Rankings" ) # Vote functions def vote_model_a(profile_or_username): global current_gemini_output, current_mistral_output, current_openai_output, current_gpt5_output, current_image_url, current_voted_users, current_model_a, current_model_b # Get current username username = get_current_username(profile_or_username) if not username: username = "Anonymous" # Check if user has already voted if username in current_voted_users: gr.Info(f"You have already voted for this image, {username}!") return try: # Use the stored image URL from the upload image_url = current_image_url if current_image_url else "no_image" # Add vote to database logger.info(f"📊 Adding Model A vote for user: {username}") def output_for(model: str) -> str: return { "gemini": current_gemini_output, "mistral": current_mistral_output, "openai": current_openai_output, "gpt5": current_gpt5_output, }.get(model, "") add_vote( username=username, model_a=current_model_a, model_b=current_model_b, model_a_output=output_for(current_model_a), model_b_output=output_for(current_model_b), vote="model_a", image_url=image_url ) # Mark user as voted current_voted_users.add(username) model_a_name = get_model_display_name(current_model_a) model_b_name = get_model_display_name(current_model_b) info_message = ( f"You voted for {model_a_name}.
" f"{model_a_name} - " f"{model_b_name}
" ) gr.Info(info_message) except Exception as e: logger.error(f"❌ Error adding Model A vote: {e}") gr.Info(f"Error recording vote: {e}") def vote_model_b(profile_or_username): global current_gemini_output, current_mistral_output, current_openai_output, current_gpt5_output, current_image_url, current_voted_users, current_model_a, current_model_b # Get current username username = get_current_username(profile_or_username) if not username: username = "Anonymous" # Check if user has already voted if username in current_voted_users: gr.Info(f"You have already voted for this image, {username}!") return try: # Use the stored image URL from the upload image_url = current_image_url if current_image_url else "no_image" # Add vote to database logger.info(f"📊 Adding Model B vote for user: {username}") def output_for(model: str) -> str: return { "gemini": current_gemini_output, "mistral": current_mistral_output, "openai": current_openai_output, "gpt5": current_gpt5_output, }.get(model, "") add_vote( username=username, model_a=current_model_a, model_b=current_model_b, model_a_output=output_for(current_model_a), model_b_output=output_for(current_model_b), vote="model_b", image_url=image_url ) # Mark user as voted current_voted_users.add(username) model_a_name = get_model_display_name(current_model_a) model_b_name = get_model_display_name(current_model_b) info_message = ( f"You voted for {model_b_name}.
" f"{model_a_name} - " f"{model_b_name}
" ) gr.Info(info_message) except Exception as e: logger.error(f"❌ Error adding Model B vote: {e}") gr.Info(f"Error recording vote: {e}") # Event handlers process_btn.click( process_image, inputs=[image_input], outputs=[gemini_output, mistral_output, gemini_vote_btn, mistral_vote_btn], ) # Process both randomly selected OCRs when the process button is clicked def process_model_a_ocr(image): global current_model_a return process_model_ocr(image, current_model_a) def process_model_b_ocr(image): global current_model_b return process_model_ocr(image, current_model_b) process_btn.click( process_model_a_ocr, inputs=[image_input], outputs=[gemini_output], ) process_btn.click( process_model_b_ocr, inputs=[image_input], outputs=[mistral_output], ) # Check completion status when either OCR output changes gemini_output.change( check_ocr_completion, inputs=[gemini_output, mistral_output], outputs=[gemini_vote_btn, mistral_vote_btn], ) mistral_output.change( check_ocr_completion, inputs=[gemini_output, mistral_output], outputs=[gemini_vote_btn, mistral_vote_btn], ) gemini_vote_btn.click( vote_model_a, inputs=[login_button] ) mistral_vote_btn.click( vote_model_b, inputs=[login_button] ) # Refresh data button refresh_btn.click( load_vote_data, inputs=None, outputs=[votes_table] ) # Refresh leaderboard button refresh_leaderboard_btn.click( load_elo_leaderboard, inputs=None, outputs=[leaderboard_display] ) # Update username display when user logs in demo.load(fn=get_default_username, inputs=None, outputs=username_display) # Load vote data when app starts demo.load(fn=load_vote_data, inputs=None, outputs=[votes_table]) # Load leaderboard when app starts demo.load(fn=load_elo_leaderboard, inputs=None, outputs=[leaderboard_display]) if __name__ == "__main__": logger.info("Starting OCR Comparison App...") try: # Try to launch on localhost first demo.launch(share=True) except ValueError as e: logger.warning(f"Localhost not accessible: {e}") logger.info("Launching with public URL...") demo.launch(share=True)