Wassymk committed
Commit 28673b1 · 1 Parent(s): e0ecc62

add elo score

Files changed (4)
  1. app.py +218 -345
  2. db.py +102 -6
  3. ocr_models.py +158 -0
  4. ui_helpers.py +210 -0
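
The headline change is Elo scoring: each recorded vote is replayed as a head-to-head match, with every model starting at 1500 and a K-factor of 32 (see `calculate_elo_ratings_from_votes` in db.py below). A minimal standalone sketch of that update rule, with illustrative helper names that are not part of the commit:

```python
# Minimal sketch of the Elo update rule this commit implements in db.py
# (ratings start at 1500, K = 32). Function names here are illustrative only.
def expected_score(rating_a: float, rating_b: float) -> float:
    """Probability that A beats B under the Elo model."""
    return 1 / (1 + 10 ** ((rating_b - rating_a) / 400))

def elo_update(rating_a: float, rating_b: float, result_a: float, k: int = 32) -> tuple[float, float]:
    """result_a is 1.0 if A won the vote, 0.0 if B won."""
    expected_a = expected_score(rating_a, rating_b)
    new_a = rating_a + k * (result_a - expected_a)
    new_b = rating_b + k * ((1 - result_a) - (1 - expected_a))
    return new_a, new_b

# First vote ever, model A beats model B: both sides are expected to score 0.5,
# so the winner gains K/2 = 16 points and the loser drops by the same amount.
print(elo_update(1500, 1500, 1.0))  # (1516.0, 1484.0)
```
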
app.py CHANGED
@@ -1,64 +1,55 @@
1
  import gradio as gr
2
- import google.generativeai as genai
3
- from mistralai import Mistral
4
- from PIL import Image
5
- import io
6
- import base64
7
  import logging
8
  import os
 
9
  from dotenv import load_dotenv
10
  from storage import upload_file_to_bucket
11
- from db import add_vote, get_all_votes, get_vote_statistics
12
- import datetime
13
 
14
- # Load environment variables from .env file
15
  load_dotenv()
16
 
17
  # Configure logging
18
  logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
19
  logger = logging.getLogger(__name__)
20
 
21
- # Configure API keys from environment variables
22
- GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
23
- MISTRAL_API_KEY = os.getenv("MISTRAL_API_KEY")
24
-
25
- # Log API key status (without exposing the actual keys)
26
- if GEMINI_API_KEY:
27
- logger.info("✅ GEMINI_API_KEY loaded successfully")
28
- else:
29
- logger.error("❌ GEMINI_API_KEY not found in environment variables")
30
-
31
- if MISTRAL_API_KEY:
32
- logger.info("✅ MISTRAL_API_KEY loaded successfully")
33
- else:
34
- logger.error("❌ MISTRAL_API_KEY not found in environment variables")
35
-
36
- genai.configure(api_key=GEMINI_API_KEY)
37
- gemini_model = genai.GenerativeModel('gemini-2.0-flash-exp')
38
 
 
39
  SUPABASE_URL = os.getenv("SUPABASE_URL")
40
  SUPABASE_KEY = os.getenv("SUPABASE_KEY")
41
 
42
  # Global variables to store current OCR results and image URL
43
  current_gemini_output = ""
44
  current_mistral_output = ""
 
45
  current_image_url = ""
46
  current_voted_users = set() # Track users who have already voted
 
 
47
 
48
 
49
  def get_default_username(profile: gr.OAuthProfile | None) -> str:
50
- """
51
- Returns the username if the user is logged in, or an empty string if not logged in.
52
- """
53
  if profile is None:
54
  return ""
55
  return profile.username
56
 
57
  def get_current_username(profile_or_username) -> str:
58
- """
59
- Returns the username from login or "Anonymous" if not logged in.
60
- Handles both profile objects and direct username strings.
61
- """
62
  # Check if profile_or_username is a profile object with username attribute
63
  if hasattr(profile_or_username, 'username') and profile_or_username.username:
64
  return profile_or_username.username
@@ -74,206 +65,9 @@ def get_current_username(profile_or_username) -> str:
74
  # Return "Anonymous" if no valid username found
75
  return "Anonymous"
76
 
77
- def format_votes_table(votes):
78
- """
79
- Format votes data into an HTML table.
80
- """
81
- if not votes:
82
- return "<p>No votes found in the database.</p>"
83
-
84
- # Sort votes by timestamp (latest first)
85
- sorted_votes = sorted(votes, key=lambda x: x.get('timestamp', ''), reverse=True)
86
-
87
- html = """
88
- <table class="vote-table">
89
- <thead>
90
- <tr>
91
- <th>Timestamp</th>
92
- <th>Username</th>
93
- <th>Vote</th>
94
- <th>Gemini Output</th>
95
- <th>Mistral Output</th>
96
- <th>Image</th>
97
- </tr>
98
- </thead>
99
- <tbody>
100
- """
101
-
102
- for vote in sorted_votes:
103
- timestamp = vote.get('timestamp', 'N/A')
104
- username = vote.get('username', 'N/A')
105
- vote_choice = vote.get('vote', 'N/A')
106
- gemini_output = vote.get('model_a_output', 'N/A')
107
- mistral_output = vote.get('model_b_output', 'N/A')
108
- image_url = vote.get('image_url', 'N/A')
109
-
110
- # Format timestamp - handle both ISO format and our custom format
111
- if timestamp != 'N/A':
112
- try:
113
- from datetime import datetime
114
- # Check if it's already in our desired format
115
- if len(timestamp) == 19 and timestamp[10] == ' ':
116
- # Already in YYYY-MM-DD HH:MM:SS format
117
- formatted_time = timestamp
118
- else:
119
- # Convert from ISO format to our format
120
- dt = datetime.fromisoformat(timestamp.replace('Z', '+00:00'))
121
- formatted_time = dt.strftime('%Y-%m-%d %H:%M:%S')
122
- except:
123
- formatted_time = timestamp
124
- else:
125
- formatted_time = 'N/A'
126
-
127
- # Color code the vote
128
- vote_color = "green" if vote_choice == "gemini" else "blue" if vote_choice == "mistral" else "gray"
129
-
130
- # Truncate OCR outputs for table display
131
- gemini_preview = gemini_output[:100] + "..." if len(gemini_output) > 100 else gemini_output
132
- mistral_preview = mistral_output[:100] + "..." if len(mistral_output) > 100 else mistral_output
133
-
134
- # Fix image URL - use the correct Supabase storage URL format
135
- if image_url and image_url != 'N/A' and not image_url.startswith('http'):
136
- # If it's just a path, construct the full URL
137
- image_url = f"{os.getenv('SUPABASE_URL')}/storage/v1/object/public/images/{image_url}"
138
-
139
- # Create image thumbnail or placeholder
140
- if image_url and image_url != 'N/A':
141
- image_html = f'<img src="{image_url}" alt="OCR Image" style="width: 80px; height: 60px; object-fit: cover; border-radius: 4px; cursor: pointer;" onclick="window.open(\'{image_url}\', \'_blank\')" title="Click to view full image">'
142
- else:
143
- image_html = '<span style="color: #999; font-style: italic;">No image</span>'
144
-
145
- html += f"""
146
- <tr>
147
- <td>{formatted_time}</td>
148
- <td><strong>{username}</strong></td>
149
- <td style="color: {vote_color}; font-weight: bold;">{vote_choice.upper()}</td>
150
- <td title="{gemini_output}">{gemini_preview}</td>
151
- <td title="{mistral_output}">{mistral_preview}</td>
152
- <td>{image_html}</td>
153
- </tr>
154
- """
155
-
156
- html += """
157
- </tbody>
158
- </table>
159
- """
160
-
161
- return html
162
-
163
- def format_statistics(stats):
164
- """
165
- Format statistics data into HTML.
166
- """
167
- if not stats:
168
- return "<p>No statistics available.</p>"
169
-
170
- total_votes = stats.get('total_votes', 0)
171
- gemini_votes = stats.get('gemini_votes', 0)
172
- mistral_votes = stats.get('mistral_votes', 0)
173
- gemini_percentage = stats.get('gemini_percentage', 0)
174
- mistral_percentage = stats.get('mistral_percentage', 0)
175
-
176
- html = f"""
177
- <div style="padding: 15px; background-color: #f8f9fa; border-radius: 8px;">
178
- <h3>📊 Overall Statistics</h3>
179
- <p><strong>Total Votes:</strong> {total_votes}</p>
180
-
181
- <h4>🤖 Gemini Votes</h4>
182
- <p><strong>Count:</strong> {gemini_votes} ({gemini_percentage:.1f}%)</p>
183
-
184
- <h4>🦅 Mistral Votes</h4>
185
- <p><strong>Count:</strong> {mistral_votes} ({mistral_percentage:.1f}%)</p>
186
-
187
- <div style="margin-top: 20px; padding: 10px; background-color: #e9ecef; border-radius: 5px;">
188
- <h4>🏆 Current Winner</h4>
189
- {f'<p style="color: green; font-weight: bold;">🤖 Gemini is leading!</p>' if gemini_votes > mistral_votes else f'<p style="color: blue; font-weight: bold;">🦅 Mistral is leading!</p>' if mistral_votes > gemini_votes else '<p style="color: gray; font-weight: bold;">🤝 It is a tie!</p>'}
190
- </div>
191
- </div>
192
- """
193
-
194
- return html
195
-
196
- def load_vote_data():
197
- """
198
- Load and format vote data for display.
199
- """
200
- try:
201
- # Get all votes
202
- votes = get_all_votes()
203
- votes_table_html = format_votes_table(votes)
204
-
205
- # Get statistics
206
- stats = get_vote_statistics()
207
- stats_html = format_statistics(stats)
208
-
209
- return votes_table_html, stats_html
210
-
211
- except Exception as e:
212
- logger.error(f"Error loading vote data: {e}")
213
- error_html = f"<p style='color: red;'>Error loading data: {e}</p>"
214
- return error_html, error_html
215
-
216
- def gemini_ocr(image: Image.Image):
217
- try:
218
- # Convert image to bytes
219
- buffered = io.BytesIO()
220
- image.save(buffered, format="JPEG")
221
- image_bytes = buffered.getvalue()
222
-
223
- # Create image part
224
- image_part = {
225
- "mime_type": "image/jpeg",
226
- "data": image_bytes
227
- }
228
-
229
- # Generate content
230
- response = gemini_model.generate_content([
231
- "Please transcribe all text visible in this image in markdown format. Return only the transcribed text without any additional commentary. Do not include any icon names such as Pokeball icon or Clipboard icon. Only extract the actual text that a human can read directly from the image. Format the output clearly using appropriate Markdown, such as headings, bold text, and paragraphs. The output should contain only the transcribed text, with no additional explanation or description.",
232
- image_part
233
- ])
234
-
235
- logger.info("Gemini OCR completed successfully")
236
- return str(response.text)
237
-
238
- except Exception as e:
239
- logger.error(f"Gemini OCR error: {e}")
240
- return f"Gemini OCR error: {e}"
241
-
242
- def mistral_ocr(image: Image.Image):
243
- try:
244
- # Convert image to base64
245
- buffered = io.BytesIO()
246
- image.save(buffered, format="JPEG")
247
- img_bytes = buffered.getvalue()
248
- base64_image = base64.b64encode(img_bytes).decode('utf-8')
249
-
250
- client = Mistral(api_key=MISTRAL_API_KEY)
251
- ocr_response = client.ocr.process(
252
- model="mistral-ocr-latest",
253
- document={
254
- "type": "image_url",
255
- "image_url": f"data:image/jpeg;base64,{base64_image}"
256
- }
257
- )
258
-
259
- # Extract markdown from the first page if available
260
- markdown_text = ""
261
- if hasattr(ocr_response, 'pages') and ocr_response.pages:
262
- page = ocr_response.pages[0]
263
- markdown_text = getattr(page, 'markdown', "")
264
-
265
- if not markdown_text:
266
- markdown_text = str(ocr_response)
267
-
268
- logger.info("Mistral OCR completed successfully")
269
- return markdown_text
270
-
271
- except Exception as e:
272
- logger.error(f"Mistral OCR error: {e}")
273
- return f"Mistral OCR error: {e}"
274
-
275
  def process_image(image):
276
- global current_gemini_output, current_mistral_output, current_image_url, current_voted_users
 
277
 
278
  if image is None:
279
  return (
@@ -287,6 +81,13 @@ def process_image(image):
287
  # Reset voted users for new image
288
  current_voted_users.clear()
289
 
290
  try:
291
  # Save the PIL image to a temporary file
292
  temp_filename = f"temp_image_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.png"
@@ -318,12 +119,15 @@ def process_image(image):
318
  logger.warning(f"⚠️ Could not remove temporary file {temp_filename}: {e}")
319
 
320
  # Return initial state - OCR processing will happen via separate button clicks
 
 
 
321
  return (
322
- "Please click 'Run OCR' to start processing.",
323
- "Please click 'Run OCR' to start processing.",
324
  gr.update(visible=False), # Hide vote buttons initially
325
  gr.update(visible=False), # Hide vote buttons initially
326
- "Image uploaded! Click 'Run OCR' to process."
327
  )
328
 
329
  except Exception as e:
@@ -336,72 +140,49 @@ def process_image(image):
336
  f"Error: {e}"
337
  )
338
 
339
- def process_gemini_ocr(image):
340
- """Process Gemini OCR and return result immediately."""
341
- global current_gemini_output
342
-
343
- if image is None:
344
- return "Please upload an image."
345
-
346
- try:
347
- logger.info("🤖 Starting Gemini OCR processing...")
348
- result = gemini_ocr(image)
349
- current_gemini_output = result
350
- logger.info("✅ Gemini OCR completed")
351
- return result
352
- except Exception as e:
353
- logger.error(f"❌ Gemini OCR error: {e}")
354
- return f"Gemini OCR error: {e}"
355
-
356
- def process_mistral_ocr(image):
357
- """Process Mistral OCR and return result immediately."""
358
- global current_mistral_output
359
-
360
- if image is None:
361
- return "Please upload an image."
362
-
363
- try:
364
- logger.info("🦅 Starting Mistral OCR processing...")
365
- result = mistral_ocr(image)
366
- current_mistral_output = result
367
- logger.info("✅ Mistral OCR completed")
368
- return result
369
- except Exception as e:
370
- logger.error(f"❌ Mistral OCR error: {e}")
371
- return f"Mistral OCR error: {e}"
372
-
373
- def check_ocr_completion(gemini_output, mistral_output):
374
  """Check if both OCR results are ready and update UI accordingly."""
375
- global current_gemini_output, current_mistral_output
376
-
377
  # Check if both results are complete (not processing messages)
378
- gemini_ready = (gemini_output and
379
- gemini_output != "Please upload an image." and
380
- gemini_output != "Processing Gemini OCR..." and
381
- gemini_output != "Please click 'Run OCR' to start processing." and
382
- not gemini_output.startswith("Gemini OCR error:"))
383
-
384
- mistral_ready = (mistral_output and
385
- mistral_output != "Please upload an image." and
386
- mistral_output != "Processing Mistral OCR..." and
387
- mistral_output != "Please click 'Run OCR' to start processing." and
388
- not mistral_output.startswith("Mistral OCR error:"))
389
-
390
- # Update global variables with actual results
391
- if gemini_ready:
392
- current_gemini_output = gemini_output
393
- if mistral_ready:
394
- current_mistral_output = mistral_output
 
 
 
395
 
396
  # Show vote buttons only when both are ready
397
- if gemini_ready and mistral_ready:
398
  return (
399
- gr.update(visible=True), # Show Gemini vote button
400
- gr.update(visible=True), # Show Mistral vote button
401
  "OCR completed! You can now vote for your preferred result (optional)."
402
  )
403
- elif gemini_ready or mistral_ready:
404
- ready_count = sum([gemini_ready, mistral_ready])
405
  return (
406
  gr.update(visible=False), # Hide vote buttons
407
  gr.update(visible=False), # Hide vote buttons
@@ -414,7 +195,59 @@ def check_ocr_completion(gemini_output, mistral_output):
414
  "Processing OCR results..."
415
  )
416
 
 
 
417
 
 
 
 
 
418
 
419
  # Create the Gradio interface
420
  with gr.Blocks(title="OCR Comparison", css="""
@@ -447,24 +280,24 @@ with gr.Blocks(title="OCR Comparison", css="""
447
  .vote-table tr:nth-child(even) {
448
  background-color: #f9f9f9;
449
  }
450
- .vote-table tr:hover {
451
- background-color: #f5f5f5;
452
- }
453
- .vote-table img {
454
- transition: transform 0.2s ease;
455
- }
456
- .vote-table img:hover {
457
- transform: scale(1.1);
458
- box-shadow: 0 4px 8px rgba(0,0,0,0.2);
459
- }
460
  """) as demo:
461
-
462
  with gr.Tabs():
463
  # Arena Tab (default)
464
  with gr.Tab("⚔️ Arena", id=0):
465
- gr.Markdown("# ⚔️ OCR Arena: Gemini 2.0 Flash vs Mistral OCR")
466
- gr.Markdown("Upload an image to compare OCR results.")
467
-
468
  # Authentication section (optional)
469
  with gr.Row():
470
  with gr.Column(scale=3):
@@ -476,18 +309,18 @@ with gr.Blocks(title="OCR Comparison", css="""
476
  )
477
  with gr.Column(scale=1):
478
  login_button = gr.LoginButton()
479
-
480
  with gr.Row():
481
  with gr.Column():
482
  gemini_vote_btn = gr.Button("A is better", variant="primary", size="sm", visible=False)
483
- gemini_output = gr.Markdown(label="🤖 Gemini OCR Output", elem_classes=["output-box"])
484
-
485
  image_input = gr.Image(type="pil", label="Upload or Paste Image")
486
-
487
  with gr.Column():
488
  mistral_vote_btn = gr.Button("B is better", variant="primary", size="sm", visible=False)
489
- mistral_output = gr.Markdown(label="🦅 Mistral OCR Output", elem_classes=["output-box"])
490
-
491
  # Status indicator
492
  status_text = gr.Textbox(
493
  label="Status",
@@ -495,18 +328,18 @@ with gr.Blocks(title="OCR Comparison", css="""
495
  interactive=False,
496
  show_label=False
497
  )
498
-
499
  with gr.Row():
500
  process_btn = gr.Button("🔍 Run OCR", variant="primary")
501
-
502
  # Data Tab
503
  with gr.Tab("📊 Data", id=1):
504
  gr.Markdown("# 📊 Vote Data & Statistics")
505
  gr.Markdown("View all votes and statistics from the OCR Arena")
506
-
507
  with gr.Row():
508
  refresh_btn = gr.Button("🔄 Refresh Data", variant="secondary")
509
-
510
  with gr.Row():
511
  with gr.Column(scale=2):
512
  votes_table = gr.HTML(
@@ -518,10 +351,24 @@ with gr.Blocks(title="OCR Comparison", css="""
518
  value="<p>Loading statistics...</p>",
519
  label="📈 Vote Statistics"
520
  )
521
-
 
 
522
  # Vote functions
523
- def vote_gemini(profile_or_username):
524
- global current_gemini_output, current_mistral_output, current_image_url, current_voted_users
525
 
526
  # Get current username
527
  username = get_current_username(profile_or_username)
@@ -539,31 +386,35 @@ with gr.Blocks(title="OCR Comparison", css="""
539
  image_url = current_image_url if current_image_url else "no_image"
540
 
541
  # Add vote to database
542
- logger.info(f"📊 Adding Gemini vote for user: {username}")
543
  add_vote(
544
  username=username,
545
- model_a_output=current_gemini_output,
546
- model_b_output=current_mistral_output,
547
- vote="gemini",
 
 
548
  image_url=image_url
549
  )
550
 
551
  # Mark user as voted
552
  current_voted_users.add(username)
553
 
 
 
554
  info_message = (
555
- f"<p>You voted for <strong style='color:green;'>👈 Gemini OCR</strong>.</p>"
556
- f"<p><span style='color:green;'>👈 Gemini OCR</span> - "
557
- f"<span style='color:blue;'>👉 Mistral OCR</span></p>"
558
  )
559
  gr.Info(info_message)
560
 
561
  except Exception as e:
562
- logger.error(f"❌ Error adding Gemini vote: {e}")
563
  gr.Info(f"Error recording vote: {e}")
564
 
565
- def vote_mistral(profile_or_username):
566
- global current_gemini_output, current_mistral_output, current_image_url, current_voted_users
567
 
568
  # Get current username
569
  username = get_current_username(profile_or_username)
@@ -581,27 +432,31 @@ with gr.Blocks(title="OCR Comparison", css="""
581
  image_url = current_image_url if current_image_url else "no_image"
582
 
583
  # Add vote to database
584
- logger.info(f"📊 Adding Mistral vote for user: {username}")
585
  add_vote(
586
  username=username,
587
- model_a_output=current_gemini_output,
588
- model_b_output=current_mistral_output,
589
- vote="mistral",
 
 
590
  image_url=image_url
591
  )
592
 
593
  # Mark user as voted
594
  current_voted_users.add(username)
595
 
 
 
596
  info_message = (
597
- f"<p>You voted for <strong style='color:blue;'>👉 Mistral OCR</strong>.</p>"
598
- f"<p><span style='color:green;'>👈 Gemini OCR</span> - "
599
- f"<span style='color:blue;'>👉 Mistral OCR</span></p>"
600
  )
601
  gr.Info(info_message)
602
 
603
  except Exception as e:
604
- logger.error(f"❌ Error adding Mistral vote: {e}")
605
  gr.Info(f"Error recording vote: {e}")
606
 
607
  # Event handlers
@@ -611,15 +466,23 @@ with gr.Blocks(title="OCR Comparison", css="""
611
  outputs=[gemini_output, mistral_output, gemini_vote_btn, mistral_vote_btn, status_text],
612
  )
613
 
614
- # Process both OCRs when the process button is clicked
 
 
615
  process_btn.click(
616
- process_gemini_ocr,
617
  inputs=[image_input],
618
  outputs=[gemini_output],
619
  )
620
 
621
  process_btn.click(
622
- process_mistral_ocr,
623
  inputs=[image_input],
624
  outputs=[mistral_output],
625
  )
@@ -638,12 +501,12 @@ with gr.Blocks(title="OCR Comparison", css="""
638
  )
639
 
640
  gemini_vote_btn.click(
641
- vote_gemini,
642
  inputs=[login_button]
643
  )
644
 
645
  mistral_vote_btn.click(
646
- vote_mistral,
647
  inputs=[login_button]
648
  )
649
 
@@ -653,13 +516,23 @@ with gr.Blocks(title="OCR Comparison", css="""
653
  inputs=None,
654
  outputs=[votes_table, stats_display]
655
  )
656
-
 
 
657
  # Update username display when user logs in
658
  demo.load(fn=get_default_username, inputs=None, outputs=username_display)
659
-
660
  # Load vote data when app starts
661
  demo.load(fn=load_vote_data, inputs=None, outputs=[votes_table, stats_display])
662
 
 
 
 
663
  if __name__ == "__main__":
664
  logger.info("Starting OCR Comparison App...")
665
  try:
 
1
+ """
2
+ OCR Arena - Main Application
3
+ A Gradio web application for comparing OCR results from different AI models.
4
+ """
5
+
6
  import gradio as gr
 
 
 
 
 
7
  import logging
8
  import os
9
+ import datetime
10
  from dotenv import load_dotenv
11
  from storage import upload_file_to_bucket
12
+ from db import add_vote, get_all_votes, get_vote_statistics, calculate_elo_ratings_from_votes
13
+ from ocr_models import process_model_ocr, initialize_gemini, initialize_mistral, initialize_openai
14
+ from ui_helpers import (
15
+ get_model_display_name, select_random_models, format_votes_table,
16
+ format_statistics, format_elo_leaderboard
17
+ )
18
 
19
+ # Load environment variables
20
  load_dotenv()
21
 
22
  # Configure logging
23
  logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
24
  logger = logging.getLogger(__name__)
25
 
26
+ # Initialize API keys and models
27
+ initialize_gemini()
28
+ initialize_mistral()
29
+ initialize_openai()
 
 
30
 
31
+ # Get Supabase credentials
32
  SUPABASE_URL = os.getenv("SUPABASE_URL")
33
  SUPABASE_KEY = os.getenv("SUPABASE_KEY")
34
 
35
  # Global variables to store current OCR results and image URL
36
  current_gemini_output = ""
37
  current_mistral_output = ""
38
+ current_openai_output = ""
39
  current_image_url = ""
40
  current_voted_users = set() # Track users who have already voted
41
+ current_model_a = "" # Store which model was selected as model A
42
+ current_model_b = "" # Store which model was selected as model B
43
 
44
 
45
  def get_default_username(profile: gr.OAuthProfile | None) -> str:
46
+ """Returns the username if the user is logged in, or an empty string if not logged in."""
 
 
47
  if profile is None:
48
  return ""
49
  return profile.username
50
 
51
  def get_current_username(profile_or_username) -> str:
52
+ """Returns the username from login or "Anonymous" if not logged in."""
 
 
 
53
  # Check if profile_or_username is a profile object with username attribute
54
  if hasattr(profile_or_username, 'username') and profile_or_username.username:
55
  return profile_or_username.username
 
65
  # Return "Anonymous" if no valid username found
66
  return "Anonymous"
67
 
 
 
 
 
68
  def process_image(image):
69
+ """Process uploaded image and select random models for comparison."""
70
+ global current_gemini_output, current_mistral_output, current_openai_output, current_image_url, current_voted_users, current_model_a, current_model_b
71
 
72
  if image is None:
73
  return (
 
81
  # Reset voted users for new image
82
  current_voted_users.clear()
83
 
84
+ # Select two random models
85
+ model_a, model_b = select_random_models()
86
+ current_model_a = model_a
87
+ current_model_b = model_b
88
+
89
+ logger.info(f"🎲 Randomly selected models: {get_model_display_name(model_a)} vs {get_model_display_name(model_b)}")
90
+
91
  try:
92
  # Save the PIL image to a temporary file
93
  temp_filename = f"temp_image_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.png"
 
119
  logger.warning(f"⚠️ Could not remove temporary file {temp_filename}: {e}")
120
 
121
  # Return initial state - OCR processing will happen via separate button clicks
122
+ model_a_name = get_model_display_name(current_model_a)
123
+ model_b_name = get_model_display_name(current_model_b)
124
+
125
  return (
126
+ f"Please click 'Run OCR' to start processing.\n\nSelected: {model_a_name}",
127
+ f"Please click 'Run OCR' to start processing.\n\nSelected: {model_b_name}",
128
  gr.update(visible=False), # Hide vote buttons initially
129
  gr.update(visible=False), # Hide vote buttons initially
130
+ f"Image uploaded! Randomly selected: {model_a_name} vs {model_b_name}. Click 'Run OCR' to process."
131
  )
132
 
133
  except Exception as e:
 
140
  f"Error: {e}"
141
  )
142
 
143
+ def check_ocr_completion(model_a_output, model_b_output):
 
 
 
 
144
  """Check if both OCR results are ready and update UI accordingly."""
145
+ global current_gemini_output, current_mistral_output, current_openai_output, current_model_a, current_model_b
146
+
147
  # Check if both results are complete (not processing messages)
148
+ model_a_ready = (model_a_output and
149
+ model_a_output != "Please upload an image." and
150
+ model_a_output != "Processing OCR..." and
151
+ model_a_output != "Please click 'Run OCR' to start processing." and
152
+ not model_a_output.startswith("OCR error:"))
153
+
154
+ model_b_ready = (model_b_output and
155
+ model_b_output != "Please upload an image." and
156
+ model_b_output != "Processing OCR..." and
157
+ model_b_output != "Please click 'Run OCR' to start processing." and
158
+ not model_b_output.startswith("OCR error:"))
159
+
160
+ # Update global variables with actual results based on which models were selected
161
+ if model_a_ready:
162
+ if current_model_a == "gemini":
163
+ current_gemini_output = model_a_output
164
+ elif current_model_a == "mistral":
165
+ current_mistral_output = model_a_output
166
+ elif current_model_a == "openai":
167
+ current_openai_output = model_a_output
168
 
169
+ if model_b_ready:
170
+ if current_model_b == "gemini":
171
+ current_gemini_output = model_b_output
172
+ elif current_model_b == "mistral":
173
+ current_mistral_output = model_b_output
174
+ elif current_model_b == "openai":
175
+ current_openai_output = model_b_output
176
+
177
  # Show vote buttons only when both are ready
178
+ if model_a_ready and model_b_ready:
179
  return (
180
+ gr.update(visible=True), # Show Model A vote button
181
+ gr.update(visible=True), # Show Model B vote button
182
  "OCR completed! You can now vote for your preferred result (optional)."
183
  )
184
+ elif model_a_ready or model_b_ready:
185
+ ready_count = sum([model_a_ready, model_b_ready])
186
  return (
187
  gr.update(visible=False), # Hide vote buttons
188
  gr.update(visible=False), # Hide vote buttons
 
195
  "Processing OCR results..."
196
  )
197
 
198
+ def load_vote_data():
199
+ """Load and format vote data for display."""
200
+ try:
201
+ # Get all votes
202
+ votes = get_all_votes()
203
+ votes_table_html = format_votes_table(votes)
204
+
205
+ # Get statistics
206
+ stats = get_vote_statistics()
207
+ stats_html = format_statistics(stats)
208
+
209
+ return votes_table_html, stats_html
210
+
211
+ except Exception as e:
212
+ logger.error(f"Error loading vote data: {e}")
213
+ error_html = f"<p style='color: red;'>Error loading data: {e}</p>"
214
+ return error_html, error_html
215
+
216
+ def load_elo_leaderboard():
217
+ """Load and format ELO leaderboard data."""
218
+ try:
219
+ # Get all votes
220
+ votes = get_all_votes()
221
+
222
+ # Calculate ELO ratings
223
+ elo_ratings = calculate_elo_ratings_from_votes(votes)
224
+
225
+ # Calculate vote counts for each model
226
+ vote_counts = {
227
+ "gemini": 0,
228
+ "mistral": 0,
229
+ "openai": 0
230
+ }
231
+
232
+ for vote in votes:
233
+ model_a = vote.get('model_a')
234
+ model_b = vote.get('model_b')
235
+ vote_choice = vote.get('vote')
236
+
237
+ if vote_choice == 'model_a' and model_a in vote_counts:
238
+ vote_counts[model_a] += 1
239
+ elif vote_choice == 'model_b' and model_b in vote_counts:
240
+ vote_counts[model_b] += 1
241
+
242
+ # Format leaderboard with vote counts
243
+ leaderboard_html = format_elo_leaderboard(elo_ratings, vote_counts)
244
+
245
+ return leaderboard_html
246
 
247
+ except Exception as e:
248
+ logger.error(f"Error loading ELO leaderboard: {e}")
249
+ error_html = f"<p style='color: red;'>Error loading ELO leaderboard: {e}</p>"
250
+ return error_html
251
 
252
  # Create the Gradio interface
253
  with gr.Blocks(title="OCR Comparison", css="""
 
280
  .vote-table tr:nth-child(even) {
281
  background-color: #f9f9f9;
282
  }
283
+ .vote-table tr:hover {
284
+ background-color: #f5f5f5;
285
+ }
286
+ .vote-table img {
287
+ transition: transform 0.2s ease;
288
+ }
289
+ .vote-table img:hover {
290
+ transform: scale(1.1);
291
+ box-shadow: 0 4px 8px rgba(0,0,0,0.2);
292
+ }
293
  """) as demo:
294
+
295
  with gr.Tabs():
296
  # Arena Tab (default)
297
  with gr.Tab("⚔️ Arena", id=0):
298
+ gr.Markdown("# ⚔️ OCR Arena: Random Model Selection")
299
+ gr.Markdown("Upload an image to compare two randomly selected OCR models.")
300
+
301
  # Authentication section (optional)
302
  with gr.Row():
303
  with gr.Column(scale=3):
 
309
  )
310
  with gr.Column(scale=1):
311
  login_button = gr.LoginButton()
312
+
313
  with gr.Row():
314
  with gr.Column():
315
  gemini_vote_btn = gr.Button("A is better", variant="primary", size="sm", visible=False)
316
+ gemini_output = gr.Markdown(label="Model A Output", elem_classes=["output-box"])
317
+
318
  image_input = gr.Image(type="pil", label="Upload or Paste Image")
319
+
320
  with gr.Column():
321
  mistral_vote_btn = gr.Button("B is better", variant="primary", size="sm", visible=False)
322
+ mistral_output = gr.Markdown(label="Model B Output", elem_classes=["output-box"])
323
+
324
  # Status indicator
325
  status_text = gr.Textbox(
326
  label="Status",
 
328
  interactive=False,
329
  show_label=False
330
  )
331
+
332
  with gr.Row():
333
  process_btn = gr.Button("🔍 Run OCR", variant="primary")
334
+
335
  # Data Tab
336
  with gr.Tab("📊 Data", id=1):
337
  gr.Markdown("# 📊 Vote Data & Statistics")
338
  gr.Markdown("View all votes and statistics from the OCR Arena")
339
+
340
  with gr.Row():
341
  refresh_btn = gr.Button("🔄 Refresh Data", variant="secondary")
342
+
343
  with gr.Row():
344
  with gr.Column(scale=2):
345
  votes_table = gr.HTML(
 
351
  value="<p>Loading statistics...</p>",
352
  label="📈 Vote Statistics"
353
  )
354
+
355
+ # Leaderboard Tab
356
+ with gr.Tab("🏆 Leaderboard", id=2):
357
+ gr.Markdown("# 🏆 ELO Leaderboard")
358
+ gr.Markdown("See how the models rank based on their ELO ratings from head-to-head comparisons.")
359
+
360
+ with gr.Row():
361
+ refresh_leaderboard_btn = gr.Button("🔄 Refresh Leaderboard", variant="secondary")
362
+
363
+ with gr.Row():
364
+ leaderboard_display = gr.HTML(
365
+ value="<p>Loading ELO leaderboard...</p>",
366
+ label="🏆 Model Rankings"
367
+ )
368
+
369
  # Vote functions
370
+ def vote_model_a(profile_or_username):
371
+ global current_gemini_output, current_mistral_output, current_openai_output, current_image_url, current_voted_users, current_model_a, current_model_b
372
 
373
  # Get current username
374
  username = get_current_username(profile_or_username)
 
386
  image_url = current_image_url if current_image_url else "no_image"
387
 
388
  # Add vote to database
389
+ logger.info(f"📊 Adding Model A vote for user: {username}")
390
  add_vote(
391
  username=username,
392
+ model_a=current_model_a,
393
+ model_b=current_model_b,
394
+ model_a_output=current_gemini_output if current_model_a == "gemini" else current_mistral_output if current_model_a == "mistral" else current_openai_output,
395
+ model_b_output=current_gemini_output if current_model_b == "gemini" else current_mistral_output if current_model_b == "mistral" else current_openai_output,
396
+ vote="model_a",
397
  image_url=image_url
398
  )
399
 
400
  # Mark user as voted
401
  current_voted_users.add(username)
402
 
403
+ model_a_name = get_model_display_name(current_model_a)
404
+ model_b_name = get_model_display_name(current_model_b)
405
  info_message = (
406
+ f"<p>You voted for <strong style='color:green;'>{model_a_name}</strong>.</p>"
407
+ f"<p><span style='color:green;'>{model_a_name}</span> - "
408
+ f"<span style='color:blue;'>{model_b_name}</span></p>"
409
  )
410
  gr.Info(info_message)
411
 
412
  except Exception as e:
413
+ logger.error(f"❌ Error adding Model A vote: {e}")
414
  gr.Info(f"Error recording vote: {e}")
415
 
416
+ def vote_model_b(profile_or_username):
417
+ global current_gemini_output, current_mistral_output, current_openai_output, current_image_url, current_voted_users, current_model_a, current_model_b
418
 
419
  # Get current username
420
  username = get_current_username(profile_or_username)
 
432
  image_url = current_image_url if current_image_url else "no_image"
433
 
434
  # Add vote to database
435
+ logger.info(f"📊 Adding Model B vote for user: {username}")
436
  add_vote(
437
  username=username,
438
+ model_a=current_model_a,
439
+ model_b=current_model_b,
440
+ model_a_output=current_gemini_output if current_model_a == "gemini" else current_mistral_output if current_model_a == "mistral" else current_openai_output,
441
+ model_b_output=current_gemini_output if current_model_b == "gemini" else current_mistral_output if current_model_b == "mistral" else current_openai_output,
442
+ vote="model_b",
443
  image_url=image_url
444
  )
445
 
446
  # Mark user as voted
447
  current_voted_users.add(username)
448
 
449
+ model_a_name = get_model_display_name(current_model_a)
450
+ model_b_name = get_model_display_name(current_model_b)
451
  info_message = (
452
+ f"<p>You voted for <strong style='color:blue;'>{model_b_name}</strong>.</p>"
453
+ f"<p><span style='color:green;'>{model_a_name}</span> - "
454
+ f"<span style='color:blue;'>{model_b_name}</span></p>"
455
  )
456
  gr.Info(info_message)
457
 
458
  except Exception as e:
459
+ logger.error(f"❌ Error adding Model B vote: {e}")
460
  gr.Info(f"Error recording vote: {e}")
461
 
462
  # Event handlers
 
466
  outputs=[gemini_output, mistral_output, gemini_vote_btn, mistral_vote_btn, status_text],
467
  )
468
 
469
+ # Process both randomly selected OCRs when the process button is clicked
470
+ def process_model_a_ocr(image):
471
+ global current_model_a
472
+ return process_model_ocr(image, current_model_a)
473
+
474
+ def process_model_b_ocr(image):
475
+ global current_model_b
476
+ return process_model_ocr(image, current_model_b)
477
+
478
  process_btn.click(
479
+ process_model_a_ocr,
480
  inputs=[image_input],
481
  outputs=[gemini_output],
482
  )
483
 
484
  process_btn.click(
485
+ process_model_b_ocr,
486
  inputs=[image_input],
487
  outputs=[mistral_output],
488
  )
 
501
  )
502
 
503
  gemini_vote_btn.click(
504
+ vote_model_a,
505
  inputs=[login_button]
506
  )
507
 
508
  mistral_vote_btn.click(
509
+ vote_model_b,
510
  inputs=[login_button]
511
  )
512
 
 
516
  inputs=None,
517
  outputs=[votes_table, stats_display]
518
  )
519
+
520
+ # Refresh leaderboard button
521
+ refresh_leaderboard_btn.click(
522
+ load_elo_leaderboard,
523
+ inputs=None,
524
+ outputs=[leaderboard_display]
525
+ )
526
+
527
  # Update username display when user logs in
528
  demo.load(fn=get_default_username, inputs=None, outputs=username_display)
529
+
530
  # Load vote data when app starts
531
  demo.load(fn=load_vote_data, inputs=None, outputs=[votes_table, stats_display])
532
 
533
+ # Load leaderboard when app starts
534
+ demo.load(fn=load_elo_leaderboard, inputs=None, outputs=[leaderboard_display])
535
+
536
  if __name__ == "__main__":
537
  logger.info("Starting OCR Comparison App...")
538
  try:
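
Stepping back from the diff, the new app.py flow amounts to: pick two of the three models at random, run OCR with each, and record the vote together with the model identities. A rough sketch of that flow outside Gradio, using the helpers added in this commit (the image path, username, and hard-coded vote choice are placeholders):

```python
# Rough sketch of the randomized arena flow this commit wires into app.py,
# run outside Gradio. The image path, username, and hard-coded vote are placeholders.
from PIL import Image

from db import add_vote
from ocr_models import initialize_gemini, initialize_mistral, initialize_openai, process_model_ocr
from ui_helpers import get_model_display_name, select_random_models

initialize_gemini()
initialize_mistral()
initialize_openai()

image = Image.open("sample.png")            # placeholder image
model_a, model_b = select_random_models()   # e.g. ("openai", "gemini")
print(f"{get_model_display_name(model_a)} vs {get_model_display_name(model_b)}")

output_a = process_model_ocr(image, model_a)
output_b = process_model_ocr(image, model_b)

add_vote(
    username="Anonymous",
    model_a=model_a,
    model_b=model_b,
    model_a_output=output_a,
    model_b_output=output_b,
    vote="model_a",        # "model_a" or "model_b", whichever the user preferred
    image_url="no_image",  # app.py uploads the image to Supabase storage and passes the URL here
)
```
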
db.py CHANGED
@@ -6,6 +6,7 @@ avoiding the dependency issues with the supabase client library.
6
  import logging
7
  import requests
8
  import json
 
9
  from typing import Dict, Any, List
10
  from dotenv import load_dotenv
11
  import os
@@ -59,6 +60,8 @@ def test_table_exists(table_name: str = "ocr_votes") -> bool:
59
 
60
  def add_vote(
61
  username: str,
 
 
62
  model_a_output: str,
63
  model_b_output: str,
64
  vote: str,
@@ -72,6 +75,8 @@ def add_vote(
72
 
73
  data = {
74
  "username": username,
 
 
75
  "model_a_output": model_a_output,
76
  "model_b_output": model_b_output,
77
  "vote": vote,
@@ -119,9 +124,11 @@ def test_add_sample_vote() -> bool:
119
  try:
120
  sample_vote = add_vote(
121
  username="test_user",
 
 
122
  model_a_output="# Test Gemini Output\n\nThis is a **test** markdown from Gemini.",
123
  model_b_output="## Test Mistral Output\n\nThis is a *test* markdown from Mistral.",
124
- vote="gemini",
125
  image_url="data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAYEBQYFBAYGBQYHBwYIChAKCgkJChQODwwQFxQYGBcUFhYaHSUfGhsjHBYWICwgIyYnKSopGR8tMC0oMCUoKSj/2wBDAQcHBwoIChMKChMoGhYaKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCj/wAARCAABAAEDASIAAhEBAxEB/8QAFQABAQAAAAAAAAAAAAAAAAAAAAv/xAAUEAEAAAAAAAAAAAAAAAAAAAAA/8QAFQEBAQAAAAAAAAAAAAAAAAAAAAX/xAAUEQEAAAAAAAAAAAAAAAAAAAAA/9oADAMBAAIRAxEAPwCdABmX/9k="
126
  )
127
  logger.info(f"✅ Sample vote added: {sample_vote}")
@@ -135,24 +142,113 @@ def get_vote_statistics() -> Dict[str, Any]:
135
  try:
136
  votes = get_all_votes()
137
 
138
- gemini_votes = sum(1 for vote in votes if vote.get('vote') == 'gemini')
139
- mistral_votes = sum(1 for vote in votes if vote.get('vote') == 'mistral')
140
- tie_votes = sum(1 for vote in votes if vote.get('vote') == 'tie')
 
141
  total_votes = len(votes)
142
 
 
 
 
 
143
  return {
144
  "total_votes": total_votes,
145
  "gemini_votes": gemini_votes,
146
  "mistral_votes": mistral_votes,
147
- "tie_votes": tie_votes,
148
  "gemini_percentage": (gemini_votes / total_votes * 100) if total_votes > 0 else 0,
149
  "mistral_percentage": (mistral_votes / total_votes * 100) if total_votes > 0 else 0,
150
- "tie_percentage": (tie_votes / total_votes * 100) if total_votes > 0 else 0
151
  }
152
  except Exception as e:
153
  logger.error(f"❌ Error getting vote statistics: {e}")
154
  return {}
155
 
 
 
156
  if __name__ == "__main__":
157
  print(test_connection())
158
  print(test_add_sample_vote())
 
6
  import logging
7
  import requests
8
  import json
9
+ import math
10
  from typing import Dict, Any, List
11
  from dotenv import load_dotenv
12
  import os
 
60
 
61
  def add_vote(
62
  username: str,
63
+ model_a: str,
64
+ model_b: str,
65
  model_a_output: str,
66
  model_b_output: str,
67
  vote: str,
 
75
 
76
  data = {
77
  "username": username,
78
+ "model_a": model_a,
79
+ "model_b": model_b,
80
  "model_a_output": model_a_output,
81
  "model_b_output": model_b_output,
82
  "vote": vote,
 
124
  try:
125
  sample_vote = add_vote(
126
  username="test_user",
127
+ model_a="gemini",
128
+ model_b="mistral",
129
  model_a_output="# Test Gemini Output\n\nThis is a **test** markdown from Gemini.",
130
  model_b_output="## Test Mistral Output\n\nThis is a *test* markdown from Mistral.",
131
+ vote="model_a",
132
  image_url="data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAYEBQYFBAYGBQYHBwYIChAKCgkJChQODwwQFxQYGBcUFhYaHSUfGhsjHBYWICwgIyYnKSopGR8tMC0oMCUoKSj/2wBDAQcHBwoIChMKChMoGhYaKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCj/wAARCAABAAEDASIAAhEBAxEB/8QAFQABAQAAAAAAAAAAAAAAAAAAAAv/xAAUEAEAAAAAAAAAAAAAAAAAAAAA/8QAFQEBAQAAAAAAAAAAAAAAAAAAAAX/xAAUEQEAAAAAAAAAAAAAAAAAAAAA/9oADAMBAAIRAxEAPwCdABmX/9k="
133
  )
134
  logger.info(f"✅ Sample vote added: {sample_vote}")
 
142
  try:
143
  votes = get_all_votes()
144
 
145
+ # Count votes for each model
146
+ gemini_votes = 0
147
+ mistral_votes = 0
148
+ openai_votes = 0
149
  total_votes = len(votes)
150
 
151
+ for vote in votes:
152
+ vote_choice = vote.get('vote')
153
+ model_a = vote.get('model_a')
154
+ model_b = vote.get('model_b')
155
+
156
+ if vote_choice == 'model_a':
157
+ if model_a == 'gemini':
158
+ gemini_votes += 1
159
+ elif model_a == 'mistral':
160
+ mistral_votes += 1
161
+ elif model_a == 'openai':
162
+ openai_votes += 1
163
+ elif vote_choice == 'model_b':
164
+ if model_b == 'gemini':
165
+ gemini_votes += 1
166
+ elif model_b == 'mistral':
167
+ mistral_votes += 1
168
+ elif model_b == 'openai':
169
+ openai_votes += 1
170
+
171
  return {
172
  "total_votes": total_votes,
173
  "gemini_votes": gemini_votes,
174
  "mistral_votes": mistral_votes,
175
+ "openai_votes": openai_votes,
176
  "gemini_percentage": (gemini_votes / total_votes * 100) if total_votes > 0 else 0,
177
  "mistral_percentage": (mistral_votes / total_votes * 100) if total_votes > 0 else 0,
178
+ "openai_percentage": (openai_votes / total_votes * 100) if total_votes > 0 else 0
179
  }
180
  except Exception as e:
181
  logger.error(f"❌ Error getting vote statistics: {e}")
182
  return {}
183
 
184
+ def calculate_elo_rating(rating_a: float, rating_b: float, result_a: float, k_factor: int = 32) -> tuple[float, float]:
185
+ """
186
+ Calculate new ELO ratings for two players after a match.
187
+
188
+ Args:
189
+ rating_a: Current ELO rating of player A
190
+ rating_b: Current ELO rating of player B
191
+ result_a: Result for player A (1 for win, 0.5 for draw, 0 for loss)
192
+ k_factor: K-factor determines how much a single result affects the rating
193
+
194
+ Returns:
195
+ tuple: (new_rating_a, new_rating_b)
196
+ """
197
+ # Calculate expected scores
198
+ expected_a = 1 / (1 + 10 ** ((rating_b - rating_a) / 400))
199
+ expected_b = 1 / (1 + 10 ** ((rating_a - rating_b) / 400))
200
+
201
+ # Calculate new ratings
202
+ new_rating_a = rating_a + k_factor * (result_a - expected_a)
203
+ new_rating_b = rating_b + k_factor * ((1 - result_a) - expected_b)
204
+
205
+ return new_rating_a, new_rating_b
206
+
207
+ def calculate_elo_ratings_from_votes(votes: List[Dict[str, Any]]) -> Dict[str, float]:
208
+ """
209
+ Calculate ELO ratings for all models based on vote history.
210
+
211
+ Args:
212
+ votes: List of vote dictionaries from database
213
+
214
+ Returns:
215
+ dict: Current ELO ratings for each model
216
+ """
217
+ # Initialize ELO ratings (starting at 1500)
218
+ elo_ratings = {
219
+ "gemini": 1500,
220
+ "mistral": 1500,
221
+ "openai": 1500
222
+ }
223
+
224
+ # Process each vote to update ELO ratings
225
+ for vote in votes:
226
+ model_a = vote.get('model_a')
227
+ model_b = vote.get('model_b')
228
+ vote_choice = vote.get('vote')
229
+
230
+ if model_a and model_b and vote_choice:
231
+ # Determine result for model A
232
+ if vote_choice == 'model_a':
233
+ result_a = 1 # Model A wins
234
+ elif vote_choice == 'model_b':
235
+ result_a = 0 # Model A loses
236
+ else:
237
+ continue # Skip invalid votes
238
+
239
+ # Calculate new ELO ratings
240
+ new_rating_a, new_rating_b = calculate_elo_rating(
241
+ elo_ratings[model_a],
242
+ elo_ratings[model_b],
243
+ result_a
244
+ )
245
+
246
+ # Update ratings
247
+ elo_ratings[model_a] = new_rating_a
248
+ elo_ratings[model_b] = new_rating_b
249
+
250
+ return elo_ratings
251
+
252
  if __name__ == "__main__":
253
  print(test_connection())
254
  print(test_add_sample_vote())
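
For a quick sanity check of the new rating code, `calculate_elo_ratings_from_votes` can be driven with hand-written vote rows; the dicts below are illustrative and only carry the keys the function actually reads (`model_a`, `model_b`, `vote`):

```python
# Hand-written vote rows exercising calculate_elo_ratings_from_votes (db.py).
# Only the keys the function reads ('model_a', 'model_b', 'vote') matter here.
from db import calculate_elo_ratings_from_votes

votes = [
    {"model_a": "gemini", "model_b": "mistral", "vote": "model_a"},   # gemini wins
    {"model_a": "openai", "model_b": "gemini",  "vote": "model_b"},   # gemini wins again
    {"model_a": "mistral", "model_b": "openai", "vote": "model_a"},   # mistral wins
]

ratings = calculate_elo_ratings_from_votes(votes)
for model, rating in sorted(ratings.items(), key=lambda kv: kv[1], reverse=True):
    print(f"{model}: {rating:.1f}")
# gemini ends up on top (around 1531 with K = 32), having won both of its matches.
```
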
ocr_models.py ADDED
@@ -0,0 +1,158 @@
 
 
1
+ """
2
+ OCR Models Module
3
+ Contains all OCR-related functions for different AI models.
4
+ """
5
+
6
+ import google.generativeai as genai
7
+ from mistralai import Mistral
8
+ from PIL import Image
9
+ import io
10
+ import base64
11
+ import logging
12
+ import openai
13
+ import os
14
+
15
+ # Configure logging
16
+ logger = logging.getLogger(__name__)
17
+
18
+ def gemini_ocr(image: Image.Image):
19
+ """Process OCR using Google's Gemini 2.0 Flash model."""
20
+ try:
21
+ # Initialize Gemini model
22
+ gemini_model = initialize_gemini()
23
+ if not gemini_model:
24
+ return "Gemini OCR error: Failed to initialize Gemini model"
25
+
26
+ # Convert image to base64
27
+ buffered = io.BytesIO()
28
+ image.save(buffered, format="JPEG")
29
+ img_bytes = buffered.getvalue()
30
+ base64_image = base64.b64encode(img_bytes).decode('utf-8')
31
+
32
+ # Create the image part for Gemini
33
+ image_part = {
34
+ "mime_type": "image/jpeg",
35
+ "data": base64_image
36
+ }
37
+
38
+ # Generate content with Gemini
39
+ response = gemini_model.generate_content([
40
+ "Extract and transcribe all text from this image. Return only the transcribed text in markdown format, preserving any formatting like headers, lists, etc.",
41
+ image_part
42
+ ])
43
+
44
+ markdown_text = response.text
45
+ logger.info("Gemini OCR completed successfully")
46
+ return markdown_text
47
+
48
+ except Exception as e:
49
+ logger.error(f"Gemini OCR error: {e}")
50
+ return f"Gemini OCR error: {e}"
51
+
52
+ def mistral_ocr(image: Image.Image):
53
+ """Process OCR using Mistral AI's OCR model."""
54
+ try:
55
+ # Convert image to base64
56
+ buffered = io.BytesIO()
57
+ image.save(buffered, format="JPEG")
58
+ img_bytes = buffered.getvalue()
59
+ base64_image = base64.b64encode(img_bytes).decode('utf-8')
60
+
61
+ client = Mistral(api_key=os.getenv("MISTRAL_API_KEY"))
62
+ ocr_response = client.ocr.process(
63
+ model="mistral-ocr-latest",
64
+ document={
65
+ "type": "image_url",
66
+ "image_url": f"data:image/jpeg;base64,{base64_image}"
67
+ }
68
+ )
69
+
70
+ # Extract markdown from the first page if available
71
+ markdown_text = ""
72
+ if hasattr(ocr_response, 'pages') and ocr_response.pages:
73
+ page = ocr_response.pages[0]
74
+ markdown_text = getattr(page, 'markdown', "")
75
+
76
+ if not markdown_text:
77
+ markdown_text = str(ocr_response)
78
+
79
+ logger.info("Mistral OCR completed successfully")
80
+ return markdown_text
81
+
82
+ except Exception as e:
83
+ logger.error(f"Mistral OCR error: {e}")
84
+ return f"Mistral OCR error: {e}"
85
+
86
+ def openai_ocr(image: Image.Image):
87
+ """Process OCR using OpenAI's GPT-4o model."""
88
+ try:
89
+ # Convert image to base64
90
+ buffered = io.BytesIO()
91
+ image.save(buffered, format="PNG")
92
+ img_bytes = buffered.getvalue()
93
+ base64_image = base64.b64encode(img_bytes).decode('utf-8')
94
+ image_data_url = f"data:image/png;base64,{base64_image}"
95
+
96
+ # Send request to GPT-4o for OCR
97
+ response = openai.chat.completions.create(
98
+ model="gpt-4o",
99
+ messages=[
100
+ {
101
+ "role": "user",
102
+ "content": [
103
+ {"type": "text", "text": "Extract only the transcribed text in markdown, nothing else."},
104
+ {"type": "image_url", "image_url": {"url": image_data_url}}
105
+ ]
106
+ }
107
+ ]
108
+ )
109
+
110
+ markdown_text = response.choices[0].message.content
111
+ logger.info("OpenAI OCR completed successfully")
112
+ return markdown_text
113
+
114
+ except Exception as e:
115
+ logger.error(f"OpenAI OCR error: {e}")
116
+ return f"OpenAI OCR error: {e}"
117
+
118
+ def process_model_ocr(image, model_name):
119
+ """Process OCR for a specific model."""
120
+ if model_name == "gemini":
121
+ return gemini_ocr(image)
122
+ elif model_name == "mistral":
123
+ return mistral_ocr(image)
124
+ elif model_name == "openai":
125
+ return openai_ocr(image)
126
+ else:
127
+ return f"Unknown model: {model_name}"
128
+
129
+ # Initialize Gemini model
130
+ def initialize_gemini():
131
+ """Initialize the Gemini model with API key."""
132
+ gemini_api_key = os.getenv("GEMINI_API_KEY")
133
+ if gemini_api_key:
134
+ genai.configure(api_key=gemini_api_key)
135
+ logger.info("✅ GEMINI_API_KEY loaded successfully")
136
+ return genai.GenerativeModel('gemini-2.0-flash-exp')
137
+ else:
138
+ logger.error("❌ GEMINI_API_KEY not found in environment variables")
139
+ return None
140
+
141
+ # Initialize OpenAI
142
+ def initialize_openai():
143
+ """Initialize OpenAI with API key."""
144
+ openai_api_key = os.getenv("OPENAI_API_KEY")
145
+ if openai_api_key:
146
+ openai.api_key = openai_api_key
147
+ logger.info("✅ OPENAI_API_KEY loaded successfully")
148
+ else:
149
+ logger.error("❌ OPENAI_API_KEY not found in environment variables")
150
+
151
+ # Initialize Mistral
152
+ def initialize_mistral():
153
+ """Initialize Mistral with API key."""
154
+ mistral_api_key = os.getenv("MISTRAL_API_KEY")
155
+ if mistral_api_key:
156
+ logger.info("✅ MISTRAL_API_KEY loaded successfully")
157
+ else:
158
+ logger.error("❌ MISTRAL_API_KEY not found in environment variables")
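
One wrinkle worth noting: `gemini_ocr` initializes its own model and `mistral_ocr` builds its client from `MISTRAL_API_KEY` on each call, but `openai_ocr` relies on `initialize_openai()` having set `openai.api_key` beforehand (app.py does this at startup). A minimal standalone use of the dispatcher, with a placeholder image path:

```python
# Minimal standalone use of the dispatcher added in ocr_models.py.
# initialize_openai() must run first because openai_ocr reads the module-level key;
# gemini_ocr and mistral_ocr fetch their own keys per call. "receipt.png" is a placeholder.
from PIL import Image
from ocr_models import initialize_openai, process_model_ocr

initialize_openai()                    # sets openai.api_key from OPENAI_API_KEY
image = Image.open("receipt.png")      # placeholder image path
print(process_model_ocr(image, "openai"))
```
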
ui_helpers.py ADDED
@@ -0,0 +1,210 @@
 
 
1
+ """
2
+ UI Helpers Module
3
+ Contains UI formatting and helper functions for the Gradio interface.
4
+ """
5
+
6
+ import logging
7
+ import random
8
+ import math
9
+ from typing import Dict, Any, List
10
+
11
+ # Configure logging
12
+ logger = logging.getLogger(__name__)
13
+
14
+ def get_model_display_name(model_name: str) -> str:
15
+ """Get the display name for a model."""
16
+ model_names = {
17
+ "gemini": "Gemini 2.0 Flash",
18
+ "mistral": "Mistral OCR",
19
+ "openai": "OpenAI GPT-4o"
20
+ }
21
+ return model_names.get(model_name, model_name)
22
+
23
+ def select_random_models() -> tuple[str, str]:
24
+ """Randomly select two models from the three available: gemini, mistral, openai."""
25
+ models = ["gemini", "mistral", "openai"]
26
+ selected_models = random.sample(models, 2)
27
+ return selected_models[0], selected_models[1]
28
+
29
+ def format_votes_table(votes: List[Dict[str, Any]]) -> str:
30
+ """Format votes data into an HTML table with OCR outputs and image thumbnails."""
31
+ if not votes:
32
+ return "<p>No votes found in the database.</p>"
33
+
34
+ # Sort votes by timestamp (latest first)
35
+ sorted_votes = sorted(votes, key=lambda x: x.get('timestamp', ''), reverse=True)
36
+
37
+ html = """
38
+ <table class="vote-table">
39
+ <thead>
40
+ <tr>
41
+ <th>Timestamp</th>
42
+ <th>Username</th>
43
+ <th>Models</th>
44
+ <th>Vote</th>
45
+ <th>Model A Output</th>
46
+ <th>Model B Output</th>
47
+ <th>Image</th>
48
+ </tr>
49
+ </thead>
50
+ <tbody>
51
+ """
52
+
53
+ for vote in sorted_votes:
54
+ timestamp = vote.get('timestamp', 'N/A')
55
+ username = vote.get('username', 'N/A')
56
+ model_a = vote.get('model_a', 'N/A')
57
+ model_b = vote.get('model_b', 'N/A')
58
+ vote_choice = vote.get('vote', 'N/A')
59
+ model_a_output = vote.get('model_a_output', 'N/A')
60
+ model_b_output = vote.get('model_b_output', 'N/A')
61
+ image_url = vote.get('image_url', 'N/A')
62
+
63
+ # Format timestamp - handle both ISO format and our custom format
64
+ if timestamp != 'N/A':
65
+ try:
66
+ from datetime import datetime
67
+ # Check if it's already in our desired format
68
+ if len(timestamp) == 19 and timestamp[10] == ' ':
69
+ # Already in YYYY-MM-DD HH:MM:SS format
70
+ formatted_time = timestamp
71
+ else:
72
+ # Convert from ISO format to our format
73
+ dt = datetime.fromisoformat(timestamp.replace('Z', '+00:00'))
74
+ formatted_time = dt.strftime('%Y-%m-%d %H:%M:%S')
75
+ except:
76
+ formatted_time = timestamp
77
+ else:
78
+ formatted_time = 'N/A'
79
+
80
+ # Get model display names
81
+ model_a_name = get_model_display_name(model_a)
82
+ model_b_name = get_model_display_name(model_b)
83
+ models_display = f"{model_a_name} vs {model_b_name}"
84
+
85
+ # Determine which model was voted for and get its display name
86
+ voted_model_name = ""
87
+ vote_color = "gray"
88
+ if vote_choice == "model_a":
89
+ voted_model_name = model_a_name
90
+ vote_color = "green"
91
+ elif vote_choice == "model_b":
92
+ voted_model_name = model_b_name
93
+ vote_color = "blue"
94
+
95
+ # Truncate OCR outputs for table display
96
+ model_a_preview = model_a_output[:100] + "..." if len(model_a_output) > 100 else model_a_output
97
+ model_b_preview = model_b_output[:100] + "..." if len(model_b_output) > 100 else model_b_output
98
+
99
+ # Fix image URL - use the correct Supabase storage URL format
100
+ if image_url and image_url != 'N/A' and not image_url.startswith('http'):
101
+ # If it's just a path, construct the full URL
102
+ import os
103
+ image_url = f"{os.getenv('SUPABASE_URL')}/storage/v1/object/public/images/{image_url}"
104
+
105
+ # Create image thumbnail or placeholder
106
+ if image_url and image_url != 'N/A':
107
+ image_html = f'<img src="{image_url}" alt="OCR Image" style="width: 80px; height: 60px; object-fit: cover; border-radius: 4px; cursor: pointer;" onclick="window.open(\'{image_url}\', \'_blank\')" title="Click to view full image">'
108
+ else:
109
+ image_html = '<span style="color: #999; font-style: italic;">No image</span>'
110
+
111
+ html += f"""
112
+ <tr>
113
+ <td>{formatted_time}</td>
114
+ <td><strong>{username}</strong></td>
115
+ <td><small>{models_display}</small></td>
116
+ <td style="color: {vote_color}; font-weight: bold;">{voted_model_name}</td>
117
+ <td title="{model_a_output}">{model_a_preview}</td>
118
+ <td title="{model_b_output}">{model_b_preview}</td>
119
+ <td>{image_html}</td>
120
+ </tr>
121
+ """
122
+
123
+ html += """
124
+ </tbody>
125
+ </table>
126
+ """
127
+
128
+ return html
129
+
130
+ def format_statistics(stats: Dict[str, Any]) -> str:
131
+ """Format statistics data into HTML."""
132
+ if not stats:
133
+ return "<p>No statistics available.</p>"
134
+
135
+ total_votes = stats.get('total_votes', 0)
136
+ gemini_votes = stats.get('gemini_votes', 0)
137
+ mistral_votes = stats.get('mistral_votes', 0)
138
+ openai_votes = stats.get('openai_votes', 0)
139
+ gemini_percentage = stats.get('gemini_percentage', 0)
140
+ mistral_percentage = stats.get('mistral_percentage', 0)
141
+ openai_percentage = stats.get('openai_percentage', 0)
142
+
143
+ html = f"""
144
+ <div style="padding: 15px; background-color: #f8f9fa; border-radius: 8px;">
145
+ <h3>Overall Statistics</h3>
146
+ <p><strong>Total Votes:</strong> {total_votes}</p>
147
+
148
+ <h4>Gemini Votes</h4>
149
+ <p><strong>Count:</strong> {gemini_votes} ({gemini_percentage:.1f}%)</p>
150
+
151
+ <h4>Mistral Votes</h4>
152
+ <p><strong>Count:</strong> {mistral_votes} ({mistral_percentage:.1f}%)</p>
153
+
154
+ <h4>OpenAI Votes</h4>
155
+ <p><strong>Count:</strong> {openai_votes} ({openai_percentage:.1f}%)</p>
156
+
157
+ <div style="margin-top: 20px; padding: 10px; background-color: #e9ecef; border-radius: 5px;">
158
+ <h4>Current Winner</h4>
159
+ {f'<p style="color: green; font-weight: bold;">Gemini is leading!</p>' if gemini_votes > max(mistral_votes, openai_votes) else f'<p style="color: blue; font-weight: bold;">Mistral is leading!</p>' if mistral_votes > max(gemini_votes, openai_votes) else f'<p style="color: purple; font-weight: bold;">OpenAI is leading!</p>' if openai_votes > max(gemini_votes, mistral_votes) else '<p style="color: gray; font-weight: bold;">It is a tie!</p>'}
160
+ </div>
161
+ </div>
162
+ """
163
+
164
+ return html
165
+
166
+ def format_elo_leaderboard(elo_ratings: Dict[str, float], vote_counts: Dict[str, int] = None) -> str:
167
+ """Format ELO ratings into a leaderboard HTML table."""
168
+ # Sort models by ELO rating (highest first)
169
+ sorted_models = sorted(elo_ratings.items(), key=lambda x: x[1], reverse=True)
170
+
171
+ html = """
172
+ <div style="padding: 15px; background-color: #f8f9fa; border-radius: 8px;">
173
+ <h3>ELO Leaderboard</h3>
174
+ <p><em>Models are ranked by their ELO rating. Higher ratings indicate better performance.</em></p>
175
+
176
+ <table class="vote-table" style="margin-top: 15px;">
177
+ <thead>
178
+ <tr>
179
+ <th>Rank</th>
180
+ <th>Model</th>
181
+ <th>ELO Rating</th>
182
+ <th>Total Votes</th>
183
+ </tr>
184
+ </thead>
185
+ <tbody>
186
+ """
187
+
188
+ for rank, (model, rating) in enumerate(sorted_models, 1):
189
+ # Get model display name
190
+ display_name = get_model_display_name(model)
191
+
192
+ # Get vote count for this model
193
+ vote_count = vote_counts.get(model, 0) if vote_counts else 0
194
+
195
+ html += f"""
196
+ <tr>
197
+ <td style="font-weight: bold; text-align: center;">{rank}</td>
198
+ <td><strong>{display_name}</strong></td>
199
+ <td style="font-weight: bold;">{rating:.0f}</td>
200
+ <td style="text-align: center;">{vote_count}</td>
201
+ </tr>
202
+ """
203
+
204
+ html += """
205
+ </tbody>
206
+ </table>
207
+ </div>
208
+ """
209
+
210
+ return html
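
The leaderboard tab is essentially `calculate_elo_ratings_from_votes` feeding `format_elo_leaderboard`. A quick way to preview the generated HTML outside Gradio (the ratings and vote counts below are made-up illustration values):

```python
# Quick preview of the new leaderboard HTML outside Gradio.
# The ratings and vote counts below are made-up illustration values.
from ui_helpers import format_elo_leaderboard

elo_ratings = {"gemini": 1532.0, "mistral": 1500.0, "openai": 1468.0}
vote_counts = {"gemini": 2, "mistral": 1, "openai": 0}

html = format_elo_leaderboard(elo_ratings, vote_counts)
with open("leaderboard_preview.html", "w") as f:
    f.write(html)  # open the file in a browser to inspect the table
```
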