Wassymk committed on
Commit 3e159b8
1 Parent(s): faea0f8
Files changed (4)
  1. app.py +38 -34
  2. db.py +10 -2
  3. ocr_models.py +33 -0
  4. ui_helpers.py +5 -4
app.py CHANGED
@@ -36,6 +36,8 @@ SUPABASE_KEY = os.getenv("SUPABASE_KEY")
 current_gemini_output = ""
 current_mistral_output = ""
 current_openai_output = ""
+current_gpt5_output = ""
+current_gpt5_output = ""
 current_image_url = ""
 current_voted_users = set() # Track users who have already voted
 current_model_a = "" # Store which model was selected as model A
@@ -74,8 +76,7 @@ def process_image(image):
             "Please upload an image.",
             "Please upload an image.",
             gr.update(visible=False), # Hide vote buttons
-            gr.update(visible=False), # Hide vote buttons
-            "Please upload an image to start (voting is optional)."
+            gr.update(visible=False) # Hide vote buttons
         )

     # Reset voted users for new image
@@ -123,8 +124,7 @@ def process_image(image):
             "Please click 'Run OCR' to start processing.",
             "Please click 'Run OCR' to start processing.",
             gr.update(visible=False), # Hide vote buttons initially
-            gr.update(visible=False), # Hide vote buttons initially
-            "Image uploaded! Two models have been randomly selected. Click 'Run OCR' to process."
+            gr.update(visible=False) # Hide vote buttons initially
         )

     except Exception as e:
@@ -133,13 +133,12 @@ def process_image(image):
             f"Error processing image: {e}",
             f"Error processing image: {e}",
             gr.update(visible=False), # Hide vote buttons
-            gr.update(visible=False), # Hide vote buttons
-            f"Error: {e}"
+            gr.update(visible=False) # Hide vote buttons
         )

 def check_ocr_completion(model_a_output, model_b_output):
     """Check if both OCR results are ready and update UI accordingly."""
-    global current_gemini_output, current_mistral_output, current_openai_output, current_model_a, current_model_b
+    global current_gemini_output, current_mistral_output, current_openai_output, current_gpt5_output, current_model_a, current_model_b

     # Check if both results are complete (not processing messages)
     model_a_ready = (model_a_output and
@@ -162,6 +161,8 @@ def check_ocr_completion(model_a_output, model_b_output):
             current_mistral_output = model_a_output
         elif current_model_a == "openai":
             current_openai_output = model_a_output
+        elif current_model_a == "gpt5":
+            current_gpt5_output = model_a_output

     if model_b_ready:
         if current_model_b == "gemini":
@@ -170,26 +171,19 @@ def check_ocr_completion(model_a_output, model_b_output):
             current_mistral_output = model_b_output
         elif current_model_b == "openai":
             current_openai_output = model_b_output
+        elif current_model_b == "gpt5":
+            current_gpt5_output = model_b_output

     # Show vote buttons only when both are ready
     if model_a_ready and model_b_ready:
         return (
             gr.update(visible=True), # Show Model A vote button
-            gr.update(visible=True), # Show Model B vote button
-            "OCR completed! You can now vote for your preferred result (optional)."
-        )
-    elif model_a_ready or model_b_ready:
-        ready_count = sum([model_a_ready, model_b_ready])
-        return (
-            gr.update(visible=False), # Hide vote buttons
-            gr.update(visible=False), # Hide vote buttons
-            f"OCR in progress... ({ready_count}/2 completed)"
+            gr.update(visible=True) # Show Model B vote button
         )
     else:
         return (
             gr.update(visible=False), # Hide vote buttons
-            gr.update(visible=False), # Hide vote buttons
-            "Processing OCR results..."
+            gr.update(visible=False) # Hide vote buttons
         )

 def load_vote_data():
@@ -321,13 +315,7 @@ with gr.Blocks(title="OCR Comparison", css="""
             mistral_vote_btn = gr.Button("B is better", variant="primary", size="sm", visible=False)
             mistral_output = gr.Markdown(label="Model B Output", elem_classes=["output-box"])

-    # Status indicator
-    status_text = gr.Textbox(
-        label="Status",
-        placeholder="Upload an image and run OCR to compare results (voting is optional)",
-        interactive=False,
-        show_label=False
-    )
+

     with gr.Row():
         process_btn = gr.Button("🔍 Run OCR", variant="primary")
@@ -362,7 +350,7 @@ with gr.Blocks(title="OCR Comparison", css="""

     # Vote functions
     def vote_model_a(profile_or_username):
-        global current_gemini_output, current_mistral_output, current_openai_output, current_image_url, current_voted_users, current_model_a, current_model_b
+        global current_gemini_output, current_mistral_output, current_openai_output, current_gpt5_output, current_image_url, current_voted_users, current_model_a, current_model_b

         # Get current username
         username = get_current_username(profile_or_username)
@@ -381,12 +369,20 @@ with gr.Blocks(title="OCR Comparison", css="""

         # Add vote to database
         logger.info(f"📊 Adding Model A vote for user: {username}")
+        def output_for(model: str) -> str:
+            return {
+                "gemini": current_gemini_output,
+                "mistral": current_mistral_output,
+                "openai": current_openai_output,
+                "gpt5": current_gpt5_output,
+            }.get(model, "")
+
         add_vote(
             username=username,
             model_a=current_model_a,
             model_b=current_model_b,
-            model_a_output=current_gemini_output if current_model_a == "gemini" else current_mistral_output if current_model_a == "mistral" else current_openai_output,
-            model_b_output=current_gemini_output if current_model_b == "gemini" else current_mistral_output if current_model_b == "mistral" else current_openai_output,
+            model_a_output=output_for(current_model_a),
+            model_b_output=output_for(current_model_b),
             vote="model_a",
             image_url=image_url
         )
@@ -408,7 +404,7 @@ with gr.Blocks(title="OCR Comparison", css="""
             gr.Info(f"Error recording vote: {e}")

     def vote_model_b(profile_or_username):
-        global current_gemini_output, current_mistral_output, current_openai_output, current_image_url, current_voted_users, current_model_a, current_model_b
+        global current_gemini_output, current_mistral_output, current_openai_output, current_gpt5_output, current_image_url, current_voted_users, current_model_a, current_model_b

         # Get current username
         username = get_current_username(profile_or_username)
@@ -427,12 +423,20 @@ with gr.Blocks(title="OCR Comparison", css="""

         # Add vote to database
         logger.info(f"📊 Adding Model B vote for user: {username}")
+        def output_for(model: str) -> str:
+            return {
+                "gemini": current_gemini_output,
+                "mistral": current_mistral_output,
+                "openai": current_openai_output,
+                "gpt5": current_gpt5_output,
+            }.get(model, "")
+
         add_vote(
             username=username,
             model_a=current_model_a,
             model_b=current_model_b,
-            model_a_output=current_gemini_output if current_model_a == "gemini" else current_mistral_output if current_model_a == "mistral" else current_openai_output,
-            model_b_output=current_gemini_output if current_model_b == "gemini" else current_mistral_output if current_model_b == "mistral" else current_openai_output,
+            model_a_output=output_for(current_model_a),
+            model_b_output=output_for(current_model_b),
             vote="model_b",
             image_url=image_url
         )
@@ -457,7 +461,7 @@ with gr.Blocks(title="OCR Comparison", css="""
     process_btn.click(
         process_image,
         inputs=[image_input],
-        outputs=[gemini_output, mistral_output, gemini_vote_btn, mistral_vote_btn, status_text],
+        outputs=[gemini_output, mistral_output, gemini_vote_btn, mistral_vote_btn],
     )

     # Process both randomly selected OCRs when the process button is clicked
@@ -485,13 +489,13 @@ with gr.Blocks(title="OCR Comparison", css="""
     gemini_output.change(
         check_ocr_completion,
         inputs=[gemini_output, mistral_output],
-        outputs=[gemini_vote_btn, mistral_vote_btn, status_text],
+        outputs=[gemini_vote_btn, mistral_vote_btn],
     )

     mistral_output.change(
         check_ocr_completion,
         inputs=[gemini_output, mistral_output],
-        outputs=[gemini_vote_btn, mistral_vote_btn, status_text],
+        outputs=[gemini_vote_btn, mistral_vote_btn],
     )

     gemini_vote_btn.click(
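
The vote handlers above replace the chained conditional expressions with a small `output_for` lookup, defined identically inside both `vote_model_a` and `vote_model_b`. A minimal, self-contained sketch of that dispatch follows; the placeholder values are illustrative and not part of the commit, and hoisting a single module-level copy would behave the same way:

# Illustrative stand-ins for the module-level globals the real helper reads.
current_gemini_output = "gemini transcription"
current_mistral_output = "mistral transcription"
current_openai_output = "gpt-4o transcription"
current_gpt5_output = "gpt-5 transcription"

def output_for(model: str) -> str:
    """Return the cached OCR output for a model key, or "" for unknown keys."""
    return {
        "gemini": current_gemini_output,
        "mistral": current_mistral_output,
        "openai": current_openai_output,
        "gpt5": current_gpt5_output,
    }.get(model, "")

assert output_for("gpt5") == "gpt-5 transcription"
assert output_for("unknown") == ""  # unrecognized keys fall back to an empty string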
db.py CHANGED
@@ -146,6 +146,7 @@ def get_vote_statistics() -> Dict[str, Any]:
         gemini_votes = 0
         mistral_votes = 0
         openai_votes = 0
+        gpt5_votes = 0
         total_votes = len(votes)

         for vote in votes:
@@ -160,6 +161,8 @@ def get_vote_statistics() -> Dict[str, Any]:
                     mistral_votes += 1
                 elif model_a == 'openai':
                     openai_votes += 1
+                elif model_a == 'gpt5':
+                    gpt5_votes += 1
             elif vote_choice == 'model_b':
                 if model_b == 'gemini':
                     gemini_votes += 1
@@ -167,15 +170,19 @@ def get_vote_statistics() -> Dict[str, Any]:
                     mistral_votes += 1
                 elif model_b == 'openai':
                     openai_votes += 1
+                elif model_b == 'gpt5':
+                    gpt5_votes += 1

         return {
             "total_votes": total_votes,
             "gemini_votes": gemini_votes,
             "mistral_votes": mistral_votes,
             "openai_votes": openai_votes,
+            "gpt5_votes": gpt5_votes,
             "gemini_percentage": (gemini_votes / total_votes * 100) if total_votes > 0 else 0,
             "mistral_percentage": (mistral_votes / total_votes * 100) if total_votes > 0 else 0,
-            "openai_percentage": (openai_votes / total_votes * 100) if total_votes > 0 else 0
+            "openai_percentage": (openai_votes / total_votes * 100) if total_votes > 0 else 0,
+            "gpt5_percentage": (gpt5_votes / total_votes * 100) if total_votes > 0 else 0
         }
     except Exception as e:
         logger.error(f"❌ Error getting vote statistics: {e}")
@@ -218,7 +225,8 @@ def calculate_elo_ratings_from_votes(votes: List[Dict[str, Any]]) -> Dict[str, f
     elo_ratings = {
         "gemini": 1500,
         "mistral": 1500,
-        "openai": 1500
+        "openai": 1500,
+        "gpt5": 1500
     }

     # Process each vote to update ELO ratings
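
`calculate_elo_ratings_from_votes` now seeds all four models at 1500. The per-vote update loop is outside this diff; for reference, a conventional Elo step consistent with that baseline looks roughly like the sketch below (the K-factor of 32 and the 400-point logistic curve are standard Elo assumptions, not necessarily the exact constants db.py uses):

def elo_update(ratings: dict, winner: str, loser: str, k: float = 32.0) -> None:
    """Apply one Elo update in place: the winner gains what the loser gives up."""
    expected_win = 1.0 / (1.0 + 10 ** ((ratings[loser] - ratings[winner]) / 400.0))
    ratings[winner] += k * (1.0 - expected_win)
    ratings[loser] -= k * (1.0 - expected_win)

ratings = {"gemini": 1500.0, "mistral": 1500.0, "openai": 1500.0, "gpt5": 1500.0}
elo_update(ratings, winner="gpt5", loser="openai")
print(ratings["gpt5"], ratings["openai"])  # 1516.0 1484.0 for evenly matched models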
ocr_models.py CHANGED
@@ -115,6 +115,37 @@ def openai_ocr(image: Image.Image):
         logger.error(f"OpenAI OCR error: {e}")
         return f"OpenAI OCR error: {e}"

+def gpt5_ocr(image: Image.Image):
+    """Process OCR using OpenAI's GPT-5 model with the same prompt."""
+    try:
+        # Convert image to base64 (PNG) and use as data URL
+        buffered = io.BytesIO()
+        image.save(buffered, format="PNG")
+        img_bytes = buffered.getvalue()
+        base64_image = base64.b64encode(img_bytes).decode('utf-8')
+        image_data_url = f"data:image/png;base64,{base64_image}"
+
+        # Use Chat Completions style content for multimodal reliability
+        response = openai.chat.completions.create(
+            model="gpt-5",
+            messages=[
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": "Extract and transcribe all text from this image. Return only the transcribed text in markdown format, preserving any formatting like headers, lists, etc."},
+                        {"type": "image_url", "image_url": {"url": image_data_url}}
+                    ]
+                }
+            ]
+        )
+
+        markdown_text = response.choices[0].message.content
+        logger.info("GPT-5 OCR completed successfully")
+        return markdown_text
+    except Exception as e:
+        logger.error(f"GPT-5 OCR error: {e}")
+        return f"GPT-5 OCR error: {e}"
+
 def process_model_ocr(image, model_name):
     """Process OCR for a specific model."""
     if model_name == "gemini":
@@ -123,6 +154,8 @@ def process_model_ocr(image, model_name):
         return mistral_ocr(image)
     elif model_name == "openai":
         return openai_ocr(image)
+    elif model_name == "gpt5":
+        return gpt5_ocr(image)
     else:
         return f"Unknown model: {model_name}"

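The new `gpt5_ocr` path reuses the existing base64 data-URL flow and the same transcription prompt, so it can be exercised through `process_model_ocr` like the other backends. A quick smoke test, assuming the repo modules are importable, a valid OPENAI_API_KEY is configured, and the image path is only an example:

from PIL import Image
from ocr_models import process_model_ocr

image = Image.open("sample_page.png")  # any local test image (illustrative path)
markdown = process_model_ocr(image, "gpt5")  # dispatches to gpt5_ocr
print(markdown)  # transcribed markdown, or a "GPT-5 OCR error: ..." string on failure
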
ui_helpers.py CHANGED
@@ -16,13 +16,14 @@ def get_model_display_name(model_name: str) -> str:
     model_names = {
         "gemini": "Gemini 2.0 Flash",
         "mistral": "Mistral OCR",
-        "openai": "OpenAI GPT-4o"
+        "openai": "OpenAI GPT-4o",
+        "gpt5": "OpenAI GPT-5"
     }
     return model_names.get(model_name, model_name)

 def select_random_models() -> tuple[str, str]:
-    """Randomly select two models from the three available: gemini, mistral, openai."""
-    models = ["gemini", "mistral", "openai"]
+    """Randomly select two models from the available list including gpt5."""
+    models = ["gemini", "mistral", "openai", "gpt5"]
     selected_models = random.sample(models, 2)
     return selected_models[0], selected_models[1]

@@ -147,7 +148,7 @@ def format_elo_leaderboard(elo_ratings: Dict[str, float], vote_counts: Dict[str,
                 <th>Rank</th>
                 <th>Model</th>
                 <th>ELO Rating</th>
-                <th>Total Votes</th>
+                <th>Total Votes</th>
             </tr>
         </thead>
         <tbody>
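
With gpt5 in the pool, `select_random_models` now draws a pair of distinct backends from four (random.sample never repeats a model within a pair), and `get_model_display_name` maps the new key to its label. A short usage sketch, assuming ui_helpers is importable from the repo:

from ui_helpers import get_model_display_name, select_random_models

model_a, model_b = select_random_models()  # e.g. ("gpt5", "mistral")
print(get_model_display_name(model_a), "vs", get_model_display_name(model_b))
# e.g. "OpenAI GPT-5 vs Mistral OCR"; unknown keys fall back to the raw key string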