Update app.py
app.py CHANGED
@@ -365,13 +365,21 @@ LAST_UPDATED = datetime.now().strftime("%B %d, %Y")
 
 def initialize_leaderboard_file():
     """
-
+    Ensure the leaderboard file exists and has the correct headers.
     """
     if not os.path.exists(LEADERBOARD_FILE):
+        # Create the file with headers
         pd.DataFrame(columns=[
             "Model Name", "Overall Accuracy", "Valid Accuracy",
             "Correct Predictions", "Total Questions", "Timestamp"
         ]).to_csv(LEADERBOARD_FILE, index=False)
+    else:
+        # Check if the file is empty and write headers if needed
+        if os.stat(LEADERBOARD_FILE).st_size == 0:
+            pd.DataFrame(columns=[
+                "Model Name", "Overall Accuracy", "Valid Accuracy",
+                "Correct Predictions", "Total Questions", "Timestamp"
+            ]).to_csv(LEADERBOARD_FILE, index=False)
 
 def clean_answer(answer):
     """
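For context on why the new empty-file branch matters: pd.read_csv raises pandas.errors.EmptyDataError on a zero-byte file, so a leaderboard file that exists but was never given headers would crash any later read. A minimal standalone sketch (not taken from app.py) of the failure mode and the guard used in this patch:

import os
import tempfile

import pandas as pd

# A zero-byte file stands in for a leaderboard file that exists but has no headers.
with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as tmp:
    path = tmp.name

try:
    pd.read_csv(path)  # raises EmptyDataError on a 0-byte file
except pd.errors.EmptyDataError:
    print("empty file: nothing to parse")

# The guard pattern from the patch: only read the CSV when it has content.
if os.path.exists(path) and os.stat(path).st_size > 0:
    df = pd.read_csv(path)
else:
    df = pd.DataFrame(columns=["Model Name", "Overall Accuracy"])

os.remove(path)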
@@ -405,7 +413,7 @@ def load_leaderboard():
     """
     Load all submissions from the leaderboard file.
     """
-    if not os.path.exists(LEADERBOARD_FILE):
+    if not os.path.exists(LEADERBOARD_FILE) or os.stat(LEADERBOARD_FILE).st_size == 0:
         return pd.DataFrame({
             "Model Name": [],
             "Overall Accuracy": [],
@@ -416,9 +424,9 @@ def load_leaderboard():
         })
     return pd.read_csv(LEADERBOARD_FILE)
 
-def evaluate_predictions_and_update_leaderboard(prediction_file):
+def evaluate_predictions(prediction_file, model_name, add_to_leaderboard):
     """
-    Evaluate predictions
+    Evaluate predictions and optionally add results to the leaderboard.
     """
     ground_truth_file = "ground_truth.csv"
     if not os.path.exists(ground_truth_file):
@@ -430,7 +438,6 @@ def evaluate_predictions_and_update_leaderboard(prediction_file):
         # Load predictions and ground truth
         predictions_df = pd.read_csv(prediction_file.name)
        ground_truth_df = pd.read_csv(ground_truth_file)
-        model_name = os.path.basename(prediction_file.name).split('_')[1].split('.')[0]
 
         # Merge predictions with ground truth
         merged_df = pd.merge(predictions_df, ground_truth_df, on='question_id', how='inner')
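The evaluation logic around this hunk (only partly visible in the diff) merges predictions with ground truth on question_id and reports both an overall accuracy and a valid-only accuracy. A rough standalone sketch of that pattern on toy data; the column names "predicted" and "answer" are chosen for illustration and are not shown in the diff:

import pandas as pd

predictions_df = pd.DataFrame({
    "question_id": [1, 2, 3, 4],
    "predicted": ["A", "B", None, "D"],  # None stands for a missing/invalid prediction
})
ground_truth_df = pd.DataFrame({
    "question_id": [1, 2, 3, 4],
    "answer": ["A", "C", "B", "D"],
})

# Same join as in the patch: inner merge on question_id.
merged_df = pd.merge(predictions_df, ground_truth_df, on="question_id", how="inner")

total_predictions = len(merged_df)
valid = merged_df.dropna(subset=["predicted"])
total_valid_predictions = len(valid)
correct_predictions = (valid["predicted"] == valid["answer"]).sum()

overall_accuracy = correct_predictions / total_predictions if total_predictions > 0 else 0
valid_accuracy = correct_predictions / total_valid_predictions if total_valid_predictions > 0 else 0

print(overall_accuracy, valid_accuracy)  # 0.5 and about 0.667 for this toy data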
@@ -447,16 +454,19 @@ def evaluate_predictions_and_update_leaderboard(prediction_file):
         valid_accuracy = correct_predictions / total_valid_predictions if total_valid_predictions > 0 else 0
 
         results = {
-            'model_name': model_name,
+            'model_name': model_name if model_name else "Unknown Model",
             'overall_accuracy': overall_accuracy,
             'valid_accuracy': valid_accuracy,
             'correct_predictions': correct_predictions,
             'total_questions': total_predictions,
         }
 
-        # Update leaderboard
-        update_leaderboard(results)
-        return "Evaluation completed and added to leaderboard.", load_leaderboard()
+        # Update leaderboard only if opted in
+        if add_to_leaderboard:
+            update_leaderboard(results)
+            return "Evaluation completed and added to leaderboard.", load_leaderboard()
+        else:
+            return "Evaluation completed but not added to leaderboard.", load_leaderboard()
     except Exception as e:
         return f"Error during evaluation: {str(e)}", load_leaderboard()
 
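The update_leaderboard(results) helper is referenced here but not shown in this diff. Judging from the columns created in initialize_leaderboard_file (including Timestamp), it presumably appends one row per submission. A hypothetical sketch of such a helper, not the app's actual implementation; the file name "leaderboard.csv" is assumed, since the real LEADERBOARD_FILE constant is defined elsewhere in app.py:

import os
from datetime import datetime

import pandas as pd

LEADERBOARD_FILE = "leaderboard.csv"  # assumed; app.py defines the real constant

def update_leaderboard(results):
    # Map the results dict produced by evaluate_predictions onto the CSV columns.
    row = pd.DataFrame([{
        "Model Name": results["model_name"],
        "Overall Accuracy": results["overall_accuracy"],
        "Valid Accuracy": results["valid_accuracy"],
        "Correct Predictions": results["correct_predictions"],
        "Total Questions": results["total_questions"],
        "Timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    }])
    # Append without rewriting existing rows; write headers only for a new or empty file.
    write_header = not os.path.exists(LEADERBOARD_FILE) or os.stat(LEADERBOARD_FILE).st_size == 0
    row.to_csv(LEADERBOARD_FILE, mode="a", header=write_header, index=False)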
@@ -471,6 +481,8 @@ with gr.Blocks() as demo:
     # Submission Tab
     with gr.TabItem("🏅 Submission"):
         file_input = gr.File(label="Upload Prediction CSV")
+        model_name_input = gr.Textbox(label="Model Name", placeholder="Enter your model name")
+        add_to_leaderboard_checkbox = gr.Checkbox(label="Add to Leaderboard?", value=True)
         eval_status = gr.Textbox(label="Evaluation Status", interactive=False)
         leaderboard_table_preview = gr.Dataframe(
             value=load_leaderboard(),
@@ -480,8 +492,8 @@
         )
         eval_button = gr.Button("Evaluate and Update Leaderboard")
         eval_button.click(
-            evaluate_predictions_and_update_leaderboard,
-            inputs=[file_input],
+            evaluate_predictions,
+            inputs=[file_input, model_name_input, add_to_leaderboard_checkbox],
             outputs=[eval_status, leaderboard_table_preview],
         )
 
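Because the click handler only reads prediction_file.name, it can also be exercised outside the Gradio UI with any object that exposes a name attribute pointing at a CSV. A hypothetical smoke-test sketch, assuming app.py can be imported without launching the demo (for example, demo.launch() is guarded by if __name__ == "__main__") and that a prediction file and ground_truth.csv exist locally; the file name "sample_predictions.csv" is illustrative only:

from types import SimpleNamespace

from app import evaluate_predictions, initialize_leaderboard_file

initialize_leaderboard_file()

# Stand-in for the object Gradio's gr.File passes to the callback.
fake_upload = SimpleNamespace(name="sample_predictions.csv")

status, leaderboard = evaluate_predictions(
    fake_upload,
    model_name="my-baseline",
    add_to_leaderboard=False,  # evaluate without touching the shared leaderboard
)
print(status)
print(leaderboard)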