Update app.py
Browse files
app.py
CHANGED
|
@@ -536,17 +536,17 @@ def evaluate_predictions(prediction_file, model_name,Team_name ,add_to_leaderboa
|
|
| 536 |
|
| 537 |
|
| 538 |
|
| 539 |
-
#
|
| 540 |
pred_question_ids = set(predictions_df['question_id'])
|
| 541 |
gt_question_ids = set(ground_truth_df['question_id'])
|
| 542 |
|
| 543 |
-
|
| 544 |
-
|
|
|
|
| 545 |
|
| 546 |
-
if
|
| 547 |
-
|
| 548 |
-
|
| 549 |
-
return f"Warning: Some question IDs in ground truth are missing from the predictions: {missing_in_pred}", load_leaderboard_pro()
|
| 550 |
|
| 551 |
merged_df = pd.merge(predictions_df, ground_truth_df, on='question_id', how='inner')
|
| 552 |
merged_df['pred_answer'] = merged_df['predicted_answer'].apply(clean_answer)
|
|
@@ -607,18 +607,17 @@ def evaluate_predictions_pro(prediction_file, model_name,Team_name ,add_to_leade
|
|
| 607 |
return "Error: 'Answer' column is missing in the ground truth dataset.", load_leaderboard_pro()
|
| 608 |
|
| 609 |
|
| 610 |
-
|
| 611 |
pred_question_ids = set(predictions_df['question_id'])
|
| 612 |
gt_question_ids = set(ground_truth_df['question_id'])
|
| 613 |
|
| 614 |
-
|
| 615 |
-
|
| 616 |
-
|
| 617 |
-
if missing_in_gt:
|
| 618 |
-
return f"Error: Some question IDs in predictions are missing from the ground truth: {missing_in_gt}", load_leaderboard_pro()
|
| 619 |
-
if missing_in_pred:
|
| 620 |
-
return f"Warning: Some question IDs in ground truth are missing from the predictions: {missing_in_pred}", load_leaderboard_pro()
|
| 621 |
|
|
|
|
|
|
|
|
|
|
| 622 |
# Merge and evaluate
|
| 623 |
|
| 624 |
merged_df = pd.merge(predictions_df, ground_truth_df, on='question_id', how='inner')
|
|
|
|
| 536 |
|
| 537 |
|
| 538 |
|
| 539 |
+
# Convert to sets for comparison
|
| 540 |
pred_question_ids = set(predictions_df['question_id'])
|
| 541 |
gt_question_ids = set(ground_truth_df['question_id'])
|
| 542 |
|
| 543 |
+
# Check that the prediction question IDs exactly match the ground truth IDs (none missing, none extra)
|
| 544 |
+
if gt_question_ids != pred_question_ids:
|
| 545 |
+
return "Error: Prediction file does not contain all question IDs from the ground truth.", load_leaderboard()
|
| 546 |
|
| 547 |
+
# Check if the order of question IDs is the same
|
| 548 |
+
if not (ground_truth_df['question_id'].tolist() == predictions_df['question_id'].tolist()):
|
| 549 |
+
return "Error: The order of question IDs in the prediction file does not match the ground truth.", load_leaderboard()
|
|
|
|
| 550 |
|
| 551 |
merged_df = pd.merge(predictions_df, ground_truth_df, on='question_id', how='inner')
|
| 552 |
merged_df['pred_answer'] = merged_df['predicted_answer'].apply(clean_answer)
|
|
|
|
| 607 |
return "Error: 'Answer' column is missing in the ground truth dataset.", load_leaderboard_pro()
|
| 608 |
|
| 609 |
|
| 610 |
+
# Convert to sets for comparison
|
| 611 |
pred_question_ids = set(predictions_df['question_id'])
|
| 612 |
gt_question_ids = set(ground_truth_df['question_id'])
|
| 613 |
|
| 614 |
+
# Check that the prediction question IDs exactly match the ground truth IDs (none missing, none extra)
|
| 615 |
+
if gt_question_ids != pred_question_ids:
|
| 616 |
+
return "Error: Prediction file does not contain all question IDs from the ground truth.", load_leaderboard_pro()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 617 |
|
| 618 |
+
# Check if the order of question IDs is the same
|
| 619 |
+
if not (ground_truth_df['question_id'].tolist() == predictions_df['question_id'].tolist()):
|
| 620 |
+
return "Error: The order of question IDs in the prediction file does not match the ground truth.", load_leaderboard_pro()
|
| 621 |
# Merge and evaluate
|
| 622 |
|
| 623 |
merged_df = pd.merge(predictions_df, ground_truth_df, on='question_id', how='inner')
|