Spaces:

MBZUAI-LLM
/

Mobile-MMLU-Challenge

Running

App Files Community

SondosMB committed on Mar 26

Commit

15fabfc

verified ·

1 Parent(s): 6c03d0d

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -15

app.py CHANGED Viewed

@@ -536,17 +536,17 @@ def evaluate_predictions(prediction_file, model_name,Team_name ,add_to_leaderboa
-        # **Check if questions match**
         pred_question_ids = set(predictions_df['question_id'])
         gt_question_ids = set(ground_truth_df['question_id'])
-        missing_in_gt = pred_question_ids - gt_question_ids
-        missing_in_pred = gt_question_ids - pred_question_ids
-        if missing_in_gt:
-            return f"Error: Some question IDs in predictions are missing from the ground truth: {missing_in_gt}", load_leaderboard_pro()
-        if missing_in_pred:
-            return f"Warning: Some question IDs in ground truth are missing from the predictions: {missing_in_pred}", load_leaderboard_pro()
         merged_df = pd.merge(predictions_df, ground_truth_df, on='question_id', how='inner')
         merged_df['pred_answer'] = merged_df['predicted_answer'].apply(clean_answer)
@@ -607,18 +607,17 @@ def evaluate_predictions_pro(prediction_file, model_name,Team_name ,add_to_leade
             return "Error: 'Answer' column is missing in the ground truth dataset.", load_leaderboard_pro()
-        # **Check if questions match**
         pred_question_ids = set(predictions_df['question_id'])
         gt_question_ids = set(ground_truth_df['question_id'])
-        missing_in_gt = pred_question_ids - gt_question_ids
-        missing_in_pred = gt_question_ids - pred_question_ids
-        if missing_in_gt:
-            return f"Error: Some question IDs in predictions are missing from the ground truth: {missing_in_gt}", load_leaderboard_pro()
-        if missing_in_pred:
-            return f"Warning: Some question IDs in ground truth are missing from the predictions: {missing_in_pred}", load_leaderboard_pro()
         # Merge and evaluate
         merged_df = pd.merge(predictions_df, ground_truth_df, on='question_id', how='inner')

+        # Convert to sets for comparison
         pred_question_ids = set(predictions_df['question_id'])
         gt_question_ids = set(ground_truth_df['question_id'])
+        # Check if all ground truth questions are in predictions
+        if gt_question_ids != pred_question_ids:
+            return "Error: Prediction file does not contain all question IDs from the ground truth.", load_leaderboard()
+        # Check if the order of question IDs is the same
+        if not (ground_truth_df['question_id'].tolist() == predictions_df['question_id'].tolist()):
+            return "Error: The order of question IDs in the prediction file does not match the ground truth.", load_leaderboard()
         merged_df = pd.merge(predictions_df, ground_truth_df, on='question_id', how='inner')
         merged_df['pred_answer'] = merged_df['predicted_answer'].apply(clean_answer)
             return "Error: 'Answer' column is missing in the ground truth dataset.", load_leaderboard_pro()
+       # Convert to sets for comparison
         pred_question_ids = set(predictions_df['question_id'])
         gt_question_ids = set(ground_truth_df['question_id'])
+        # Check if all ground truth questions are in predictions
+        if gt_question_ids != pred_question_ids:
+            return "Error: Prediction file does not contain all question IDs from the ground truth.", load_leaderboard_pro()
+        # Check if the order of question IDs is the same
+        if not (ground_truth_df['question_id'].tolist() == predictions_df['question_id'].tolist()):
+            return "Error: The order of question IDs in the prediction file does not match the ground truth.", load_leaderboard_pro()
         # Merge and evaluate
         merged_df = pd.merge(predictions_df, ground_truth_df, on='question_id', how='inner')