Update app.py
app.py CHANGED
@@ -4,6 +4,8 @@ import os
 import re
 from datetime import datetime

+LEADERBOARD_FILE = "leaderboard.csv"  # File to store leaderboard data
+
 def clean_answer(answer):
     if pd.isna(answer):
         return None
@@ -48,6 +50,28 @@ def write_evaluation_results(results, output_file):
     print('\n'.join(output_text))
     print(f"\nResults have been saved to: {output_file}")

+def update_leaderboard(results):
+    # Add results to the leaderboard file
+    new_entry = {
+        "Model Name": results['model_name'],
+        "Overall Accuracy": f"{results['overall_accuracy']:.2%}",
+        "Valid Accuracy": f"{results['valid_accuracy']:.2%}",
+        "Correct Predictions": results['correct_predictions'],
+        "Total Questions": results['total_questions'],
+        "Timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+    }
+    leaderboard_df = pd.DataFrame([new_entry])
+    if os.path.exists(LEADERBOARD_FILE):
+        existing_df = pd.read_csv(LEADERBOARD_FILE)
+        leaderboard_df = pd.concat([existing_df, leaderboard_df], ignore_index=True)
+    leaderboard_df.to_csv(LEADERBOARD_FILE, index=False)
+
+def display_leaderboard():
+    if not os.path.exists(LEADERBOARD_FILE):
+        return "Leaderboard is empty."
+    leaderboard_df = pd.read_csv(LEADERBOARD_FILE)
+    return leaderboard_df.to_markdown(index=False)
+
 def evaluate_predictions(prediction_file):
     ground_truth_file = "ground_truth.csv"  # Specify the path to the ground truth file
     if not prediction_file:
@@ -120,28 +144,33 @@ def evaluate_predictions(prediction_file):
             'field_performance': field_metrics
         }

+        update_leaderboard(results)
         output_file = "evaluation_results.txt"
         write_evaluation_results(results, output_file)
-        return "Evaluation completed successfully!", output_file
+        return "Evaluation completed successfully! Leaderboard updated.", output_file

     except Exception as e:
         return f"Error during evaluation: {str(e)}", None

 # Gradio Interface
-description = "Upload a prediction CSV file to evaluate predictions against the ground truth."
-
-demo = gr.
-    ...
-    gr.Textbox(label="Evaluation Status")
-    gr.File(label="Download Evaluation Results")
-    ...
-)
+description = "Upload a prediction CSV file to evaluate predictions against the ground truth and update the leaderboard."
+
+demo = gr.Blocks()
+
+with demo:
+    gr.Markdown("# Prediction Evaluation Tool with Leaderboard")
+    with gr.Tab("Evaluate"):
+        file_input = gr.File(label="Upload Prediction CSV")
+        eval_status = gr.Textbox(label="Evaluation Status")
+        eval_results_file = gr.File(label="Download Evaluation Results")
+        eval_button = gr.Button("Evaluate")
+        eval_button.click(
+            evaluate_predictions, inputs=file_input, outputs=[eval_status, eval_results_file]
+        )
+    with gr.Tab("Leaderboard"):
+        leaderboard_text = gr.Textbox(label="Leaderboard", interactive=False)
+        refresh_button = gr.Button("Refresh Leaderboard")
+        refresh_button.click(display_leaderboard, outputs=leaderboard_text)

 if __name__ == "__main__":
     demo.launch()
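For reference, a minimal sketch of how the two new helpers behave in isolation. The sample values and model name below are hypothetical; the sketch assumes app.py's existing imports (os, pandas as pd, datetime) and a results dict carrying the 'model_name' key that update_leaderboard expects:

    sample_results = {
        "model_name": "baseline-v1",      # hypothetical entry
        "overall_accuracy": 0.8125,       # stored as "81.25%" in the CSV
        "valid_accuracy": 0.8421,
        "correct_predictions": 78,
        "total_questions": 96,
    }
    update_leaderboard(sample_results)    # appends one row to leaderboard.csv
    print(display_leaderboard())          # renders the CSV as a Markdown table

Two caveats worth flagging: DataFrame.to_markdown() relies on the optional tabulate package, so it would need to be listed in the Space's requirements, and leaderboard.csv is written to the Space's ephemeral disk, so accumulated entries are lost on restart unless persistent storage is enabled.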