MEDIC-Benchmark

Running

App Files Community

tathagataraha committed on Dec 29, 2024

Commit

6c10fa6

1 Parent(s): 8b771ed

[TEMP] Offline

Browse files

Files changed (4) hide show

app.py +1 -0
src/leaderboard/read_evals.py +5 -1
src/populate.py +1 -1
src/submission/submit.py +28 -21

app.py CHANGED Viewed

@@ -75,6 +75,7 @@ open_ended_leaderboard_df = open_ended_original_df.copy()
 _, med_safety_original_df = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, MED_SAFETY_COLS, MED_SAFETY_BENCHMARK_COLS, "score", "med_safety")
 med_safety_leaderboard_df = med_safety_original_df.copy()
 # breakpoint()
 # # Token based results
 # _, token_based_datasets_original_df = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, DATASET_COLS, DATASET_BENCHMARK_COLS, "TokenBasedWithMacroAverage", "datasets")

 _, med_safety_original_df = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, MED_SAFETY_COLS, MED_SAFETY_BENCHMARK_COLS, "score", "med_safety")
 med_safety_leaderboard_df = med_safety_original_df.copy()
+# breakpoint()
 # breakpoint()
 # # Token based results
 # _, token_based_datasets_original_df = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, DATASET_COLS, DATASET_BENCHMARK_COLS, "TokenBasedWithMacroAverage", "datasets")

src/leaderboard/read_evals.py CHANGED Viewed

@@ -45,7 +45,11 @@ class EvalResult:
     def init_from_json_file(self, json_filepath, evaluation_metric):
         """Inits the result from the specific model result file"""
         with open(json_filepath) as fp:
-            data = json.load(fp)
         config = data.get("config")

     def init_from_json_file(self, json_filepath, evaluation_metric):
         """Inits the result from the specific model result file"""
         with open(json_filepath) as fp:
+            try:
+                data = json.load(fp)
+            except:
+                breakpoint()
         config = data.get("config")

src/populate.py CHANGED Viewed

@@ -68,7 +68,7 @@ def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
     for run in all_evals:
         # changes to be made here
         status_list = [run["status"]["closed-ended"], run["status"]["open-ended"], run["status"]["med-safety"], run["status"]["cross-examination"]]
-        status_list = status_list[:2]
         if "RUNNING" in status_list:
             running_list.append(run)
         elif "PENDING" in status_list or "RERUN" in status_list:

     for run in all_evals:
         # changes to be made here
         status_list = [run["status"]["closed-ended"], run["status"]["open-ended"], run["status"]["med-safety"], run["status"]["cross-examination"]]
+        status_list = status_list[:3]
         if "RUNNING" in status_list:
             running_list.append(run)
         elif "PENDING" in status_list or "RERUN" in status_list:

src/submission/submit.py CHANGED Viewed

@@ -63,12 +63,17 @@ def add_new_eval(
     global USERS_TO_SUBMISSION_DATES
     if not REQUESTED_MODELS:
         REQUESTED_MODELS, USERS_TO_SUBMISSION_DATES = already_submitted_models(EVAL_REQUESTS_PATH)
-    user_name = ""
-    model_path = model
-    if "/" in model:
-        user_name = model.split("/")[0]
-        model_path = model.split("/")[1]
     # precision = precision.split(" ")[0]
     current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
@@ -96,22 +101,22 @@ def add_new_eval(
     # Is the model info correctly filled?
     try:
-        model_info = API.model_info(repo_id=model, revision=revision)
     except Exception:
         return styled_error("Could not get your model information. Please fill it up properly.")
-    model_size = get_model_size(model_info=model_info)
-    # Were the model card and license filled?
-    try:
-        license = model_info.cardData["license"]
-    except Exception:
-        return styled_error("Please select a license for your model")
-    modelcard_OK, error_msg = check_model_card(model)
-    if not modelcard_OK:
-        return styled_error(error_msg)
     # Verify the inference config now
     # try:
     #     label_normalization_map = ast.literal_eval(label_normalization_map)
@@ -143,10 +148,10 @@ def add_new_eval(
         },
         "submitted_time": current_time,
         "model_type": model_type,
-        "likes": model_info.likes,
         "num_params": model_size,
         "license": license,
-        "private": False,
         "slurm_id": None
     }
@@ -158,6 +163,8 @@ def add_new_eval(
     print("Creating eval file")
     OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
     os.makedirs(OUT_DIR, exist_ok=True)
     out_path = f"{OUT_DIR}/{model_path}_{revision}_{precision}_{weight_type}_eval_request.json"
     with open(out_path, "w") as f:

     global USERS_TO_SUBMISSION_DATES
     if not REQUESTED_MODELS:
         REQUESTED_MODELS, USERS_TO_SUBMISSION_DATES = already_submitted_models(EVAL_REQUESTS_PATH)
+    if model.startswith("/"):
+        user_name = ""
+        model_path = model
+        private = True
+    else:
+        user_name = ""
+        model_path = model
+        if "/" in model:
+            user_name = model.split("/")[0]
+            model_path = model.split("/")[1]
+        private = False
     # precision = precision.split(" ")[0]
     current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
     # Is the model info correctly filled?
     try:
+        if model.startswith("/"):
+            model_info = API.model_info(repo_id=model, revision=revision)
+            model_size = get_model_size(model_info=model_info)
+            license = model_info.cardData["license"]
+            modelcard_OK, error_msg = check_model_card(model)
+            if not modelcard_OK:
+                return styled_error(error_msg)
+            likes = model_info.likes
+        else:
+            model_size = None
+            license = None
+            likes = 0
     except Exception:
         return styled_error("Could not get your model information. Please fill it up properly.")
     # Verify the inference config now
     # try:
     #     label_normalization_map = ast.literal_eval(label_normalization_map)
         },
         "submitted_time": current_time,
         "model_type": model_type,
+        "likes": likes,
         "num_params": model_size,
         "license": license,
+        "private": private,
         "slurm_id": None
     }
     print("Creating eval file")
     OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
     os.makedirs(OUT_DIR, exist_ok=True)
+    if model_path.startswith("/"):
+        os.makedirs(f"{OUT_DIR}/{model_path}", exist_ok=True)
     out_path = f"{OUT_DIR}/{model_path}_{revision}_{precision}_{weight_type}_eval_request.json"
     with open(out_path, "w") as f: