Clémentine committed · Commit c2e4da0 · 1 Parent(s): e703fd8
push validation to public dataset
app.py CHANGED
@@ -21,6 +21,7 @@ OWNER="gaia-benchmark"
 DATA_DATASET = f"{OWNER}/GAIA"
 INTERNAL_DATA_DATASET = f"{OWNER}/GAIA_internal"
 SUBMISSION_DATASET = f"{OWNER}/submissions_internal"
+SUBMISSION_DATASET_PUBLIC = f"{OWNER}/submissions_public"
 CONTACT_DATASET = f"{OWNER}/contact_info"
 RESULTS_DATASET = f"{OWNER}/results_public"
 LEADERBOARD_PATH = f"{OWNER}/leaderboard"

@@ -76,6 +77,7 @@ def add_new_eval(
     organisation: str,
     mail: str,
 ):
+    is_validation = val_or_test == "validation"
     # Very basic email parsing
     _, parsed_mail = parseaddr(mail)
     if not "@" in parsed_mail:

@@ -84,7 +86,7 @@ def add_new_eval(
     print("Adding new eval")

     # Check if the combination model/org already exists and prints a warning message if yes
-    if model.lower() in set([m.lower() for m in eval_results[val_or_test]["model"]]) and organisation.lower() in set([o.lower() for
+    if model.lower() in set([m.lower() for m in eval_results[val_or_test]["model"]]) and organisation.lower() in set([o.lower() for o in eval_results[val_or_test]["organisation"]]):
         return format_warning("This model has been already submitted.")

     if path_to_file is None:

@@ -135,7 +137,7 @@ def add_new_eval(
         scores[level] += score
         num_questions["all"] += 1
         num_questions[level] += 1
-
+
     # Save scored file
     api.upload_file(
         repo_id=SUBMISSION_DATASET,

@@ -145,6 +147,16 @@ def add_new_eval(
         token=TOKEN
     )

+    # Save scored file
+    if is_validation:
+        api.upload_file(
+            repo_id=SUBMISSION_DATASET_PUBLIC,
+            path_or_fileobj=f"scored/{organisation}_{model}.jsonl",
+            path_in_repo=f"{organisation}/{model}/{YEAR_VERSION}_{val_or_test}_scored_{datetime.datetime.today()}.jsonl",
+            repo_type="dataset",
+            token=TOKEN
+        )
+
     # Actual submission
     eval_entry = {
         "model": model,
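
Taken together, the diff declares a public submissions dataset and mirrors scored validation files into it after the existing internal upload. Below is a minimal, self-contained sketch of that behaviour using the huggingface_hub client the Space already relies on; the constants mirror the diff, but YEAR_VERSION, TOKEN, and the push_scored_file wrapper are illustrative stand-ins for values defined elsewhere in app.py, not the Space's actual code.

# Sketch of the behaviour added by this commit: after scoring, validation
# submissions are also pushed to a public submissions dataset.
import datetime
from huggingface_hub import HfApi

OWNER = "gaia-benchmark"
SUBMISSION_DATASET = f"{OWNER}/submissions_internal"        # internal copy, all splits
SUBMISSION_DATASET_PUBLIC = f"{OWNER}/submissions_public"   # public copy, validation only
YEAR_VERSION = "2023"   # assumption: the real value is defined elsewhere in app.py
TOKEN = "hf_..."        # assumption: read from the Space's secrets in practice

def push_scored_file(organisation: str, model: str, val_or_test: str) -> None:
    api = HfApi()
    local_path = f"scored/{organisation}_{model}.jsonl"
    remote_path = (
        f"{organisation}/{model}/"
        f"{YEAR_VERSION}_{val_or_test}_scored_{datetime.datetime.today()}.jsonl"
    )
    # Always keep a copy in the internal submissions dataset (pre-existing behaviour).
    api.upload_file(
        path_or_fileobj=local_path,
        path_in_repo=remote_path,
        repo_id=SUBMISSION_DATASET,
        repo_type="dataset",
        token=TOKEN,
    )
    # New in this commit: mirror validation submissions to the public dataset.
    if val_or_test == "validation":
        api.upload_file(
            path_or_fileobj=local_path,
            path_in_repo=remote_path,
            repo_id=SUBMISSION_DATASET_PUBLIC,
            repo_type="dataset",
            token=TOKEN,
        )

The is_validation guard (val_or_test == "validation") means test-split submissions continue to land only in the internal dataset.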