Clémentine committed · Commit c2e4da0 · 1 Parent(s): e703fd8
push validation to public dataset
app.py CHANGED
@@ -21,6 +21,7 @@ OWNER="gaia-benchmark"
 DATA_DATASET = f"{OWNER}/GAIA"
 INTERNAL_DATA_DATASET = f"{OWNER}/GAIA_internal"
 SUBMISSION_DATASET = f"{OWNER}/submissions_internal"
+SUBMISSION_DATASET_PUBLIC = f"{OWNER}/submissions_public"
 CONTACT_DATASET = f"{OWNER}/contact_info"
 RESULTS_DATASET = f"{OWNER}/results_public"
 LEADERBOARD_PATH = f"{OWNER}/leaderboard"

@@ -76,6 +77,7 @@ def add_new_eval(
     organisation: str,
     mail: str,
 ):
+    is_validation = val_or_test == "validation"
     # Very basic email parsing
     _, parsed_mail = parseaddr(mail)
     if not "@" in parsed_mail:

@@ -84,7 +86,7 @@ def add_new_eval(
     print("Adding new eval")

     # Check if the combination model/org already exists and prints a warning message if yes
-    if model.lower() in set([m.lower() for m in eval_results[val_or_test]["model"]]) and organisation.lower() in set([o.lower() for
+    if model.lower() in set([m.lower() for m in eval_results[val_or_test]["model"]]) and organisation.lower() in set([o.lower() for o in eval_results[val_or_test]["organisation"]]):
         return format_warning("This model has been already submitted.")

     if path_to_file is None:

@@ -135,7 +137,7 @@ def add_new_eval(
         scores[level] += score
         num_questions["all"] += 1
         num_questions[level] += 1
-
+
     # Save scored file
     api.upload_file(
         repo_id=SUBMISSION_DATASET,

@@ -145,6 +147,16 @@ def add_new_eval(
         token=TOKEN
     )

+    # Save scored file
+    if is_validation:
+        api.upload_file(
+            repo_id=SUBMISSION_DATASET_PUBLIC,
+            path_or_fileobj=f"scored/{organisation}_{model}.jsonl",
+            path_in_repo=f"{organisation}/{model}/{YEAR_VERSION}_{val_or_test}_scored_{datetime.datetime.today()}.jsonl",
+            repo_type="dataset",
+            token=TOKEN
+        )
+
     # Actual submission
     eval_entry = {
         "model": model,
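
Taken together, the diff declares a public submissions dataset and mirrors scored validation files into it after the existing internal upload. Below is a minimal, self-contained sketch of that behaviour using the huggingface_hub client the Space already relies on; the constants mirror the diff, but YEAR_VERSION, TOKEN, and the push_scored_file wrapper are illustrative stand-ins for values defined elsewhere in app.py, not the Space's actual code.

# Sketch of the behaviour added by this commit: after scoring, validation
# submissions are also pushed to a public submissions dataset.
import datetime
from huggingface_hub import HfApi

OWNER = "gaia-benchmark"
SUBMISSION_DATASET = f"{OWNER}/submissions_internal"        # internal copy, all splits
SUBMISSION_DATASET_PUBLIC = f"{OWNER}/submissions_public"   # public copy, validation only
YEAR_VERSION = "2023"   # assumption: the real value is defined elsewhere in app.py
TOKEN = "hf_..."        # assumption: read from the Space's secrets in practice

def push_scored_file(organisation: str, model: str, val_or_test: str) -> None:
    api = HfApi()
    local_path = f"scored/{organisation}_{model}.jsonl"
    remote_path = (
        f"{organisation}/{model}/"
        f"{YEAR_VERSION}_{val_or_test}_scored_{datetime.datetime.today()}.jsonl"
    )
    # Always keep a copy in the internal submissions dataset (pre-existing behaviour).
    api.upload_file(
        path_or_fileobj=local_path,
        path_in_repo=remote_path,
        repo_id=SUBMISSION_DATASET,
        repo_type="dataset",
        token=TOKEN,
    )
    # New in this commit: mirror validation submissions to the public dataset.
    if val_or_test == "validation":
        api.upload_file(
            path_or_fileobj=local_path,
            path_in_repo=remote_path,
            repo_id=SUBMISSION_DATASET_PUBLIC,
            repo_type="dataset",
            token=TOKEN,
        )

The is_validation guard (val_or_test == "validation") means test-split submissions continue to land only in the internal dataset.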