Spaces:
Paused
Paused
Smita R
Smita
committed on
confirmation and error messages post submission (#74)
Browse files
- content.py +54 -0
- requirements.txt +1 -0
- submission.py +104 -22
content.py
CHANGED
@@ -67,6 +67,15 @@ The **End-to-End Discovery** category tests whether agents can carry out a compl
|
|
67 |
Scores in this category are aggregated from two benchmarks, providing the first standardized way to evaluate automated scientific discovery (ASD) agents across all stages of the research process. Use the links above to explore individual benchmark pages.
|
68 |
<br>
|
69 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
70 |
|
71 |
# External URLs for benchmark descriptions
|
72 |
SCHOLAR_QA_CS_URL = "https://www.semanticscholar.org/paper/OpenScholar%3A-Synthesizing-Scientific-Literature-LMs-Asai-He/b40df4b273f255b3cb5639e220c8ab7b1bdb313e"
|
@@ -220,6 +229,12 @@ css = """
|
|
220 |
--color-background-dark: var(--neutral-900); /* #032629 */
|
221 |
--color-text-light: var(--neutral-50); /* #FAF2E9 */
|
222 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
223 |
#intro-paragraph {
|
224 |
font-size: 18px;
|
225 |
max-width: 60%;
|
@@ -485,6 +500,45 @@ span.wrap[tabindex="0"][role="button"][data-editable="false"] {
|
|
485 |
text-decoration: none;
|
486 |
}
|
487 |
/*------ Submission Page CSS ------*/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
488 |
#custom-form-group {
|
489 |
border: 1px solid #000 !important;
|
490 |
border-radius: 4px !important;
|
|
|
67 |
Scores in this category are aggregated from two benchmarks, providing the first standardized way to evaluate automated scientific discovery (ASD) agents across all stages of the research process. Use the links above to explore individual benchmark pages.
|
68 |
<br>
|
69 |
"""
|
70 |
+
SUBMISSION_CONFIRMATION = """
|
71 |
+
**Your agent has been submitted to AstaBench for evaluation.**
|
72 |
+
<br><br>
|
73 |
+
🙏 Thanks for contributing!
|
74 |
+
<br><br>
|
75 |
+
You'll receive an email shortly with confirmation and next steps. If there are any issues with your submission, our team will reach out within 5–7 business days.
|
76 |
+
<br><br>
|
77 |
+
We appreciate your support in advancing scientific AI.
|
78 |
+
"""
|
79 |
|
80 |
# External URLs for benchmark descriptions
|
81 |
SCHOLAR_QA_CS_URL = "https://www.semanticscholar.org/paper/OpenScholar%3A-Synthesizing-Scientific-Literature-LMs-Asai-He/b40df4b273f255b3cb5639e220c8ab7b1bdb313e"
|
|
|
229 |
--color-background-dark: var(--neutral-900); /* #032629 */
|
230 |
--color-text-light: var(--neutral-50); /* #FAF2E9 */
|
231 |
}
|
232 |
+
|
233 |
+
/* Global Styles */
|
234 |
+
h2 {
|
235 |
+
overflow: hidden;
|
236 |
+
}
|
237 |
+
|
238 |
#intro-paragraph {
|
239 |
font-size: 18px;
|
240 |
max-width: 60%;
|
|
|
500 |
text-decoration: none;
|
501 |
}
|
502 |
/*------ Submission Page CSS ------*/
|
503 |
+
#submission-modal .modal-container {
|
504 |
+
height: auto;
|
505 |
+
max-width: 600px;
|
506 |
+
}
|
507 |
+
|
508 |
+
#submission-modal-content {
|
509 |
+
padding: 20px;
|
510 |
+
background-color: inherit;
|
511 |
+
border-radius: 8px;
|
512 |
+
text-align: center;
|
513 |
+
}
|
514 |
+
|
515 |
+
#submission-modal-content p{
|
516 |
+
font-size: 16px;
|
517 |
+
}
|
518 |
+
|
519 |
+
.spinner-container {
|
520 |
+
display: flex;
|
521 |
+
flex-direction: column;
|
522 |
+
align-items: center;
|
523 |
+
justify-content: center;
|
524 |
+
padding: 30px;
|
525 |
+
}
|
526 |
+
|
527 |
+
.spinner {
|
528 |
+
width: 50px;
|
529 |
+
height: 50px;
|
530 |
+
border: 5px solid #dee2e6;
|
531 |
+
border-top: 5px solid #007bff;
|
532 |
+
border-radius: 50%;
|
533 |
+
animation: spin 1s linear infinite;
|
534 |
+
margin-bottom: 20px;
|
535 |
+
}
|
536 |
+
|
537 |
+
@keyframes spin {
|
538 |
+
0% { transform: rotate(0deg); }
|
539 |
+
100% { transform: rotate(360deg); }
|
540 |
+
}
|
541 |
+
|
542 |
#custom-form-group {
|
543 |
border: 1px solid #000 !important;
|
544 |
border-radius: 4px !important;
|
requirements.txt
CHANGED
@@ -35,6 +35,7 @@ frozenlist==1.6.0
|
|
35 |
fsspec==2025.3.0
|
36 |
gradio==5.30.0
|
37 |
gradio_client==1.10.1
|
|
|
38 |
groovy==0.1.2
|
39 |
h11==0.16.0
|
40 |
httpcore==1.0.9
|
|
|
35 |
fsspec==2025.3.0
|
36 |
gradio==5.30.0
|
37 |
gradio_client==1.10.1
|
38 |
+
gradio_modal==0.0.4
|
39 |
groovy==0.1.2
|
40 |
h11==0.16.0
|
41 |
httpcore==1.0.9
|
submission.py
CHANGED
@@ -4,6 +4,7 @@ import sys
|
|
4 |
import matplotlib
|
5 |
from agenteval.cli import SUBMISSION_METADATA_FILENAME
|
6 |
from agenteval.models import SubmissionMetadata
|
|
|
7 |
|
8 |
matplotlib.use('Agg')
|
9 |
|
@@ -39,6 +40,7 @@ from config import (
|
|
39 |
from content import (
|
40 |
CITATION_BUTTON_LABEL,
|
41 |
CITATION_BUTTON_TEXT,
|
|
|
42 |
format_error,
|
43 |
format_log,
|
44 |
format_warning,
|
@@ -86,6 +88,9 @@ def checked_upload_folder(
|
|
86 |
submission_name=submission_name_ul,
|
87 |
)
|
88 |
|
|
|
|
|
|
|
89 |
def add_new_eval(
|
90 |
val_or_test: str,
|
91 |
agent_name: str | None,
|
@@ -99,14 +104,20 @@ def add_new_eval(
|
|
99 |
profile: gr.OAuthProfile,
|
100 |
):
|
101 |
if not agent_name:
|
102 |
-
return
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
|
|
107 |
|
108 |
if path_to_file is None:
|
109 |
-
return
|
|
|
|
|
|
|
|
|
|
|
110 |
|
111 |
logger.info(f"agent {agent_name}: Checking submission")
|
112 |
|
@@ -130,10 +141,20 @@ def add_new_eval(
|
|
130 |
creation_date_str = user_data_resp.json()["createdAt"]
|
131 |
created_at = datetime.strptime(creation_date_str, "%Y-%m-%dT%H:%M:%S.%fZ").replace(tzinfo=timezone.utc)
|
132 |
if submission_time - created_at < timedelta(days=60):
|
133 |
-
return
|
|
|
|
|
|
|
|
|
|
|
134 |
except Exception as e:
|
135 |
logger.warning(f"Error checking user account age: {e}")
|
136 |
-
return
|
|
|
|
|
|
|
|
|
|
|
137 |
|
138 |
logger.debug(f"agent {agent_name}: Submission frequency check {profile.username}")
|
139 |
contact_infos = try_load_dataset_submission(
|
@@ -146,12 +167,22 @@ def add_new_eval(
|
|
146 |
)
|
147 |
if user_submission_dates and (submission_time - user_submission_dates[-1] < timedelta(days=1)):
|
148 |
logger.info(f"agent {agent_name}: Denied submission because user {username} submitted recently")
|
149 |
-
return
|
|
|
|
|
|
|
|
|
|
|
150 |
|
151 |
logger.debug(f"agent {agent_name}: Email validation {email}")
|
152 |
_, parsed_mail = parseaddr(email)
|
153 |
if "@" not in parsed_mail:
|
154 |
-
return
|
|
|
|
|
|
|
|
|
|
|
155 |
|
156 |
logger.debug(f"agent {agent_name}: Duplicate submission check")
|
157 |
if val_or_test in current_eval_results_for_submission and len(current_eval_results_for_submission[val_or_test]) > 0:
|
@@ -159,7 +190,12 @@ def add_new_eval(
|
|
159 |
for sub_item in existing_submissions:
|
160 |
if (sub_item.get("agent_name", "").lower() == agent_name.lower() and
|
161 |
sub_item.get("username", "").lower() == username.lower()):
|
162 |
-
return
|
|
|
|
|
|
|
|
|
|
|
163 |
|
164 |
safe_username = sanitize_path_component(username)
|
165 |
safe_agent_name = sanitize_path_component(agent_name)
|
@@ -181,9 +217,19 @@ def add_new_eval(
|
|
181 |
out.write(fobj.read())
|
182 |
members_extracted +=1
|
183 |
if members_extracted == 0:
|
184 |
-
return
|
|
|
|
|
|
|
|
|
|
|
185 |
except Exception as e:
|
186 |
-
return
|
|
|
|
|
|
|
|
|
|
|
187 |
|
188 |
submission_name = f"{safe_username}_{safe_agent_name}_{submission_time.strftime('%Y-%m-%d_%H-%M-%S')}"
|
189 |
|
@@ -204,9 +250,19 @@ def add_new_eval(
|
|
204 |
try:
|
205 |
checked_upload_folder(api, extracted_dir, SUBMISSION_DATASET, CONFIG_NAME, val_or_test, submission_name)
|
206 |
except ValueError as e:
|
207 |
-
return
|
|
|
|
|
|
|
|
|
|
|
208 |
except Exception as e:
|
209 |
-
return
|
|
|
|
|
|
|
|
|
|
|
210 |
|
211 |
logger.info(f"agent {agent_name}: Save contact information")
|
212 |
contact_info = subm_meta.model_dump()
|
@@ -223,11 +279,20 @@ def add_new_eval(
|
|
223 |
try:
|
224 |
contact_infos.push_to_hub(CONTACT_DATASET, config_name=CONFIG_NAME)
|
225 |
except Exception as e:
|
226 |
-
return
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
231 |
|
232 |
def _deprecated_scoring_logic():
|
233 |
# No longer triggered on eval submission. Kept for quick reference for a little while (2025). TODO delete this.
|
@@ -350,9 +415,26 @@ def build_page():
|
|
350 |
)
|
351 |
with gr.Row():
|
352 |
submit_eval_button = gr.Button("Submit Evaluation")
|
353 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
354 |
|
355 |
submit_eval_button.click(
|
|
|
|
|
|
|
|
|
356 |
add_new_eval,
|
357 |
[
|
358 |
level_of_test_radio,
|
@@ -365,7 +447,7 @@ def build_page():
|
|
365 |
username_tb,
|
366 |
mail_tb
|
367 |
],
|
368 |
-
|
369 |
)
|
370 |
with gr.Accordion("📙 Citation", open=False):
|
371 |
gr.Textbox(value=CITATION_BUTTON_TEXT, label=CITATION_BUTTON_LABEL, elem_id="citation-button-main", interactive=False)
|
|
|
4 |
import matplotlib
|
5 |
from agenteval.cli import SUBMISSION_METADATA_FILENAME
|
6 |
from agenteval.models import SubmissionMetadata
|
7 |
+
from gradio_modal import Modal
|
8 |
|
9 |
matplotlib.use('Agg')
|
10 |
|
|
|
40 |
from content import (
|
41 |
CITATION_BUTTON_LABEL,
|
42 |
CITATION_BUTTON_TEXT,
|
43 |
+
SUBMISSION_CONFIRMATION,
|
44 |
format_error,
|
45 |
format_log,
|
46 |
format_warning,
|
|
|
88 |
submission_name=submission_name_ul,
|
89 |
)
|
90 |
|
91 |
+
def show_loading_spinner():
|
92 |
+
return gr.update(visible=True)
|
93 |
+
|
94 |
def add_new_eval(
|
95 |
val_or_test: str,
|
96 |
agent_name: str | None,
|
|
|
104 |
profile: gr.OAuthProfile,
|
105 |
):
|
106 |
if not agent_name:
|
107 |
+
return (
|
108 |
+
format_warning("Please provide an agent name."), # error_message
|
109 |
+
gr.update(visible=True), # error_modal
|
110 |
+
gr.update(visible=False), # success_modal
|
111 |
+
gr.update(visible=False) # loading_modal
|
112 |
+
)
|
113 |
|
114 |
if path_to_file is None:
|
115 |
+
return (
|
116 |
+
format_warning("Please attach a .tar.gz file."), # error_message
|
117 |
+
gr.update(visible=True), # error_modal
|
118 |
+
gr.update(visible=False), # success_modal
|
119 |
+
gr.update(visible=False) # loading_modal
|
120 |
+
)
|
121 |
|
122 |
logger.info(f"agent {agent_name}: Checking submission")
|
123 |
|
|
|
141 |
creation_date_str = user_data_resp.json()["createdAt"]
|
142 |
created_at = datetime.strptime(creation_date_str, "%Y-%m-%dT%H:%M:%S.%fZ").replace(tzinfo=timezone.utc)
|
143 |
if submission_time - created_at < timedelta(days=60):
|
144 |
+
return (
|
145 |
+
format_error("This account is not authorized to submit here (account too new)."), # error_message
|
146 |
+
gr.update(visible=True), # error_modal
|
147 |
+
gr.update(visible=False), # success_modal
|
148 |
+
gr.update(visible=False) # loading_modal
|
149 |
+
)
|
150 |
except Exception as e:
|
151 |
logger.warning(f"Error checking user account age: {e}")
|
152 |
+
return (
|
153 |
+
format_error("Could not verify account age. Please try again later."), # error_message
|
154 |
+
gr.update(visible=True), # error_modal
|
155 |
+
gr.update(visible=False), # success_modal
|
156 |
+
gr.update(visible=False) # loading_modal
|
157 |
+
)
|
158 |
|
159 |
logger.debug(f"agent {agent_name}: Submission frequency check {profile.username}")
|
160 |
contact_infos = try_load_dataset_submission(
|
|
|
167 |
)
|
168 |
if user_submission_dates and (submission_time - user_submission_dates[-1] < timedelta(days=1)):
|
169 |
logger.info(f"agent {agent_name}: Denied submission because user {username} submitted recently")
|
170 |
+
return (
|
171 |
+
format_error("You already submitted once in the last 24h for this split; please try again later."), # error_message
|
172 |
+
gr.update(visible=True), # error_modal
|
173 |
+
gr.update(visible=False), # success_modal
|
174 |
+
gr.update(visible=False) # loading_modal
|
175 |
+
)
|
176 |
|
177 |
logger.debug(f"agent {agent_name}: Email validation {email}")
|
178 |
_, parsed_mail = parseaddr(email)
|
179 |
if "@" not in parsed_mail:
|
180 |
+
return (
|
181 |
+
format_warning("Please provide a valid email address."), # error_message
|
182 |
+
gr.update(visible=True), # error_modal
|
183 |
+
gr.update(visible=False), # success_modal
|
184 |
+
gr.update(visible=False) # loading_modal
|
185 |
+
)
|
186 |
|
187 |
logger.debug(f"agent {agent_name}: Duplicate submission check")
|
188 |
if val_or_test in current_eval_results_for_submission and len(current_eval_results_for_submission[val_or_test]) > 0:
|
|
|
190 |
for sub_item in existing_submissions:
|
191 |
if (sub_item.get("agent_name", "").lower() == agent_name.lower() and
|
192 |
sub_item.get("username", "").lower() == username.lower()):
|
193 |
+
return (
|
194 |
+
format_warning("This agent name by this user has already been submitted to this split."), # error_message
|
195 |
+
gr.update(visible=True), # error_modal
|
196 |
+
gr.update(visible=False), # success_modal
|
197 |
+
gr.update(visible=False) # loading_modal
|
198 |
+
)
|
199 |
|
200 |
safe_username = sanitize_path_component(username)
|
201 |
safe_agent_name = sanitize_path_component(agent_name)
|
|
|
217 |
out.write(fobj.read())
|
218 |
members_extracted +=1
|
219 |
if members_extracted == 0:
|
220 |
+
return (
|
221 |
+
format_error("Submission tarball is empty or contains no valid files."), # error_message
|
222 |
+
gr.update(visible=True), # error_modal
|
223 |
+
gr.update(visible=False), # success_modal
|
224 |
+
gr.update(visible=False) # loading_modal
|
225 |
+
)
|
226 |
except Exception as e:
|
227 |
+
return (
|
228 |
+
format_error(f"Error extracting file: {e}. Ensure it's a valid .tar.gz."), # error_message
|
229 |
+
gr.update(visible=True), # error_modal
|
230 |
+
gr.update(visible=False), # success_modal
|
231 |
+
gr.update(visible=False) # loading_modal
|
232 |
+
)
|
233 |
|
234 |
submission_name = f"{safe_username}_{safe_agent_name}_{submission_time.strftime('%Y-%m-%d_%H-%M-%S')}"
|
235 |
|
|
|
250 |
try:
|
251 |
checked_upload_folder(api, extracted_dir, SUBMISSION_DATASET, CONFIG_NAME, val_or_test, submission_name)
|
252 |
except ValueError as e:
|
253 |
+
return (
|
254 |
+
format_error(str(e)), # error_message
|
255 |
+
gr.update(visible=True), # error_modal
|
256 |
+
gr.update(visible=False), # success_modal
|
257 |
+
gr.update(visible=False) # loading_modal
|
258 |
+
)
|
259 |
except Exception as e:
|
260 |
+
return (
|
261 |
+
format_error(f"Failed to upload raw submission: {e}"), # error_message
|
262 |
+
gr.update(visible=True), # error_modal
|
263 |
+
gr.update(visible=False), # success_modal
|
264 |
+
gr.update(visible=False) # loading_modal
|
265 |
+
)
|
266 |
|
267 |
logger.info(f"agent {agent_name}: Save contact information")
|
268 |
contact_info = subm_meta.model_dump()
|
|
|
279 |
try:
|
280 |
contact_infos.push_to_hub(CONTACT_DATASET, config_name=CONFIG_NAME)
|
281 |
except Exception as e:
|
282 |
+
return (
|
283 |
+
format_error(f"Submission recorded, but contact info failed to save: {e}"), # error_message
|
284 |
+
gr.update(visible=True), # error_modal
|
285 |
+
gr.update(visible=False), # success_modal
|
286 |
+
gr.update(visible=False) # loading_modal
|
287 |
+
)
|
288 |
+
|
289 |
+
logger.info(f"Agent '{agent_name}' submitted successfully by '{username}' to '{val_or_test}' split.")
|
290 |
+
return (
|
291 |
+
"", # error_message
|
292 |
+
gr.update(visible=False), # error_modal
|
293 |
+
gr.update(visible=True), # success_modal
|
294 |
+
gr.update(visible=False) # loading_modal
|
295 |
+
)
|
296 |
|
297 |
def _deprecated_scoring_logic():
|
298 |
# No longer triggered on eval submission. Kept for quick reference for a little while (2025). TODO delete this.
|
|
|
415 |
)
|
416 |
with gr.Row():
|
417 |
submit_eval_button = gr.Button("Submit Evaluation")
|
418 |
+
|
419 |
+
# Modals for loading spinner, success and error messages
|
420 |
+
with Modal(visible=False, elem_id="submission-modal") as loading_modal:
|
421 |
+
with gr.Column(elem_id="submission-modal-content"):
|
422 |
+
gr.HTML('<div class="spinner-container"><div class="spinner"></div><p>Processing your submission...</p></div>')
|
423 |
+
|
424 |
+
with Modal(visible=False, elem_id="submission-modal") as error_modal:
|
425 |
+
with gr.Column(elem_id="submission-modal-content"):
|
426 |
+
gr.Markdown("## ⚠️ Error")
|
427 |
+
error_message = gr.Markdown()
|
428 |
+
|
429 |
+
with Modal(visible=False, elem_id="submission-modal") as success_modal:
|
430 |
+
with gr.Column(elem_id="submission-modal-content"):
|
431 |
+
gr.Markdown(SUBMISSION_CONFIRMATION)
|
432 |
|
433 |
submit_eval_button.click(
|
434 |
+
show_loading_spinner,
|
435 |
+
None,
|
436 |
+
[loading_modal],
|
437 |
+
).then(
|
438 |
add_new_eval,
|
439 |
[
|
440 |
level_of_test_radio,
|
|
|
447 |
username_tb,
|
448 |
mail_tb
|
449 |
],
|
450 |
+
[error_message, error_modal, success_modal, loading_modal],
|
451 |
)
|
452 |
with gr.Accordion("📙 Citation", open=False):
|
453 |
gr.Textbox(value=CITATION_BUTTON_TEXT, label=CITATION_BUTTON_LABEL, elem_id="citation-button-main", interactive=False)
|