Smita R Smita committed on
Commit 21516b7 · unverified · 1 Parent(s): 482c591

confirmation and error messages post submission (#74)

Files changed (3):
  1. content.py +54 -0
  2. requirements.txt +1 -0
  3. submission.py +104 -22
content.py CHANGED
@@ -67,6 +67,15 @@ The **End-to-End Discovery** category tests whether agents can carry out a compl
 Scores in this category are aggregated from two benchmarks, providing the first standardized way to evaluate automated scientific discovery (ASD) agents across all stages of the research process. Use the links above to explore individual benchmark pages.
 <br>
 """
+SUBMISSION_CONFIRMATION = """
+**Your agent has been submitted to AstaBench for evaluation.**
+<br><br>
+🙏 Thanks for contributing!
+<br><br>
+You'll receive an email shortly with confirmation and next steps. If there are any issues with your submission, our team will reach out within 5–7 business days.
+<br><br>
+We appreciate your support in advancing scientific AI.
+"""
 
 # External URLs for benchmark descriptions
 SCHOLAR_QA_CS_URL = "https://www.semanticscholar.org/paper/OpenScholar%3A-Synthesizing-Scientific-Literature-LMs-Asai-He/b40df4b273f255b3cb5639e220c8ab7b1bdb313e"
@@ -220,6 +229,12 @@ css = """
     --color-background-dark: var(--neutral-900); /* #032629 */
     --color-text-light: var(--neutral-50); /* #FAF2E9 */
 }
+
+/* Global Styles */
+h2 {
+    overflow: hidden;
+}
+
 #intro-paragraph {
     font-size: 18px;
     max-width: 60%;
@@ -485,6 +500,45 @@ span.wrap[tabindex="0"][role="button"][data-editable="false"] {
     text-decoration: none;
 }
 /*------ Submission Page CSS ------*/
+#submission-modal .modal-container {
+    height: auto;
+    max-width: 600px;
+}
+
+#submission-modal-content {
+    padding: 20px;
+    background-color: inherit;
+    border-radius: 8px;
+    text-align: center;
+}
+
+#submission-modal-content p{
+    font-size: 16px;
+}
+
+.spinner-container {
+    display: flex;
+    flex-direction: column;
+    align-items: center;
+    justify-content: center;
+    padding: 30px;
+}
+
+.spinner {
+    width: 50px;
+    height: 50px;
+    border: 5px solid #dee2e6;
+    border-top: 5px solid #007bff;
+    border-radius: 50%;
+    animation: spin 1s linear infinite;
+    margin-bottom: 20px;
+}
+
+@keyframes spin {
+    0% { transform: rotate(0deg); }
+    100% { transform: rotate(360deg); }
+}
+
 #custom-form-group {
     border: 1px solid #000 !important;
     border-radius: 4px !important;
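The new confirmation copy and modal/spinner styles can be previewed without running the full submission flow. The snippet below is a hypothetical standalone preview (not part of this commit); it assumes `css` and `SUBMISSION_CONFIRMATION` are importable from `content.py` as laid out in this repo.

```python
# Hypothetical preview of the styles and copy added above; not part of this commit.
import gradio as gr

from content import css, SUBMISSION_CONFIRMATION  # assumes this repo's module layout

with gr.Blocks(css=css) as demo:
    # Same spinner markup that submission.py injects into the loading modal.
    gr.HTML('<div class="spinner-container"><div class="spinner"></div>'
            '<p>Processing your submission...</p></div>')
    # Confirmation copy rendered the way the success modal renders it.
    gr.Markdown(SUBMISSION_CONFIRMATION, elem_id="submission-modal-content")

if __name__ == "__main__":
    demo.launch()
```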
requirements.txt CHANGED
@@ -35,6 +35,7 @@ frozenlist==1.6.0
 fsspec==2025.3.0
 gradio==5.30.0
 gradio_client==1.10.1
+gradio_modal==0.0.4
 groovy==0.1.2
 h11==0.16.0
 httpcore==1.0.9
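`gradio_modal` supplies the `Modal` layout element used in `submission.py` below: a block declared with `Modal(visible=False)` stays hidden until an event handler returns a visibility update targeted at it. A minimal sketch of that open/close pattern (component names here are illustrative, not from the commit):

```python
# Minimal sketch of the gradio_modal open/close pattern; names are illustrative.
import gradio as gr
from gradio_modal import Modal

with gr.Blocks() as demo:
    open_btn = gr.Button("Open")
    with Modal(visible=False) as popup:  # hidden until an event shows it
        gr.Markdown("Hello from the modal.")
        close_btn = gr.Button("Close")

    # Returning gr.update(visible=True/False) to the Modal toggles it.
    open_btn.click(lambda: gr.update(visible=True), None, popup)
    close_btn.click(lambda: gr.update(visible=False), None, popup)

if __name__ == "__main__":
    demo.launch()
```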
submission.py CHANGED
@@ -4,6 +4,7 @@ import sys
 import matplotlib
 from agenteval.cli import SUBMISSION_METADATA_FILENAME
 from agenteval.models import SubmissionMetadata
+from gradio_modal import Modal
 
 matplotlib.use('Agg')
 
@@ -39,6 +40,7 @@ from config import (
 from content import (
     CITATION_BUTTON_LABEL,
     CITATION_BUTTON_TEXT,
+    SUBMISSION_CONFIRMATION,
     format_error,
     format_log,
     format_warning,
@@ -86,6 +88,9 @@ def checked_upload_folder(
         submission_name=submission_name_ul,
     )
 
+def show_loading_spinner():
+    return gr.update(visible=True)
+
 def add_new_eval(
     val_or_test: str,
     agent_name: str | None,
@@ -99,14 +104,20 @@
     profile: gr.OAuthProfile,
 ):
     if not agent_name:
-        return format_warning("Please provide an agent name.")
-
-    def submission_error(msg: str) -> str:
-        logger.debug(f"agent {agent_name}: {msg}")
-        return format_error(msg)
+        return (
+            format_warning("Please provide an agent name."),  # error_message
+            gr.update(visible=True),   # error_modal
+            gr.update(visible=False),  # success_modal
+            gr.update(visible=False)   # loading_modal
+        )
 
     if path_to_file is None:
-        return format_warning("Please attach a .tar.gz file.")
+        return (
+            format_warning("Please attach a .tar.gz file."),  # error_message
+            gr.update(visible=True),   # error_modal
+            gr.update(visible=False),  # success_modal
+            gr.update(visible=False)   # loading_modal
+        )
 
     logger.info(f"agent {agent_name}: Checking submission")
 
@@ -130,10 +141,20 @@
         creation_date_str = user_data_resp.json()["createdAt"]
         created_at = datetime.strptime(creation_date_str, "%Y-%m-%dT%H:%M:%S.%fZ").replace(tzinfo=timezone.utc)
         if submission_time - created_at < timedelta(days=60):
-            return format_error("This account is not authorized to submit here (account too new).")
+            return (
+                format_error("This account is not authorized to submit here (account too new)."),  # error_message
+                gr.update(visible=True),   # error_modal
+                gr.update(visible=False),  # success_modal
+                gr.update(visible=False)   # loading_modal
+            )
     except Exception as e:
         logger.warning(f"Error checking user account age: {e}")
-        return submission_error("Could not verify account age. Please try again later.")
+        return (
+            format_error("Could not verify account age. Please try again later."),  # error_message
+            gr.update(visible=True),   # error_modal
+            gr.update(visible=False),  # success_modal
+            gr.update(visible=False)   # loading_modal
+        )
 
     logger.debug(f"agent {agent_name}: Submission frequency check {profile.username}")
     contact_infos = try_load_dataset_submission(
@@ -146,12 +167,22 @@
     )
     if user_submission_dates and (submission_time - user_submission_dates[-1] < timedelta(days=1)):
         logger.info(f"agent {agent_name}: Denied submission because user {username} submitted recently")
-        return format_error("You already submitted once in the last 24h for this split; please try again later.")
+        return (
+            format_error("You already submitted once in the last 24h for this split; please try again later."),  # error_message
+            gr.update(visible=True),   # error_modal
+            gr.update(visible=False),  # success_modal
+            gr.update(visible=False)   # loading_modal
+        )
 
     logger.debug(f"agent {agent_name}: Email validation {email}")
     _, parsed_mail = parseaddr(email)
     if "@" not in parsed_mail:
-        return format_warning("Please provide a valid email address.")
+        return (
+            format_warning("Please provide a valid email address."),  # error_message
+            gr.update(visible=True),   # error_modal
+            gr.update(visible=False),  # success_modal
+            gr.update(visible=False)   # loading_modal
+        )
 
     logger.debug(f"agent {agent_name}: Duplicate submission check")
     if val_or_test in current_eval_results_for_submission and len(current_eval_results_for_submission[val_or_test]) > 0:
@@ -159,7 +190,12 @@
         for sub_item in existing_submissions:
             if (sub_item.get("agent_name", "").lower() == agent_name.lower() and
                     sub_item.get("username", "").lower() == username.lower()):
-                return format_warning("This agent name by this user has already been submitted to this split.")
+                return (
+                    format_warning("This agent name by this user has already been submitted to this split."),  # error_message
+                    gr.update(visible=True),   # error_modal
+                    gr.update(visible=False),  # success_modal
+                    gr.update(visible=False)   # loading_modal
+                )
 
     safe_username = sanitize_path_component(username)
     safe_agent_name = sanitize_path_component(agent_name)
@@ -181,9 +217,19 @@
                 out.write(fobj.read())
                 members_extracted +=1
         if members_extracted == 0:
-            return submission_error("Submission tarball is empty or contains no valid files.")
+            return (
+                format_error("Submission tarball is empty or contains no valid files."),  # error_message
+                gr.update(visible=True),   # error_modal
+                gr.update(visible=False),  # success_modal
+                gr.update(visible=False)   # loading_modal
+            )
     except Exception as e:
-        return submission_error(f"Error extracting file: {e}. Ensure it's a valid .tar.gz.")
+        return (
+            format_error(f"Error extracting file: {e}. Ensure it's a valid .tar.gz."),  # error_message
+            gr.update(visible=True),   # error_modal
+            gr.update(visible=False),  # success_modal
+            gr.update(visible=False)   # loading_modal
+        )
 
     submission_name = f"{safe_username}_{safe_agent_name}_{submission_time.strftime('%Y-%m-%d_%H-%M-%S')}"
 
@@ -204,9 +250,19 @@
     try:
         checked_upload_folder(api, extracted_dir, SUBMISSION_DATASET, CONFIG_NAME, val_or_test, submission_name)
     except ValueError as e:
-        return submission_error(str(e))
+        return (
+            format_error(str(e)),  # error_message
+            gr.update(visible=True),   # error_modal
+            gr.update(visible=False),  # success_modal
+            gr.update(visible=False)   # loading_modal
+        )
     except Exception as e:
-        return submission_error(f"Failed to upload raw submission: {e}")
+        return (
+            format_error(f"Failed to upload raw submission: {e}"),  # error_message
+            gr.update(visible=True),   # error_modal
+            gr.update(visible=False),  # success_modal
+            gr.update(visible=False)   # loading_modal
+        )
 
     logger.info(f"agent {agent_name}: Save contact information")
     contact_info = subm_meta.model_dump()
@@ -223,11 +279,20 @@
     try:
         contact_infos.push_to_hub(CONTACT_DATASET, config_name=CONFIG_NAME)
     except Exception as e:
-        return submission_error(f"Submission recorded, but contact info failed to save: {e}")
-
-    msg = f"Agent '{agent_name}' submitted successfully by '{username}' to '{val_or_test}' split. "
-    logger.info(f"agent {agent_name}: {msg}")
-    return format_log(msg)
+        return (
+            format_error(f"Submission recorded, but contact info failed to save: {e}"),  # error_message
+            gr.update(visible=True),   # error_modal
+            gr.update(visible=False),  # success_modal
+            gr.update(visible=False)   # loading_modal
+        )
+
+    logger.info(f"Agent '{agent_name}' submitted successfully by '{username}' to '{val_or_test}' split.")
+    return (
+        "",                        # error_message
+        gr.update(visible=False),  # error_modal
+        gr.update(visible=True),   # success_modal
+        gr.update(visible=False)   # loading_modal
+    )
 
 def _deprecated_scoring_logic():
     # No longer triggered on eval submission. Kept for quick reference for a little while (2025). TODO delete this.
@@ -350,9 +415,26 @@ def build_page():
     )
     with gr.Row():
         submit_eval_button = gr.Button("Submit Evaluation")
-    submission_result = gr.Markdown()
+
+    # Modals for loading spinner, success and error messages
+    with Modal(visible=False, elem_id="submission-modal") as loading_modal:
+        with gr.Column(elem_id="submission-modal-content"):
+            gr.HTML('<div class="spinner-container"><div class="spinner"></div><p>Processing your submission...</p></div>')
+
+    with Modal(visible=False, elem_id="submission-modal") as error_modal:
+        with gr.Column(elem_id="submission-modal-content"):
+            gr.Markdown("## ⚠️ Error")
+            error_message = gr.Markdown()
+
+    with Modal(visible=False, elem_id="submission-modal") as success_modal:
+        with gr.Column(elem_id="submission-modal-content"):
+            gr.Markdown(SUBMISSION_CONFIRMATION)
 
     submit_eval_button.click(
+        show_loading_spinner,
+        None,
+        [loading_modal],
+    ).then(
         add_new_eval,
         [
             level_of_test_radio,
@@ -365,7 +447,7 @@ def build_page():
             username_tb,
             mail_tb
         ],
-        submission_result,
+        [error_message, error_modal, success_modal, loading_modal],
    )
     with gr.Accordion("📙 Citation", open=False):
         gr.Textbox(value=CITATION_BUTTON_TEXT, label=CITATION_BUTTON_LABEL, elem_id="citation-button-main", interactive=False)
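Every early exit in `add_new_eval` now returns the same four outputs, in the order wired into `submit_eval_button.click(...).then(...)`: the error text, then visibility updates for the error, success, and loading modals. A hedged sketch of a helper that captures that contract (hypothetical, not in this commit) could look like:

```python
# Hypothetical helper illustrating the 4-tuple contract add_new_eval returns;
# not part of this commit.
import gradio as gr

def _modal_response(error_msg: str = ""):
    """Build (error_message, error_modal, success_modal, loading_modal) updates."""
    failed = bool(error_msg)
    return (
        error_msg,                      # error_message markdown text
        gr.update(visible=failed),      # error_modal shown only on failure
        gr.update(visible=not failed),  # success_modal shown only on success
        gr.update(visible=False),       # loading_modal always dismissed
    )

# e.g. return _modal_response(format_warning("Please provide an agent name."))
# on a failed check, or return _modal_response() on success.
```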