Files changed (1) hide show
  1. app.py +196 -113
app.py CHANGED
@@ -287,7 +287,7 @@ def get_evaluator_questions(email, disease_map_data, drug_map_data, user_all_spe
287
  disease_subspecs = set(specs.get('subspecialties', []))
288
 
289
  # Check for intersection
290
- if user_all_specs.intersection(disease_specs) or user_all_specs.intersection(disease_subspecs) or len(user_all_specs)==0:
291
  relevant_diseases.append(disease)
292
 
293
  relevant_drugs = []
@@ -553,9 +553,9 @@ def go_to_page0_from_minus1(question_in_progress_state):
553
 
554
  def go_to_eval_progress_modal(name, email, specialty_dd, subspecialty_dd, years_exp_radio, exp_explanation_tb, npi_id, our_methods=our_methods):
555
  # 校验用户信息
556
- if not name or not email or not years_exp_radio: # or not specialty_dd
557
- gr.Info("Please fill out all the required fields (name, email, years of experience). If you are not a licensed physician with a specific specialty, please choose the specialty that most closely aligns with your biomedical expertise.", duration=5)
558
- return gr.update(visible=True), gr.update(visible=False), None, "Please fill out all the required fields (name, email, years of experience). If you are not a licensed physician with a specific specialty, please choose the specialty that most closely aligns with your biomedical expertise.", gr.Chatbot(), gr.Chatbot(), gr.Chatbot(), gr.Chatbot(), gr.HTML(), gr.State()
559
 
560
  gr.Info("Loading the data...", duration=3)
561
  user_info, chat_a_answer, chat_b_answer, chat_a_reasoning, chat_b_reasoning, page1_prompt, page1_reference_answer, question_for_eval, remaining_count = get_next_eval_question(
@@ -591,7 +591,136 @@ def go_to_page1(show_page_1):
591
  return updates
592
 
593
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
594
  # Define restrict function for each criterion
 
 
595
  def make_restrict_function(base_choices):
596
  def restrict_choices_page1(radio_choice, score_a, score_b):
597
  """
@@ -723,7 +852,7 @@ def make_restrict_function(base_choices):
723
  # --- Define Callback Functions for Confirmation Flow ---
724
 
725
 
726
- def build_row_dict(data_subset_state, user_info, question_quality_comments, pairwise, comparisons_reasons, *args):
727
  num_criteria = len(criteria)
728
  ratings_A_vals = list(args[:num_criteria])
729
  ratings_B_vals = list(args[num_criteria:])
@@ -746,7 +875,7 @@ def build_row_dict(data_subset_state, user_info, question_quality_comments, pair
746
  "Prompt": prompt_text,
747
  "ResponseA_Model": response_A_model,
748
  "ResponseB_Model": response_B_model,
749
- "Question Quality Comments": question_quality_comments,
750
  }
751
 
752
  pairwise = [mapping.get(val, val) for val in pairwise]
@@ -760,10 +889,10 @@ def build_row_dict(data_subset_state, user_info, question_quality_comments, pair
760
  return row
761
 
762
 
763
- def final_submit(data_subset_state, user_info, question_quality_comments, pairwise, comparisons_reasons, *args):
764
  # --- Part 1: Submit the current results (Existing Logic) ---
765
- row_dict = build_row_dict(data_subset_state, user_info, question_quality_comments,
766
- pairwise, comparisons_reasons, *args)
767
  append_to_sheet(user_data=None, custom_row_dict=row_dict, custom_sheet_name=str(
768
  TXAGENT_RESULTS_SHEET_BASE_NAME), add_header_when_create_sheet=True)
769
 
@@ -804,15 +933,14 @@ def final_submit(data_subset_state, user_info, question_quality_comments, pairwi
804
 
805
  # Function to validate page1 inputs and directly submit if valid
806
  def validate_and_submit_page1(data_subset_state, user_info, *combined_values):
807
- # combined_values contains question_quality_comments + pairwise choices + comparison reasons + ratings
808
  criteria_count = len(criteria_for_comparison)
809
- question_quality_comments = combined_values[0]
810
- pairwise_list = list(combined_values[1:criteria_count+1])
811
  comparison_reasons_list = list(
812
- combined_values[criteria_count+1:criteria_count*2+1])
813
  ratings_A_list = list(
814
- combined_values[criteria_count*2+1:criteria_count*3+1])
815
- ratings_B_list = list(combined_values[criteria_count*3+1:])
816
 
817
  # Check if all pairwise comparisons are filled
818
  if any(answer is None for answer in pairwise_list):
@@ -873,8 +1001,8 @@ def validate_and_submit_page1(data_subset_state, user_info, *combined_values):
873
  )
874
  gr.Info("Submitting your evaluation and loading the next question...")
875
  # If validation passes, call final_submit and handle form reset
876
- submit_result = final_submit(data_subset_state, user_info, question_quality_comments, pairwise_list,
877
- comparison_reasons_list, *ratings_A_list, *ratings_B_list)
878
 
879
  # Check if there are more questions by looking at the page1 update dict
880
  # submit_result[1] is the page1 update, submit_result[2] is the final_page update
@@ -960,18 +1088,6 @@ centered_col_css = """
960
  width: 100% !important; /* Occupy full width of its column */
961
  white-space: normal !important; /* Allow text to wrap onto multiple lines */
962
  }
963
- #txagent-demo-btn {
964
- background-color: #4CAF50 !important;
965
- color: white !important;
966
- border-color: #4CAF50 !important;
967
- margin-top: 10px !important;
968
- }
969
- #api-key-btn {
970
- background-color: #FF9800 !important;
971
- color: white !important;
972
- border-color: #FF9800 !important;
973
- margin-top: 10px !important;
974
- }
975
  .criteria-radio-score-label [role="radiogroup"],
976
  .criteria-radio-score-label .gr-radio-group,
977
  .criteria-radio-score-label .flex {
@@ -1005,6 +1121,7 @@ with gr.Blocks(css=centered_col_css) as demo:
1005
  pairwise_state = gr.State()
1006
  scores_A_state = gr.State()
1007
  comparison_reasons = gr.State()
 
1008
  unqualified_A_state = gr.State()
1009
  data_subset_state = gr.State()
1010
  question_in_progress = gr.State(0)
@@ -1033,38 +1150,11 @@ with gr.Blocks(css=centered_col_css) as demo:
1033
  with gr.Column(visible=True, elem_id="page-1") as page_minus1:
1034
  gr.HTML("""
1035
  <div>
1036
- <h1>TxAgent: An AI Agent for Therapeutics</h1>
1037
  </div>
1038
  """)
1039
- # Add TxAgent demo and API key buttons
1040
- with gr.Column(scale=1):
1041
- api_key_btn = gr.Button(
1042
- value="Request Access",
1043
- variant="secondary",
1044
- size="lg",
1045
- elem_id="api-key-btn"
1046
- )
1047
- with gr.Column(scale=1):
1048
- txagent_demo_btn = gr.Button(
1049
- value="Access TxAgent",
1050
- variant="secondary",
1051
- size="lg",
1052
- elem_id="txagent-demo-btn"
1053
- )
1054
-
1055
- gr.Markdown(
1056
- """
1057
- For live access to TxAgent, you can:
1058
- - Access TxAgent if you have an account.
1059
- - Request access if you do not have an account yet.
1060
-
1061
- We look forward to your feedback!
1062
- """
1063
- )
1064
-
1065
- # Add extra white space between sections
1066
- gr.HTML("<br>")
1067
-
1068
  with gr.Column(scale=1):
1069
  participate_eval_btn = gr.Button(
1070
  value="Evaluate TxAgent",
@@ -1083,32 +1173,27 @@ with gr.Blocks(css=centered_col_css) as demo:
1083
  Thank you for helping improve TxAgent!
1084
  """
1085
  )
1086
-
1087
-
1088
-
1089
-
1090
- # with gr.Column(scale=1):
1091
- # submit_questions_btn = gr.Button(
1092
- # value="Submit Your Therapeutic Questions",
1093
- # variant="primary",
1094
- # size="lg",
1095
- # elem_id="submit-btn"
1096
- # )
1097
 
1098
  # with gr.Row(elem_classes=["center-row"]):
1099
  # 第二行:分别放两段说明文字
1100
- # with gr.Column(scale=1):
1101
- # gr.Markdown(
1102
- # """
1103
- # By submitting therapeutic questions, you will:
1104
- # - Help identify edge cases and blind spots for AI models.
1105
- # - Help extend AI models to reason in new domains.
1106
- # - Directly shape future model improvements.
1107
-
1108
- # We look forward to seeing your feedback!
1109
- # """
1110
- # )
1111
 
 
 
 
1112
 
1113
  # Add contact information in Markdown format
1114
  contact_info_markdown = """
@@ -1125,29 +1210,11 @@ with gr.Blocks(css=centered_col_css) as demo:
1125
  # For the Google Form button, we'll use JavaScript to open a new tab.
1126
  # The URL for the Google Form should be replaced with the actual link.
1127
  google_form_url = "https://forms.gle/pYvyvEQQwS5gdupQA"
1128
- # submit_questions_btn.click(
1129
- # fn=None,
1130
- # inputs=None,
1131
- # outputs=None,
1132
- # js=f"() => {{ window.open('{google_form_url}', '_blank'); }}"
1133
- # )
1134
-
1135
- # TxAgent demo button click handler
1136
- txagent_demo_url = "https://txagent.curebench.ai/"
1137
- txagent_demo_btn.click(
1138
  fn=None,
1139
  inputs=None,
1140
  outputs=None,
1141
- js=f"() => {{ window.open('{txagent_demo_url}', '_blank'); }}"
1142
- )
1143
-
1144
- # API key application button click handler
1145
- api_key_url = "https://docs.google.com/forms/d/e/1FAIpQLScEFhgT1X0wOkpWjEOMGpvhDFyIfoSMzJZ2HA9o0F0BaNcQPw/viewform?usp=dialog"
1146
- api_key_btn.click(
1147
- fn=None,
1148
- inputs=None,
1149
- outputs=None,
1150
- js=f"() => {{ window.open('{api_key_url}', '_blank'); }}"
1151
  )
1152
 
1153
  # Page 0: Welcome / Informational page.
@@ -1158,9 +1225,9 @@ with gr.Blocks(css=centered_col_css) as demo:
1158
  email = gr.Textbox(
1159
  label="Email (required). Use the same email each time you log into this evaluation portal to avoid receiving repeat questions.")
1160
  specialty_dd = gr.Dropdown(
1161
- choices=specialties_list, label="Primary Medical Specialty (optional). Visit https://www.abms.org/member-boards/specialty-subspecialty-certificates/ for categories.", multiselect=True)
1162
  subspecialty_dd = gr.Dropdown(
1163
- choices=subspecialties_list, label="Subspecialty (optional). Visit https://www.abms.org/member-boards/specialty-subspecialty-certificates/ for categories.", multiselect=True)
1164
  npi_id = gr.Textbox(
1165
  label="National Provider Identifier ID (optional). Visit https://npiregistry.cms.hhs.gov/search to find your NPI ID. Leave blank if you do not have an NPI ID.")
1166
  years_exp_radio = gr.Radio(
@@ -1195,7 +1262,7 @@ with gr.Blocks(css=centered_col_css) as demo:
1195
 
1196
  # Page 1: Pairwise Comparison.
1197
  with gr.Column(visible=False) as page1:
1198
- with gr.Accordion("Click to See Instructions", open=False):
1199
  gr.Markdown("""
1200
  ## Instructions:
1201
  Please review these instructions and enter your information to begin:
@@ -1213,13 +1280,21 @@ with gr.Blocks(css=centered_col_css) as demo:
1213
  # gr.Markdown("Comparison")
1214
  # Add small red button and comments text box in the same row
1215
  page1_prompt = gr.HTML()
1216
-
1217
- question_quality_comments = gr.Textbox(
1218
- placeholder="(Optional) Comments on the question quality, question relevance, or your suitability to evaluate it.",
1219
- show_label=False,
1220
- scale=3,
1221
- container=False,
1222
- )
 
 
 
 
 
 
 
 
1223
 
1224
  page1_error_box = gr.Markdown("") # ADDED: display validation errors
1225
 
@@ -1389,14 +1464,22 @@ with gr.Blocks(css=centered_col_css) as demo:
1389
  chat_b_answer, chat_a_reasoning, chat_b_reasoning, page1_prompt, data_subset_state],
1390
  scroll_to_output=True
1391
  )
 
 
 
 
 
 
 
 
1392
 
1393
  # Transition from Page 1 to direct submission (no confirmation modal)
1394
  submit_btn_1.click(
1395
  fn=validate_and_submit_page1,
1396
- inputs=[data_subset_state, user_info_state, question_quality_comments, *pairwise_inputs,
1397
  *comparison_reasons_inputs, *ratings_A_page1, *ratings_B_page1],
1398
  outputs=[page1_error_box, page1, final_page, page0_error_box, chat_a_answer, chat_b_answer, chat_a_reasoning, chat_b_reasoning,
1399
- page1_prompt, data_subset_state, user_info_state, question_quality_comments, *pairwise_inputs, *comparison_reasons_inputs, *ratings_A_page1, *ratings_B_page1],
1400
  scroll_to_output=True
1401
  )
1402
 
 
287
  disease_subspecs = set(specs.get('subspecialties', []))
288
 
289
  # Check for intersection
290
+ if user_all_specs.intersection(disease_specs) or user_all_specs.intersection(disease_subspecs):
291
  relevant_diseases.append(disease)
292
 
293
  relevant_drugs = []
 
553
 
554
  def go_to_eval_progress_modal(name, email, specialty_dd, subspecialty_dd, years_exp_radio, exp_explanation_tb, npi_id, our_methods=our_methods):
555
  # Validate the user's information
556
+ if not name or not email or not specialty_dd or not years_exp_radio:
557
+ gr.Info("Please fill out all the required fields (name, email, specialty, years of experience). If you are not a licensed physician with a specific specialty, please choose the specialty that most closely aligns with your biomedical expertise.", duration=5)
558
+ return gr.update(visible=True), gr.update(visible=False), None, "Please fill out all the required fields (name, email, specialty, years of experience). If you are not a licensed physician with a specific specialty, please choose the specialty that most closely aligns with your biomedical expertise.", gr.Chatbot(), gr.Chatbot(), gr.Chatbot(), gr.Chatbot(), gr.HTML(), gr.State()
559
 
560
  gr.Info("Loading the data...", duration=3)
561
  user_info, chat_a_answer, chat_b_answer, chat_a_reasoning, chat_b_reasoning, page1_prompt, page1_reference_answer, question_for_eval, remaining_count = get_next_eval_question(
 
591
  return updates
592
 
593
 
594
+ # --- Skip Question Modal Callbacks ---
595
def skip_question_and_load_new(user_info_state, our_methods):
    """Fetch the next evaluation question for the evaluator in ``user_info_state``.

    Args:
        user_info_state: ``None``, or an 8-item sequence unpacked as
            (name, email, specialty, subspecialty, years of experience,
            experience explanation, NPI id, question id). The trailing
            question id is ignored here.
        our_methods: Forwarded unchanged as the last argument of
            ``get_next_eval_question``.

    Returns:
        An 11-item tuple in every branch: two visibility updates, the user
        info (or ``None``), a message string, four chat values, the prompt,
        the reference answer, and the question record.
        NOTE(review): the components these 11 values are wired to are not
        visible from this function -- confirm against the event listener.
    """
    if user_info_state is None:
        # Defensive path: no evaluator identity, so nothing can be fetched.
        return (
            gr.update(visible=False),
            gr.update(visible=False),
            None,
            "",
            gr.Chatbot(),
            gr.Chatbot(),
            gr.Chatbot(),
            gr.Chatbot(),
            gr.HTML(),
            gr.Markdown(),
            gr.State(),
        )

    (
        name,
        email,
        specialty_dd,
        subspecialty_dd,
        years_exp_radio,
        exp_explanation_tb,
        npi_id,
        _previous_q_id,
    ) = user_info_state

    (
        next_user_info,
        answer_a,
        answer_b,
        reasoning_a,
        reasoning_b,
        prompt_value,
        reference_value,
        next_question,
        questions_left,
    ) = get_next_eval_question(
        name,
        email,
        specialty_dd,
        subspecialty_dd,
        years_exp_radio,
        exp_explanation_tb,
        npi_id,
        our_methods,
    )

    if questions_left == 0:
        # Pool exhausted: keep both updates hidden and surface the closing message.
        return (
            gr.update(visible=False),
            gr.update(visible=False),
            None,
            "Based on your submitted data, you have no more questions to evaluate. You may exit the page; we will follow-up if we require anything else from you. Thank you!",
            gr.Chatbot(),
            gr.Chatbot(),
            gr.Chatbot(),
            gr.Chatbot(),
            gr.HTML(),
            gr.Markdown(),
            gr.State(),
        )

    # A fresh question is available: second update becomes visible and the
    # freshly fetched values are passed straight through.
    return (
        gr.update(visible=False),
        gr.update(visible=True),
        next_user_info,
        "",
        answer_a,
        answer_b,
        reasoning_a,
        reasoning_b,
        prompt_value,
        reference_value,
        next_question,
    )
609
+
610
+ # --- Skip‑question handler for the "Wrong Question?" button -------------------
611
+
612
+
613
def skip_current_question(user_info_state, our_methods: list = our_methods):
    """Skip the question on screen and load the next one in place.

    Every branch returns the same 8-item tuple, in this order:
    (user info, error/notice text, chat A answer, chat B answer,
    chat A reasoning, chat B reasoning, prompt HTML, question record).
    This matches the 8 outputs listed where the skip button is wired up.

    Args:
        user_info_state: ``None`` when no evaluation session has started, or
            an 8-item sequence unpacked as (name, email, specialty,
            subspecialty, years of experience, experience description,
            NPI id, question id). The trailing question id is ignored.
        our_methods: Forwarded unchanged to ``get_next_eval_question``.

    Returns:
        tuple: The 8 values described above.
    """
    # Guard: user clicked before the session started. Return one value per
    # output (8), not 6 as before, so the arity matches the other branches.
    if user_info_state is None:
        return (
            None,
            gr.update(
                value="Please start the evaluation before skipping questions."),
            gr.update(value=[]),  # Chatbot A answer
            gr.update(value=[]),  # Chatbot B answer
            gr.update(value=[]),  # Chatbot A reasoning
            gr.update(value=[]),  # Chatbot B reasoning
            gr.update(value=""),  # Prompt HTML
            gr.State(),           # data_subset_state
        )

    # Only announce the skip once we know there is a session to skip within.
    gr.Info("Skipping this question and loading the next one…", duration=5)

    # Unpack evaluator identity
    name, email, specialty_dd, subspecialty_dd, yrs_exp, exp_desc, npi_id, _ = user_info_state

    # Pull the next unused question. Only the refreshed user info, the
    # question record and the remaining count are used; the chat values are
    # rebuilt from the question record below.
    (
        user_info_new,
        _chat_a_answer,
        _chat_b_answer,
        _chat_a_reasoning,
        _chat_b_reasoning,
        _prompt_comp,
        _ref_comp,
        question_for_eval,
        remaining,
    ) = get_next_eval_question(
        name, email, specialty_dd, subspecialty_dd, yrs_exp, exp_desc, npi_id, our_methods
    )

    # If the pool is exhausted, just notify the evaluator
    if remaining == 0 or question_for_eval is None:
        final_msg = (
            "Based on your submitted data, you have no more questions to evaluate. "
            "You may exit the page; we will follow‑up if we require anything else from you. "
            "Thank you!"
        )
        return (
            user_info_state,
            gr.update(value=final_msg),
            gr.update(value=[]),
            gr.update(value=[]),
            gr.update(value=[]),
            gr.update(value=[]),
            gr.update(value=""),
            gr.State(),
        )

    # --- Build fresh values for the existing UI components ---
    chat_a_answer, chat_a_reasoning, _ = format_chat(
        question_for_eval['models'][0]['reasoning_trace'], tool_database_labels)
    chat_b_answer, chat_b_reasoning, _ = format_chat(
        question_for_eval['models'][1]['reasoning_trace'], tool_database_labels)

    prompt_html = (
        f"<div style='background-color: #FFEFD5; border: 2px solid #FF8C00; padding: 10px; "
        f"border-radius: 5px; color: black;'><strong style='color: black;'>Question:</strong> "
        f"{question_for_eval['question']}</div>"
    )
    gr.Info("New question loaded…", duration=3)

    # Return updates to refresh Page 1 in place
    return (
        user_info_new,
        gr.update(value=""),                # clear any previous error text
        gr.update(value=chat_a_answer),     # Chatbot A answer
        gr.update(value=chat_b_answer),     # Chatbot B answer
        gr.update(value=chat_a_reasoning),  # Chatbot A reasoning
        gr.update(value=chat_b_reasoning),  # Chatbot B reasoning
        gr.update(value=prompt_html),       # Prompt
        question_for_eval,                  # store for later pages
    )
688
+
689
+ # --- Handler for "Wrong Question?": flags nonsense and skips
690
+
691
+
692
def flag_nonsense_and_skip(user_info_state, skip_comments=""):
    """Log the current question as flagged, then advance to the next one.

    When a session exists, a row marking the question as nonsensical or
    irrelevant is appended to the results sheet first, so the feedback is
    kept even if the evaluator stops here. The UI refresh is then delegated
    to ``skip_current_question``.

    Args:
        user_info_state: ``None``, or an 8-item sequence whose first two
            items are the evaluator's name and email and whose last item is
            the id of the question being flagged.
        skip_comments: Free-text reason stored in the "Skip Comments" column.

    Returns:
        Whatever ``skip_current_question(user_info_state)`` returns.
    """
    if user_info_state is None:
        # No session, so there is nothing to log; let the skip logic respond.
        return skip_current_question(user_info_state)

    (
        name,
        email,
        _specialty,
        _subspecialty,
        _years,
        _experience,
        _npi,
        q_id,
    ) = user_info_state

    flagged_row = {
        "Timestamp": datetime.datetime.now().isoformat(),
        "Name": name,
        "Email": email,
        "Question ID": q_id,
        "Question Makes No Sense or Biomedically Irrelevant": True,
        "Skip Comments": skip_comments,
    }
    append_to_sheet(
        user_data=None,
        custom_row_dict=flagged_row,
        custom_sheet_name=str(TXAGENT_RESULTS_SHEET_BASE_NAME),
        add_header_when_create_sheet=True,
    )

    # Advance the UI using the shared skip logic.
    return skip_current_question(user_info_state)
720
+
721
  # Define restrict function for each criterion
722
+
723
+
724
  def make_restrict_function(base_choices):
725
  def restrict_choices_page1(radio_choice, score_a, score_b):
726
  """
 
852
  # --- Define Callback Functions for Confirmation Flow ---
853
 
854
 
855
+ def build_row_dict(data_subset_state, user_info, pairwise, comparisons_reasons, nonsense_btn_clicked, *args):
856
  num_criteria = len(criteria)
857
  ratings_A_vals = list(args[:num_criteria])
858
  ratings_B_vals = list(args[num_criteria:])
 
875
  "Prompt": prompt_text,
876
  "ResponseA_Model": response_A_model,
877
  "ResponseB_Model": response_B_model,
878
+ "Question Makes No Sense or Biomedically Irrelevant": nonsense_btn_clicked,
879
  }
880
 
881
  pairwise = [mapping.get(val, val) for val in pairwise]
 
889
  return row
890
 
891
 
892
+ def final_submit(data_subset_state, user_info, pairwise, comparisons_reasons, nonsense_btn_clicked, *args):
893
  # --- Part 1: Submit the current results (Existing Logic) ---
894
+ row_dict = build_row_dict(data_subset_state, user_info,
895
+ pairwise, comparisons_reasons, nonsense_btn_clicked, *args)
896
  append_to_sheet(user_data=None, custom_row_dict=row_dict, custom_sheet_name=str(
897
  TXAGENT_RESULTS_SHEET_BASE_NAME), add_header_when_create_sheet=True)
898
 
 
933
 
934
  # Function to validate page1 inputs and directly submit if valid
935
  def validate_and_submit_page1(data_subset_state, user_info, *combined_values):
936
+ # combined_values contains pairwise choices + comparison reasons + ratings
937
  criteria_count = len(criteria_for_comparison)
938
+ pairwise_list = list(combined_values[:criteria_count])
 
939
  comparison_reasons_list = list(
940
+ combined_values[criteria_count:criteria_count*2])
941
  ratings_A_list = list(
942
+ combined_values[criteria_count*2:criteria_count*3])
943
+ ratings_B_list = list(combined_values[criteria_count*3:])
944
 
945
  # Check if all pairwise comparisons are filled
946
  if any(answer is None for answer in pairwise_list):
 
1001
  )
1002
  gr.Info("Submitting your evaluation and loading the next question...")
1003
  # If validation passes, call final_submit and handle form reset
1004
+ submit_result = final_submit(data_subset_state, user_info, pairwise_list,
1005
+ comparison_reasons_list, False, *ratings_A_list, *ratings_B_list)
1006
 
1007
  # Check if there are more questions by looking at the page1 update dict
1008
  # submit_result[1] is the page1 update, submit_result[2] is the final_page update
 
1088
  width: 100% !important; /* Occupy full width of its column */
1089
  white-space: normal !important; /* Allow text to wrap onto multiple lines */
1090
  }
 
 
 
 
 
 
 
 
 
 
 
 
1091
  .criteria-radio-score-label [role="radiogroup"],
1092
  .criteria-radio-score-label .gr-radio-group,
1093
  .criteria-radio-score-label .flex {
 
1121
  pairwise_state = gr.State()
1122
  scores_A_state = gr.State()
1123
  comparison_reasons = gr.State()
1124
+ nonsense_btn_clicked = gr.State(False)
1125
  unqualified_A_state = gr.State()
1126
  data_subset_state = gr.State()
1127
  question_in_progress = gr.State(0)
 
1150
  with gr.Column(visible=True, elem_id="page-1") as page_minus1:
1151
  gr.HTML("""
1152
  <div>
1153
+ <h1>TxAgent Portal: AI Evaluation and Crowdsourcing of Therapeutic Questions</h1>
1154
  </div>
1155
  """)
1156
+ # with gr.Row(elem_classes=["center-row"]):
1157
+ # First row: place the two buttons side by side
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1158
  with gr.Column(scale=1):
1159
  participate_eval_btn = gr.Button(
1160
  value="Evaluate TxAgent",
 
1173
  Thank you for helping improve TxAgent!
1174
  """
1175
  )
1176
+ with gr.Column(scale=1):
1177
+ submit_questions_btn = gr.Button(
1178
+ value="Submit Your Therapeutic Questions",
1179
+ variant="primary",
1180
+ size="lg",
1181
+ elem_id="submit-btn"
1182
+ )
 
 
 
 
1183
 
1184
  # with gr.Row(elem_classes=["center-row"]):
1185
  # Second row: one explanatory paragraph for each button
1186
+ with gr.Column(scale=1):
1187
+ gr.Markdown(
1188
+ """
1189
+ By submitting therapeutic questions, you will:
1190
+ - Help identify edge cases and blind spots for AI models.
1191
+ - Help extend AI models to reason in new domains.
1192
+ - Directly shape future model improvements.
 
 
 
 
1193
 
1194
+ We look forward to seeing your feedback!
1195
+ """
1196
+ )
1197
 
1198
  # Add contact information in Markdown format
1199
  contact_info_markdown = """
 
1210
  # For the Google Form button, we'll use JavaScript to open a new tab.
1211
  # The URL for the Google Form should be replaced with the actual link.
1212
  google_form_url = "https://forms.gle/pYvyvEQQwS5gdupQA"
1213
+ submit_questions_btn.click(
 
 
 
 
 
 
 
 
 
1214
  fn=None,
1215
  inputs=None,
1216
  outputs=None,
1217
+ js=f"() => {{ window.open('{google_form_url}', '_blank'); }}"
 
 
 
 
 
 
 
 
 
1218
  )
1219
 
1220
  # Page 0: Welcome / Informational page.
 
1225
  email = gr.Textbox(
1226
  label="Email (required). Use the same email each time you log into this evaluation portal to avoid receiving repeat questions.")
1227
  specialty_dd = gr.Dropdown(
1228
+ choices=specialties_list, label="Primary Medical Specialty (required). Visit https://www.abms.org/member-boards/specialty-subspecialty-certificates/ for categories.", multiselect=True)
1229
  subspecialty_dd = gr.Dropdown(
1230
+ choices=subspecialties_list, label="Subspecialty (if applicable). Visit https://www.abms.org/member-boards/specialty-subspecialty-certificates/ for categories.", multiselect=True)
1231
  npi_id = gr.Textbox(
1232
  label="National Provider Identifier ID (optional). Visit https://npiregistry.cms.hhs.gov/search to find your NPI ID. Leave blank if you do not have an NPI ID.")
1233
  years_exp_radio = gr.Radio(
 
1262
 
1263
  # Page 1: Pairwise Comparison.
1264
  with gr.Column(visible=False) as page1:
1265
+ with gr.Accordion("Instructions", open=False):
1266
  gr.Markdown("""
1267
  ## Instructions:
1268
  Please review these instructions and enter your information to begin:
 
1280
  # gr.Markdown("Comparison")
1281
  # Add small red button and comments text box in the same row
1282
  page1_prompt = gr.HTML()
1283
+ with gr.Row():
1284
+ nonsense_btn = gr.Button(
1285
+ "Skip Question",
1286
+ size="sm",
1287
+ variant="stop", # red variant
1288
+ elem_id="invalid-question-btn",
1289
+ elem_classes=["short-btn"],
1290
+ scale=1
1291
+ )
1292
+ skip_comments = gr.Textbox(
1293
+ placeholder="(Optional) Why do you want to skip this question...",
1294
+ show_label=False,
1295
+ scale=3,
1296
+ container=False,
1297
+ )
1298
 
1299
  page1_error_box = gr.Markdown("") # ADDED: display validation errors
1300
 
 
1464
  chat_b_answer, chat_a_reasoning, chat_b_reasoning, page1_prompt, data_subset_state],
1465
  scroll_to_output=True
1466
  )
1467
+ # Skip the current question and load a new one when the evaluator flags it
1468
+ nonsense_btn.click(
1469
+ fn=flag_nonsense_and_skip,
1470
+ inputs=[user_info_state, skip_comments],
1471
+ outputs=[user_info_state, page1_error_box, chat_a_answer, chat_b_answer, chat_a_reasoning, chat_b_reasoning,
1472
+ page1_prompt, data_subset_state],
1473
+ scroll_to_output=True
1474
+ )
1475
 
1476
  # Transition from Page 1 to direct submission (no confirmation modal)
1477
  submit_btn_1.click(
1478
  fn=validate_and_submit_page1,
1479
+ inputs=[data_subset_state, user_info_state, *pairwise_inputs,
1480
  *comparison_reasons_inputs, *ratings_A_page1, *ratings_B_page1],
1481
  outputs=[page1_error_box, page1, final_page, page0_error_box, chat_a_answer, chat_b_answer, chat_a_reasoning, chat_b_reasoning,
1482
+ page1_prompt, data_subset_state, user_info_state, *pairwise_inputs, *comparison_reasons_inputs, *ratings_A_page1, *ratings_B_page1],
1483
  scroll_to_output=True
1484
  )
1485