update
#4
by
shgao
- opened
app.py
CHANGED
@@ -287,7 +287,7 @@ def get_evaluator_questions(email, disease_map_data, drug_map_data, user_all_spe
|
|
287 |
disease_subspecs = set(specs.get('subspecialties', []))
|
288 |
|
289 |
# Check for intersection
|
290 |
-
if user_all_specs.intersection(disease_specs) or user_all_specs.intersection(disease_subspecs)
|
291 |
relevant_diseases.append(disease)
|
292 |
|
293 |
relevant_drugs = []
|
@@ -553,9 +553,9 @@ def go_to_page0_from_minus1(question_in_progress_state):
|
|
553 |
|
554 |
def go_to_eval_progress_modal(name, email, specialty_dd, subspecialty_dd, years_exp_radio, exp_explanation_tb, npi_id, our_methods=our_methods):
|
555 |
# Validate the user information
|
556 |
-
if not name or not email or not
|
557 |
-
gr.Info("Please fill out all the required fields (name, email, years of experience). If you are not a licensed physician with a specific specialty, please choose the specialty that most closely aligns with your biomedical expertise.", duration=5)
|
558 |
-
return gr.update(visible=True), gr.update(visible=False), None, "Please fill out all the required fields (name, email, years of experience). If you are not a licensed physician with a specific specialty, please choose the specialty that most closely aligns with your biomedical expertise.", gr.Chatbot(), gr.Chatbot(), gr.Chatbot(), gr.Chatbot(), gr.HTML(), gr.State()
|
559 |
|
560 |
gr.Info("Loading the data...", duration=3)
|
561 |
user_info, chat_a_answer, chat_b_answer, chat_a_reasoning, chat_b_reasoning, page1_prompt, page1_reference_answer, question_for_eval, remaining_count = get_next_eval_question(
|
@@ -591,7 +591,136 @@ def go_to_page1(show_page_1):
|
|
591 |
return updates
|
592 |
|
593 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
594 |
# Define restrict function for each criterion
|
|
|
|
|
595 |
def make_restrict_function(base_choices):
|
596 |
def restrict_choices_page1(radio_choice, score_a, score_b):
|
597 |
"""
|
@@ -723,7 +852,7 @@ def make_restrict_function(base_choices):
|
|
723 |
# --- Define Callback Functions for Confirmation Flow ---
|
724 |
|
725 |
|
726 |
-
def build_row_dict(data_subset_state, user_info,
|
727 |
num_criteria = len(criteria)
|
728 |
ratings_A_vals = list(args[:num_criteria])
|
729 |
ratings_B_vals = list(args[num_criteria:])
|
@@ -746,7 +875,7 @@ def build_row_dict(data_subset_state, user_info, question_quality_comments, pair
|
|
746 |
"Prompt": prompt_text,
|
747 |
"ResponseA_Model": response_A_model,
|
748 |
"ResponseB_Model": response_B_model,
|
749 |
-
"Question
|
750 |
}
|
751 |
|
752 |
pairwise = [mapping.get(val, val) for val in pairwise]
|
@@ -760,10 +889,10 @@ def build_row_dict(data_subset_state, user_info, question_quality_comments, pair
|
|
760 |
return row
|
761 |
|
762 |
|
763 |
-
def final_submit(data_subset_state, user_info,
|
764 |
# --- Part 1: Submit the current results (Existing Logic) ---
|
765 |
-
row_dict = build_row_dict(data_subset_state, user_info,
|
766 |
-
pairwise, comparisons_reasons, *args)
|
767 |
append_to_sheet(user_data=None, custom_row_dict=row_dict, custom_sheet_name=str(
|
768 |
TXAGENT_RESULTS_SHEET_BASE_NAME), add_header_when_create_sheet=True)
|
769 |
|
@@ -804,15 +933,14 @@ def final_submit(data_subset_state, user_info, question_quality_comments, pairwi
|
|
804 |
|
805 |
# Function to validate page1 inputs and directly submit if valid
|
806 |
def validate_and_submit_page1(data_subset_state, user_info, *combined_values):
|
807 |
-
# combined_values contains
|
808 |
criteria_count = len(criteria_for_comparison)
|
809 |
-
|
810 |
-
pairwise_list = list(combined_values[1:criteria_count+1])
|
811 |
comparison_reasons_list = list(
|
812 |
-
combined_values[criteria_count
|
813 |
ratings_A_list = list(
|
814 |
-
combined_values[criteria_count*2
|
815 |
-
ratings_B_list = list(combined_values[criteria_count*3
|
816 |
|
817 |
# Check if all pairwise comparisons are filled
|
818 |
if any(answer is None for answer in pairwise_list):
|
@@ -873,8 +1001,8 @@ def validate_and_submit_page1(data_subset_state, user_info, *combined_values):
|
|
873 |
)
|
874 |
gr.Info("Submitting your evaluation and loading the next question...")
|
875 |
# If validation passes, call final_submit and handle form reset
|
876 |
-
submit_result = final_submit(data_subset_state, user_info,
|
877 |
-
comparison_reasons_list, *ratings_A_list, *ratings_B_list)
|
878 |
|
879 |
# Check if there are more questions by looking at the page1 update dict
|
880 |
# submit_result[1] is the page1 update, submit_result[2] is the final_page update
|
@@ -960,18 +1088,6 @@ centered_col_css = """
|
|
960 |
width: 100% !important; /* Occupy full width of its column */
|
961 |
white-space: normal !important; /* Allow text to wrap onto multiple lines */
|
962 |
}
|
963 |
-
#txagent-demo-btn {
|
964 |
-
background-color: #4CAF50 !important;
|
965 |
-
color: white !important;
|
966 |
-
border-color: #4CAF50 !important;
|
967 |
-
margin-top: 10px !important;
|
968 |
-
}
|
969 |
-
#api-key-btn {
|
970 |
-
background-color: #FF9800 !important;
|
971 |
-
color: white !important;
|
972 |
-
border-color: #FF9800 !important;
|
973 |
-
margin-top: 10px !important;
|
974 |
-
}
|
975 |
.criteria-radio-score-label [role="radiogroup"],
|
976 |
.criteria-radio-score-label .gr-radio-group,
|
977 |
.criteria-radio-score-label .flex {
|
@@ -1005,6 +1121,7 @@ with gr.Blocks(css=centered_col_css) as demo:
|
|
1005 |
pairwise_state = gr.State()
|
1006 |
scores_A_state = gr.State()
|
1007 |
comparison_reasons = gr.State()
|
|
|
1008 |
unqualified_A_state = gr.State()
|
1009 |
data_subset_state = gr.State()
|
1010 |
question_in_progress = gr.State(0)
|
@@ -1033,38 +1150,11 @@ with gr.Blocks(css=centered_col_css) as demo:
|
|
1033 |
with gr.Column(visible=True, elem_id="page-1") as page_minus1:
|
1034 |
gr.HTML("""
|
1035 |
<div>
|
1036 |
-
<h1>TxAgent:
|
1037 |
</div>
|
1038 |
""")
|
1039 |
-
#
|
1040 |
-
|
1041 |
-
api_key_btn = gr.Button(
|
1042 |
-
value="Request Access",
|
1043 |
-
variant="secondary",
|
1044 |
-
size="lg",
|
1045 |
-
elem_id="api-key-btn"
|
1046 |
-
)
|
1047 |
-
with gr.Column(scale=1):
|
1048 |
-
txagent_demo_btn = gr.Button(
|
1049 |
-
value="Access TxAgent",
|
1050 |
-
variant="secondary",
|
1051 |
-
size="lg",
|
1052 |
-
elem_id="txagent-demo-btn"
|
1053 |
-
)
|
1054 |
-
|
1055 |
-
gr.Markdown(
|
1056 |
-
"""
|
1057 |
-
For live access to TxAgent, you can:
|
1058 |
-
- Access TxAgent if you have an account.
|
1059 |
-
- Request access if you do not have an account yet.
|
1060 |
-
|
1061 |
-
We look forward to your feedback!
|
1062 |
-
"""
|
1063 |
-
)
|
1064 |
-
|
1065 |
-
# Add extra white space between sections
|
1066 |
-
gr.HTML("<br>")
|
1067 |
-
|
1068 |
with gr.Column(scale=1):
|
1069 |
participate_eval_btn = gr.Button(
|
1070 |
value="Evaluate TxAgent",
|
@@ -1083,32 +1173,27 @@ with gr.Blocks(css=centered_col_css) as demo:
|
|
1083 |
Thank you for helping improve TxAgent!
|
1084 |
"""
|
1085 |
)
|
1086 |
-
|
1087 |
-
|
1088 |
-
|
1089 |
-
|
1090 |
-
|
1091 |
-
|
1092 |
-
|
1093 |
-
# variant="primary",
|
1094 |
-
# size="lg",
|
1095 |
-
# elem_id="submit-btn"
|
1096 |
-
# )
|
1097 |
|
1098 |
# with gr.Row(elem_classes=["center-row"]):
|
1099 |
# Second row: one explanatory text block under each button
|
1100 |
-
|
1101 |
-
|
1102 |
-
|
1103 |
-
|
1104 |
-
|
1105 |
-
|
1106 |
-
|
1107 |
-
|
1108 |
-
# We look forward to seeing your feedback!
|
1109 |
-
# """
|
1110 |
-
# )
|
1111 |
|
|
|
|
|
|
|
1112 |
|
1113 |
# Add contact information in Markdown format
|
1114 |
contact_info_markdown = """
|
@@ -1125,29 +1210,11 @@ with gr.Blocks(css=centered_col_css) as demo:
|
|
1125 |
# For the Google Form button, we'll use JavaScript to open a new tab.
|
1126 |
# The URL for the Google Form should be replaced with the actual link.
|
1127 |
google_form_url = "https://forms.gle/pYvyvEQQwS5gdupQA"
|
1128 |
-
|
1129 |
-
# fn=None,
|
1130 |
-
# inputs=None,
|
1131 |
-
# outputs=None,
|
1132 |
-
# js=f"() => {{ window.open('{google_form_url}', '_blank'); }}"
|
1133 |
-
# )
|
1134 |
-
|
1135 |
-
# TxAgent demo button click handler
|
1136 |
-
txagent_demo_url = "https://txagent.curebench.ai/"
|
1137 |
-
txagent_demo_btn.click(
|
1138 |
fn=None,
|
1139 |
inputs=None,
|
1140 |
outputs=None,
|
1141 |
-
js=f"() => {{ window.open('{
|
1142 |
-
)
|
1143 |
-
|
1144 |
-
# API key application button click handler
|
1145 |
-
api_key_url = "https://docs.google.com/forms/d/e/1FAIpQLScEFhgT1X0wOkpWjEOMGpvhDFyIfoSMzJZ2HA9o0F0BaNcQPw/viewform?usp=dialog"
|
1146 |
-
api_key_btn.click(
|
1147 |
-
fn=None,
|
1148 |
-
inputs=None,
|
1149 |
-
outputs=None,
|
1150 |
-
js=f"() => {{ window.open('{api_key_url}', '_blank'); }}"
|
1151 |
)
|
1152 |
|
1153 |
# Page 0: Welcome / Informational page.
|
@@ -1158,9 +1225,9 @@ with gr.Blocks(css=centered_col_css) as demo:
|
|
1158 |
email = gr.Textbox(
|
1159 |
label="Email (required). Use the same email each time you log into this evaluation portal to avoid receiving repeat questions.")
|
1160 |
specialty_dd = gr.Dropdown(
|
1161 |
-
choices=specialties_list, label="Primary Medical Specialty (
|
1162 |
subspecialty_dd = gr.Dropdown(
|
1163 |
-
choices=subspecialties_list, label="Subspecialty (
|
1164 |
npi_id = gr.Textbox(
|
1165 |
label="National Provider Identifier ID (optional). Visit https://npiregistry.cms.hhs.gov/search to find your NPI ID. Leave blank if you do not have an NPI ID.")
|
1166 |
years_exp_radio = gr.Radio(
|
@@ -1195,7 +1262,7 @@ with gr.Blocks(css=centered_col_css) as demo:
|
|
1195 |
|
1196 |
# Page 1: Pairwise Comparison.
|
1197 |
with gr.Column(visible=False) as page1:
|
1198 |
-
with gr.Accordion("
|
1199 |
gr.Markdown("""
|
1200 |
## Instructions:
|
1201 |
Please review these instructions and enter your information to begin:
|
@@ -1213,13 +1280,21 @@ with gr.Blocks(css=centered_col_css) as demo:
|
|
1213 |
# gr.Markdown("Comparison")
|
1214 |
# Add small red button and comments text box in the same row
|
1215 |
page1_prompt = gr.HTML()
|
1216 |
-
|
1217 |
-
|
1218 |
-
|
1219 |
-
|
1220 |
-
|
1221 |
-
|
1222 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1223 |
|
1224 |
page1_error_box = gr.Markdown("") # ADDED: display validation errors
|
1225 |
|
@@ -1389,14 +1464,22 @@ with gr.Blocks(css=centered_col_css) as demo:
|
|
1389 |
chat_b_answer, chat_a_reasoning, chat_b_reasoning, page1_prompt, data_subset_state],
|
1390 |
scroll_to_output=True
|
1391 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1392 |
|
1393 |
# Transition from Page 1 to direct submission (no confirmation modal)
|
1394 |
submit_btn_1.click(
|
1395 |
fn=validate_and_submit_page1,
|
1396 |
-
inputs=[data_subset_state, user_info_state,
|
1397 |
*comparison_reasons_inputs, *ratings_A_page1, *ratings_B_page1],
|
1398 |
outputs=[page1_error_box, page1, final_page, page0_error_box, chat_a_answer, chat_b_answer, chat_a_reasoning, chat_b_reasoning,
|
1399 |
-
page1_prompt, data_subset_state, user_info_state,
|
1400 |
scroll_to_output=True
|
1401 |
)
|
1402 |
|
|
|
287 |
disease_subspecs = set(specs.get('subspecialties', []))
|
288 |
|
289 |
# Check for intersection
|
290 |
+
if user_all_specs.intersection(disease_specs) or user_all_specs.intersection(disease_subspecs):
|
291 |
relevant_diseases.append(disease)
|
292 |
|
293 |
relevant_drugs = []
|
|
|
553 |
|
554 |
def go_to_eval_progress_modal(name, email, specialty_dd, subspecialty_dd, years_exp_radio, exp_explanation_tb, npi_id, our_methods=our_methods):
|
555 |
# Validate the user information
|
556 |
+
if not name or not email or not specialty_dd or not years_exp_radio:
|
557 |
+
gr.Info("Please fill out all the required fields (name, email, specialty, years of experience). If you are not a licensed physician with a specific specialty, please choose the specialty that most closely aligns with your biomedical expertise.", duration=5)
|
558 |
+
return gr.update(visible=True), gr.update(visible=False), None, "Please fill out all the required fields (name, email, specialty, years of experience). If you are not a licensed physician with a specific specialty, please choose the specialty that most closely aligns with your biomedical expertise.", gr.Chatbot(), gr.Chatbot(), gr.Chatbot(), gr.Chatbot(), gr.HTML(), gr.State()
|
559 |
|
560 |
gr.Info("Loading the data...", duration=3)
|
561 |
user_info, chat_a_answer, chat_b_answer, chat_a_reasoning, chat_b_reasoning, page1_prompt, page1_reference_answer, question_for_eval, remaining_count = get_next_eval_question(
|
|
|
591 |
return updates
|
592 |
|
593 |
|
594 |
+
# --- Skip Question Modal Callbacks ---
|
595 |
+
def skip_question_and_load_new(user_info_state, our_methods):
    """Fetch the next evaluation question for the current evaluator.

    ``user_info_state`` is the 8-item tuple (name, email, specialty,
    subspecialty, years of experience, experience explanation, NPI id,
    question id); the stored question id is ignored here.

    Every path returns an 11-item tuple: two visibility updates, the
    refreshed user info, a message string, the four chat values, the prompt,
    the reference answer and the question payload.
    NOTE(review): the event wiring that consumes this tuple is not visible
    in this excerpt -- confirm the slot order against its ``outputs`` list.
    """

    def _empty_result(message):
        # Both visibility slots hidden, no user info, and fresh placeholder
        # components for every slot that has no data to show.
        return (
            gr.update(visible=False),
            gr.update(visible=False),
            None,
            message,
            gr.Chatbot(),
            gr.Chatbot(),
            gr.Chatbot(),
            gr.Chatbot(),
            gr.HTML(),
            gr.Markdown(),
            gr.State(),
        )

    # Defensive: nothing to load when the evaluator has no stored identity.
    if user_info_state is None:
        return _empty_result("")

    (
        evaluator_name,
        evaluator_email,
        specialty,
        subspecialty,
        years_exp,
        exp_explanation,
        npi,
        _previous_question_id,
    ) = user_info_state

    (
        refreshed_user_info,
        answer_a,
        answer_b,
        reasoning_a,
        reasoning_b,
        prompt,
        reference_answer,
        next_question,
        questions_left,
    ) = get_next_eval_question(
        evaluator_name,
        evaluator_email,
        specialty,
        subspecialty,
        years_exp,
        exp_explanation,
        npi,
        our_methods,
    )

    # Pool exhausted: report it through the message slot.
    if questions_left == 0:
        return _empty_result(
            "Based on your submitted data, you have no more questions to evaluate. You may exit the page; we will follow-up if we require anything else from you. Thank you!"
        )

    return (
        gr.update(visible=False),
        gr.update(visible=True),
        refreshed_user_info,
        "",
        answer_a,
        answer_b,
        reasoning_a,
        reasoning_b,
        prompt,
        reference_answer,
        next_question,
    )
|
609 |
+
|
610 |
+
# --- Skip-question handler for the "Skip Question" button ---
|
611 |
+
|
612 |
+
|
613 |
+
def skip_current_question(user_info_state, our_methods: list = our_methods):
    """Replace the question shown on Page 1 with the next unused one.

    Args:
        user_info_state: 8-item tuple (name, email, specialty, subspecialty,
            years of experience, experience description, NPI id, question
            id), or ``None`` when the evaluator has not started a session.
        our_methods: forwarded unchanged to ``get_next_eval_question``.

    Returns:
        An 8-tuple on every path, in the order the "Skip Question" click
        handler lists its outputs: user info state, error text, chatbot A
        answer, chatbot B answer, chatbot A reasoning, chatbot B reasoning,
        prompt HTML, question data.
    """
    # Guard: user clicked before the session started. This path must yield
    # the same eight values as the others; a shorter tuple does not match
    # the handler's output list.
    if user_info_state is None:
        return (
            None,
            gr.update(
                value="Please start the evaluation before skipping questions."),
            gr.update(value=[]),  # Chatbot A answer
            gr.update(value=[]),  # Chatbot B answer
            gr.update(value=[]),  # Chatbot A reasoning
            gr.update(value=[]),  # Chatbot B reasoning
            gr.update(value=""),  # Prompt HTML
            gr.State(),  # data_subset_state
        )

    # Only announce the skip once we know one will actually be attempted.
    gr.Info("Skipping this question and loading the next one…", duration=5)

    # Unpack evaluator identity; the stored question id is not needed.
    name, email, specialty_dd, subspecialty_dd, yrs_exp, exp_desc, npi_id, _ = user_info_state

    # Pull the next unused question. The pre-formatted chat/prompt values in
    # the result are discarded; the display values are rebuilt below from
    # the question payload.
    (
        user_info_new,
        _chat_a_answer,
        _chat_b_answer,
        _chat_a_reasoning,
        _chat_b_reasoning,
        _prompt_comp,
        _ref_comp,
        question_for_eval,
        remaining,
    ) = get_next_eval_question(
        name, email, specialty_dd, subspecialty_dd, yrs_exp, exp_desc, npi_id, our_methods
    )

    # If the pool is exhausted, just notify the evaluator and blank the view.
    if remaining == 0 or question_for_eval is None:
        final_msg = (
            "Based on your submitted data, you have no more questions to evaluate. "
            "You may exit the page; we will follow‑up if we require anything else from you. "
            "Thank you!"
        )
        return (
            user_info_state,
            gr.update(value=final_msg),
            gr.update(value=[]),
            gr.update(value=[]),
            gr.update(value=[]),
            gr.update(value=[]),
            gr.update(value=""),
            gr.State(),
        )

    # --- Build fresh values for the existing UI components ---
    chat_a_answer, chat_a_reasoning, _ = format_chat(
        question_for_eval['models'][0]['reasoning_trace'], tool_database_labels)
    chat_b_answer, chat_b_reasoning, _ = format_chat(
        question_for_eval['models'][1]['reasoning_trace'], tool_database_labels)

    prompt_html = (
        "<div style='background-color: #FFEFD5; border: 2px solid #FF8C00; padding: 10px; "
        "border-radius: 5px; color: black;'><strong style='color: black;'>Question:</strong> "
        f"{question_for_eval['question']}</div>"
    )
    gr.Info("New question loaded…", duration=3)

    # Return updates to refresh Page 1 in place.
    return (
        user_info_new,
        gr.update(value=""),  # clear any previous error text
        gr.update(value=chat_a_answer),  # Chatbot A answer
        gr.update(value=chat_b_answer),  # Chatbot B answer
        gr.update(value=chat_a_reasoning),  # Chatbot A reasoning
        gr.update(value=chat_b_reasoning),  # Chatbot B reasoning
        gr.update(value=prompt_html),  # Prompt
        question_for_eval,  # store for later pages
    )
|
688 |
+
|
689 |
+
# --- Handler for the "Skip Question" button: flags the question as nonsensical/irrelevant, then skips it ---
|
690 |
+
|
691 |
+
|
692 |
+
def flag_nonsense_and_skip(user_info_state, skip_comments=""):
    """Log a "question makes no sense" flag, then advance to the next question.

    Click handler for the "Skip Question" button. When an evaluator session
    exists, a row carrying the evaluator's name and email, the current
    question id, the flag and the optional comment is appended to the
    results sheet before anything else happens, so the feedback is kept even
    if the evaluator stops here. The UI refresh is delegated to
    ``skip_current_question``, whose return value is passed through
    unchanged.
    """
    session_started = user_info_state is not None

    if session_started:
        # Only name, email and question id end up in the row; the remaining
        # identity fields are unpacked just to match the tuple's shape.
        (
            evaluator_name,
            evaluator_email,
            _specialty,
            _subspecialty,
            _years_exp,
            _exp_desc,
            _npi,
            question_id,
        ) = user_info_state

        flag_row = {
            "Timestamp": datetime.datetime.now().isoformat(),
            "Name": evaluator_name,
            "Email": evaluator_email,
            "Question ID": question_id,
            "Question Makes No Sense or Biomedically Irrelevant": True,
            "Skip Comments": skip_comments,
        }
        append_to_sheet(
            user_data=None,
            custom_row_dict=flag_row,
            custom_sheet_name=str(TXAGENT_RESULTS_SHEET_BASE_NAME),
            add_header_when_create_sheet=True,
        )

    # Reuse the ordinary skip path to refresh Page 1.
    return skip_current_question(user_info_state)
|
720 |
+
|
721 |
# Define restrict function for each criterion
|
722 |
+
|
723 |
+
|
724 |
def make_restrict_function(base_choices):
|
725 |
def restrict_choices_page1(radio_choice, score_a, score_b):
|
726 |
"""
|
|
|
852 |
# --- Define Callback Functions for Confirmation Flow ---
|
853 |
|
854 |
|
855 |
+
def build_row_dict(data_subset_state, user_info, pairwise, comparisons_reasons, nonsense_btn_clicked, *args):
|
856 |
num_criteria = len(criteria)
|
857 |
ratings_A_vals = list(args[:num_criteria])
|
858 |
ratings_B_vals = list(args[num_criteria:])
|
|
|
875 |
"Prompt": prompt_text,
|
876 |
"ResponseA_Model": response_A_model,
|
877 |
"ResponseB_Model": response_B_model,
|
878 |
+
"Question Makes No Sense or Biomedically Irrelevant": nonsense_btn_clicked,
|
879 |
}
|
880 |
|
881 |
pairwise = [mapping.get(val, val) for val in pairwise]
|
|
|
889 |
return row
|
890 |
|
891 |
|
892 |
+
def final_submit(data_subset_state, user_info, pairwise, comparisons_reasons, nonsense_btn_clicked, *args):
|
893 |
# --- Part 1: Submit the current results (Existing Logic) ---
|
894 |
+
row_dict = build_row_dict(data_subset_state, user_info,
|
895 |
+
pairwise, comparisons_reasons, nonsense_btn_clicked, *args)
|
896 |
append_to_sheet(user_data=None, custom_row_dict=row_dict, custom_sheet_name=str(
|
897 |
TXAGENT_RESULTS_SHEET_BASE_NAME), add_header_when_create_sheet=True)
|
898 |
|
|
|
933 |
|
934 |
# Function to validate page1 inputs and directly submit if valid
|
935 |
def validate_and_submit_page1(data_subset_state, user_info, *combined_values):
|
936 |
+
# combined_values contains pairwise choices + comparison reasons + ratings
|
937 |
criteria_count = len(criteria_for_comparison)
|
938 |
+
pairwise_list = list(combined_values[:criteria_count])
|
|
|
939 |
comparison_reasons_list = list(
|
940 |
+
combined_values[criteria_count:criteria_count*2])
|
941 |
ratings_A_list = list(
|
942 |
+
combined_values[criteria_count*2:criteria_count*3])
|
943 |
+
ratings_B_list = list(combined_values[criteria_count*3:])
|
944 |
|
945 |
# Check if all pairwise comparisons are filled
|
946 |
if any(answer is None for answer in pairwise_list):
|
|
|
1001 |
)
|
1002 |
gr.Info("Submitting your evaluation and loading the next question...")
|
1003 |
# If validation passes, call final_submit and handle form reset
|
1004 |
+
submit_result = final_submit(data_subset_state, user_info, pairwise_list,
|
1005 |
+
comparison_reasons_list, False, *ratings_A_list, *ratings_B_list)
|
1006 |
|
1007 |
# Check if there are more questions by looking at the page1 update dict
|
1008 |
# submit_result[1] is the page1 update, submit_result[2] is the final_page update
|
|
|
1088 |
width: 100% !important; /* Occupy full width of its column */
|
1089 |
white-space: normal !important; /* Allow text to wrap onto multiple lines */
|
1090 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1091 |
.criteria-radio-score-label [role="radiogroup"],
|
1092 |
.criteria-radio-score-label .gr-radio-group,
|
1093 |
.criteria-radio-score-label .flex {
|
|
|
1121 |
pairwise_state = gr.State()
|
1122 |
scores_A_state = gr.State()
|
1123 |
comparison_reasons = gr.State()
|
1124 |
+
nonsense_btn_clicked = gr.State(False)
|
1125 |
unqualified_A_state = gr.State()
|
1126 |
data_subset_state = gr.State()
|
1127 |
question_in_progress = gr.State(0)
|
|
|
1150 |
with gr.Column(visible=True, elem_id="page-1") as page_minus1:
|
1151 |
gr.HTML("""
|
1152 |
<div>
|
1153 |
+
<h1>TxAgent Portal: AI Evaluation and Crowdsourcing of Therapeutic Questions</h1>
|
1154 |
</div>
|
1155 |
""")
|
1156 |
+
# with gr.Row(elem_classes=["center-row"]):
|
1157 |
+
# First row: the two buttons side by side
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1158 |
with gr.Column(scale=1):
|
1159 |
participate_eval_btn = gr.Button(
|
1160 |
value="Evaluate TxAgent",
|
|
|
1173 |
Thank you for helping improve TxAgent!
|
1174 |
"""
|
1175 |
)
|
1176 |
+
with gr.Column(scale=1):
|
1177 |
+
submit_questions_btn = gr.Button(
|
1178 |
+
value="Submit Your Therapeutic Questions",
|
1179 |
+
variant="primary",
|
1180 |
+
size="lg",
|
1181 |
+
elem_id="submit-btn"
|
1182 |
+
)
|
|
|
|
|
|
|
|
|
1183 |
|
1184 |
# with gr.Row(elem_classes=["center-row"]):
|
1185 |
# Second row: one explanatory text block under each button
|
1186 |
+
with gr.Column(scale=1):
|
1187 |
+
gr.Markdown(
|
1188 |
+
"""
|
1189 |
+
By submitting therapeutic questions, you will:
|
1190 |
+
- Help identify edge cases and blind spots for AI models.
|
1191 |
+
- Help extend AI models to reason in new domains.
|
1192 |
+
- Directly shape future model improvements.
|
|
|
|
|
|
|
|
|
1193 |
|
1194 |
+
We look forward to seeing your feedback!
|
1195 |
+
"""
|
1196 |
+
)
|
1197 |
|
1198 |
# Add contact information in Markdown format
|
1199 |
contact_info_markdown = """
|
|
|
1210 |
# For the Google Form button, we'll use JavaScript to open a new tab.
|
1211 |
# The URL for the Google Form should be replaced with the actual link.
|
1212 |
google_form_url = "https://forms.gle/pYvyvEQQwS5gdupQA"
|
1213 |
+
submit_questions_btn.click(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1214 |
fn=None,
|
1215 |
inputs=None,
|
1216 |
outputs=None,
|
1217 |
+
js=f"() => {{ window.open('{google_form_url}', '_blank'); }}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1218 |
)
|
1219 |
|
1220 |
# Page 0: Welcome / Informational page.
|
|
|
1225 |
email = gr.Textbox(
|
1226 |
label="Email (required). Use the same email each time you log into this evaluation portal to avoid receiving repeat questions.")
|
1227 |
specialty_dd = gr.Dropdown(
|
1228 |
+
choices=specialties_list, label="Primary Medical Specialty (required). Visit https://www.abms.org/member-boards/specialty-subspecialty-certificates/ for categories.", multiselect=True)
|
1229 |
subspecialty_dd = gr.Dropdown(
|
1230 |
+
choices=subspecialties_list, label="Subspecialty (if applicable). Visit https://www.abms.org/member-boards/specialty-subspecialty-certificates/ for categories.", multiselect=True)
|
1231 |
npi_id = gr.Textbox(
|
1232 |
label="National Provider Identifier ID (optional). Visit https://npiregistry.cms.hhs.gov/search to find your NPI ID. Leave blank if you do not have an NPI ID.")
|
1233 |
years_exp_radio = gr.Radio(
|
|
|
1262 |
|
1263 |
# Page 1: Pairwise Comparison.
|
1264 |
with gr.Column(visible=False) as page1:
|
1265 |
+
with gr.Accordion("Instructions", open=False):
|
1266 |
gr.Markdown("""
|
1267 |
## Instructions:
|
1268 |
Please review these instructions and enter your information to begin:
|
|
|
1280 |
# gr.Markdown("Comparison")
|
1281 |
# Add small red button and comments text box in the same row
|
1282 |
page1_prompt = gr.HTML()
|
1283 |
+
with gr.Row():
|
1284 |
+
nonsense_btn = gr.Button(
|
1285 |
+
"Skip Question",
|
1286 |
+
size="sm",
|
1287 |
+
variant="stop", # red variant
|
1288 |
+
elem_id="invalid-question-btn",
|
1289 |
+
elem_classes=["short-btn"],
|
1290 |
+
scale=1
|
1291 |
+
)
|
1292 |
+
skip_comments = gr.Textbox(
|
1293 |
+
placeholder="(Optional) Why do you want to skip this question...",
|
1294 |
+
show_label=False,
|
1295 |
+
scale=3,
|
1296 |
+
container=False,
|
1297 |
+
)
|
1298 |
|
1299 |
page1_error_box = gr.Markdown("") # ADDED: display validation errors
|
1300 |
|
|
|
1464 |
chat_b_answer, chat_a_reasoning, chat_b_reasoning, page1_prompt, data_subset_state],
|
1465 |
scroll_to_output=True
|
1466 |
)
|
1467 |
+
# Skip the current question and load a new one when the evaluator flags it
|
1468 |
+
nonsense_btn.click(
|
1469 |
+
fn=flag_nonsense_and_skip,
|
1470 |
+
inputs=[user_info_state, skip_comments],
|
1471 |
+
outputs=[user_info_state, page1_error_box, chat_a_answer, chat_b_answer, chat_a_reasoning, chat_b_reasoning,
|
1472 |
+
page1_prompt, data_subset_state],
|
1473 |
+
scroll_to_output=True
|
1474 |
+
)
|
1475 |
|
1476 |
# Transition from Page 1 to direct submission (no confirmation modal)
|
1477 |
submit_btn_1.click(
|
1478 |
fn=validate_and_submit_page1,
|
1479 |
+
inputs=[data_subset_state, user_info_state, *pairwise_inputs,
|
1480 |
*comparison_reasons_inputs, *ratings_A_page1, *ratings_B_page1],
|
1481 |
outputs=[page1_error_box, page1, final_page, page0_error_box, chat_a_answer, chat_b_answer, chat_a_reasoning, chat_b_reasoning,
|
1482 |
+
page1_prompt, data_subset_state, user_info_state, *pairwise_inputs, *comparison_reasons_inputs, *ratings_A_page1, *ratings_B_page1],
|
1483 |
scroll_to_output=True
|
1484 |
)
|
1485 |
|