Spaces:
Sleeping
Sleeping
Commit
·
8123796
1
Parent(s):
65511ce
Sync local directory to agenticx/ExpertEval
Browse files
app.py
CHANGED
|
@@ -43,20 +43,6 @@ except Exception as e:
|
|
| 43 |
print(f"Error reading HTML file: {e}")
|
| 44 |
TxAgent_Project_Page_HTML = "<p>Error: Project page content could not be loaded.</p>"
|
| 45 |
|
| 46 |
-
# # Assume 'your_image.png' is in the same directory
|
| 47 |
-
# with open("txagent.jpg", "rb") as image_file:
|
| 48 |
-
# encoded_string = base64.b64encode(image_file.read()).decode("utf-8")
|
| 49 |
-
|
| 50 |
-
# image_html = f'<img src="data:image/png;base64,{encoded_string}" alt="Your Image">'
|
| 51 |
-
|
| 52 |
-
# TxAgent_Project_Page_HTML = f"""
|
| 53 |
-
# <div>
|
| 54 |
-
# <h2>Project Information</h2>
|
| 55 |
-
# <p>Here's some information about the TxAgent project.</p>
|
| 56 |
-
# {image_html}
|
| 57 |
-
# </div>
|
| 58 |
-
# """
|
| 59 |
-
|
| 60 |
# Load tool lists
|
| 61 |
fda_drug_labeling_tools_path = "fda_drug_labeling_tools.json"
|
| 62 |
monarch_tools_path = "monarch_tools.json"
|
|
@@ -175,18 +161,11 @@ mapping = { #for pairwise mapping between model comparison selections
|
|
| 175 |
#Prepare data
|
| 176 |
REPO_ID = "RichardZhu52/TxAgent_human_eval"
|
| 177 |
CROWDSOURCING_DATA_DIRECTORY = "crowdsourcing_eval_data_0430"
|
| 178 |
-
TXAGENT_RESULTS_SHEET_BASE_NAME = "
|
| 179 |
DISEASE_SPECIALTY_MAP_FILENAME = "disease_specialty_map.json"
|
|
|
|
| 180 |
|
| 181 |
-
def get_evaluator_questions(
|
| 182 |
-
relevant_diseases = []
|
| 183 |
-
for disease, specs in disease_map_data.items():
|
| 184 |
-
disease_specs = set(specs.get('specialties', []))
|
| 185 |
-
disease_subspecs = set(specs.get('subspecialties', []))
|
| 186 |
-
|
| 187 |
-
# Check for intersection
|
| 188 |
-
if user_all_specs.intersection(disease_specs) or user_all_specs.intersection(disease_subspecs):
|
| 189 |
-
relevant_diseases.append(disease)
|
| 190 |
|
| 191 |
# Filter to only the files in that directory
|
| 192 |
evaluator_files = [f for f in all_files if f.startswith(f"{evaluator_directory}/")]
|
|
@@ -203,46 +182,26 @@ def get_evaluator_questions(email, disease_map_data, user_all_specs, all_files,
|
|
| 203 |
model_name_key = os.path.basename(remote_path).replace('.json', '')
|
| 204 |
data_by_filename[model_name_key] = json.load(f)
|
| 205 |
|
| 206 |
-
# Filter questions based on relevant diseases derived from user specialties
|
| 207 |
-
evaluator_question_ids = []
|
| 208 |
-
relevant_diseases_lower = {disease.lower() for disease in relevant_diseases} # Convert relevant diseases to lowercase set for efficient lookup
|
| 209 |
-
|
| 210 |
-
# Assuming 'txagent' data is representative for question IDs and associated diseases
|
| 211 |
-
if 'txagent' in data_by_filename:
|
| 212 |
-
for entry in data_by_filename['txagent']:
|
| 213 |
-
question_id = entry.get("question_ID")
|
| 214 |
-
question_diseases = entry.get("disease", []) # Get diseases list, default to empty if missing
|
| 215 |
-
if question_id is not None and question_diseases:
|
| 216 |
-
# Convert question diseases to lowercase and check for intersection
|
| 217 |
-
question_diseases_lower = {disease.lower() for disease in question_diseases if isinstance(disease, str)}
|
| 218 |
-
if question_diseases_lower.intersection(relevant_diseases_lower):
|
| 219 |
-
evaluator_question_ids.append(question_id)
|
| 220 |
-
|
| 221 |
-
# Handle case where no relevant questions are found based on specialty
|
| 222 |
-
if not evaluator_question_ids:
|
| 223 |
-
return [], data_by_filename
|
| 224 |
-
|
| 225 |
#FINALLY, MAKE SURE THEY DIDNT ALREADY FILL IT OUT. Must go through every tuple of (question_ID, TxAgent, other model) where other model could be any of the other files in data_by_filename
|
| 226 |
model_names = [key for key in data_by_filename.keys() if key != 'txagent']
|
| 227 |
-
|
| 228 |
full_question_ids_list = []
|
| 229 |
for other_model_name in model_names:
|
| 230 |
for q_id in evaluator_question_ids:
|
| 231 |
full_question_ids_list.append((q_id, other_model_name))
|
| 232 |
|
| 233 |
-
results_df = read_sheet_to_df(custom_sheet_name=str(TXAGENT_RESULTS_SHEET_BASE_NAME))
|
| 234 |
if (results_df is not None) and (not results_df.empty):
|
| 235 |
# collect all (question_ID, other_model) pairs already seen
|
| 236 |
matched_pairs = set()
|
| 237 |
for _, row in results_df.iterrows():
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
matched_pairs.add((q, a))
|
| 246 |
|
| 247 |
# filter out any tuple whose (q_id, other_model) was already matched
|
| 248 |
full_question_ids_list = [
|
|
@@ -253,26 +212,31 @@ def get_evaluator_questions(email, disease_map_data, user_all_specs, all_files,
|
|
| 253 |
print(f"Filtered question IDs: {full_question_ids_list}")
|
| 254 |
print(f"Length of filtered question IDs: {len(full_question_ids_list)}")
|
| 255 |
|
| 256 |
-
|
| 257 |
return full_question_ids_list, data_by_filename
|
| 258 |
|
| 259 |
def go_to_page0_from_minus1():
|
| 260 |
return gr.update(visible=False), gr.update(visible=True)
|
| 261 |
|
| 262 |
-
def go_to_eval_progress_modal(name, email, specialty_dd, subspecialty_dd, years_exp_radio, exp_explanation_tb, npi_id):
|
| 263 |
|
| 264 |
# ADDED: Validate that name and email are non-empty before proceeding
|
| 265 |
-
if not name or not email or not specialty_dd or not years_exp_radio:
|
| 266 |
-
return gr.update(visible=True), gr.update(visible=False), None, "Please fill out all the required fields (name, email, specialty, years of experience). If you are not a licensed physician with a specific specialty, please choose the specialty that most closely aligns with your biomedical expertise.", gr.Chatbot(), gr.Chatbot(), gr.HTML(),gr.State(),gr.update(visible=False), ""
|
| 267 |
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 273 |
|
| 274 |
#retrieve data from HF
|
| 275 |
-
evaluator_directory =
|
| 276 |
if evaluator_directory is None:
|
| 277 |
return gr.update(visible=True), gr.update(visible=False), None, "Invalid Evaluator ID, please try again.", gr.Chatbot(), gr.Chatbot(), gr.HTML(),gr.State(),gr.update(visible=False),""
|
| 278 |
all_files = list_repo_files(
|
|
@@ -281,17 +245,7 @@ def go_to_eval_progress_modal(name, email, specialty_dd, subspecialty_dd, years_
|
|
| 281 |
revision="main",
|
| 282 |
)
|
| 283 |
|
| 284 |
-
|
| 285 |
-
repo_id=REPO_ID,
|
| 286 |
-
filename=DISEASE_SPECIALTY_MAP_FILENAME,
|
| 287 |
-
repo_type="dataset",
|
| 288 |
-
revision="main",
|
| 289 |
-
)
|
| 290 |
-
|
| 291 |
-
with open(disease_specialty_map, 'r') as f:
|
| 292 |
-
disease_map_data = json.load(f)
|
| 293 |
-
|
| 294 |
-
full_question_ids_list, data_by_filename = get_evaluator_questions(email, disease_map_data, user_all_specs, all_files, evaluator_directory)
|
| 295 |
|
| 296 |
if len(full_question_ids_list) == 0:
|
| 297 |
return gr.update(visible=True), gr.update(visible=False), None, "Based on your submitted data, you have no more questions to evaluate. You may exit the page; we will follow-up if we require anything else from you. Thank you!", gr.Chatbot(), gr.Chatbot(), gr.HTML(),gr.State(),gr.update(visible=False),""
|
|
@@ -329,7 +283,7 @@ def go_to_eval_progress_modal(name, email, specialty_dd, subspecialty_dd, years_
|
|
| 329 |
}
|
| 330 |
|
| 331 |
#update user_info
|
| 332 |
-
user_info = (name, email, specialty_dd, subspecialty_dd, years_exp_radio, exp_explanation_tb, npi_id, q_id)
|
| 333 |
chat_A_value = format_chat(question_for_eval['models'][0]['reasoning_trace'], tool_database_labels)
|
| 334 |
chat_B_value = format_chat(question_for_eval['models'][1]['reasoning_trace'], tool_database_labels)
|
| 335 |
prompt_text = question_for_eval['question']
|
|
@@ -358,7 +312,7 @@ def go_to_eval_progress_modal(name, email, specialty_dd, subspecialty_dd, years_
|
|
| 358 |
avatar_images=None, # Optional: omit user/assistant icons
|
| 359 |
rtl=False
|
| 360 |
)
|
| 361 |
-
return gr.update(visible=True), gr.update(visible=False), user_info,"", chat_a, chat_b, page1_prompt, question_for_eval, gr.update(visible=True), f"You are about to evaluate the next question.
|
| 362 |
|
| 363 |
#goes to page 1 from confirmation modal that tells users how many questions they have left to evaluate
|
| 364 |
def go_to_page1():
|
|
@@ -470,6 +424,16 @@ centered_col_css = """
|
|
| 470 |
max-width: 800px; /* Adjust this width as desired */
|
| 471 |
width: 100%;
|
| 472 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 473 |
"""
|
| 474 |
with gr.Blocks(css=centered_col_css) as demo:
|
| 475 |
# States to save information between pages.
|
|
@@ -508,21 +472,14 @@ with gr.Blocks(css=centered_col_css) as demo:
|
|
| 508 |
</div>
|
| 509 |
""")
|
| 510 |
with gr.Row():
|
| 511 |
-
|
| 512 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 513 |
gr.HTML(TxAgent_Project_Page_HTML)
|
| 514 |
|
| 515 |
-
# Define actions for the new buttons
|
| 516 |
-
# For the Google Form button, we'll use JavaScript to open a new tab.
|
| 517 |
-
# The URL for the Google Form should be replaced with the actual link.
|
| 518 |
-
google_form_url = "https://forms.gle/pYvyvEQQwS5gdupQA" # Replace with your actual Google Form link
|
| 519 |
-
submit_questions_btn.click(
|
| 520 |
-
fn=None,
|
| 521 |
-
inputs=None,
|
| 522 |
-
outputs=None,
|
| 523 |
-
js=f"() => {{ window.open('{google_form_url}', '_blank'); }}"
|
| 524 |
-
)
|
| 525 |
-
|
| 526 |
# Page 0: Welcome / Informational page.
|
| 527 |
with gr.Column(visible=False, elem_id="page0") as page0:
|
| 528 |
gr.Markdown("## Welcome to the TxAgent Evalution Study!")
|
|
@@ -542,6 +499,7 @@ with gr.Blocks(css=centered_col_css) as demo:
|
|
| 542 |
gr.Markdown("## Please enter your information to get a question to evaluate. Please use the same email every time you log onto this evaluation portal, as we use your email to prevent showing repeat questions.")
|
| 543 |
name = gr.Textbox(label="Name (required)")
|
| 544 |
email = gr.Textbox(label="Email (required). Please use the same email every time you log onto this evaluation portal, as we use your email to prevent showing repeat questions.")
|
|
|
|
| 545 |
specialty_dd = gr.Dropdown(choices=specialties_list, label="Primary Medical Specialty (required). Go to https://www.abms.org/member-boards/specialty-subspecialty-certificates/ for categorization)", multiselect=True)
|
| 546 |
subspecialty_dd = gr.Dropdown(choices=subspecialties_list, label="Subspecialty (if applicable). Go to https://www.abms.org/member-boards/specialty-subspecialty-certificates/ for categorization)", multiselect=True)
|
| 547 |
npi_id = gr.Textbox(label="National Provider Identifier ID (optional). Got to https://npiregistry.cms.hhs.gov/search to search for your NPI ID. If you do not have an NPI ID, please leave this blank.")
|
|
@@ -759,7 +717,7 @@ with gr.Blocks(css=centered_col_css) as demo:
|
|
| 759 |
label=f"Score for Response B - {crit['label']}",
|
| 760 |
interactive=True)
|
| 761 |
with gr.Row():
|
| 762 |
-
clear_btn = gr.Button("Clear Selection", size="sm")
|
| 763 |
clear_btn.click(fn=clear_selection, outputs=[rating_a,rating_b])
|
| 764 |
|
| 765 |
# wire each to re‐restrict the other on change
|
|
@@ -787,7 +745,6 @@ with gr.Blocks(css=centered_col_css) as demo:
|
|
| 787 |
# Final Page: Thank you message.
|
| 788 |
with gr.Column(visible=False, elem_id="final_page") as final_page:
|
| 789 |
gr.Markdown("## You have no questions left to evaluate. Thank you for your participation!")
|
| 790 |
-
eval_again_btn = gr.Button("Evaluate Another Question")
|
| 791 |
|
| 792 |
# Error Modal: For displaying validation errors.
|
| 793 |
with Modal("Error", visible=False, elem_id="error_modal") as error_modal:
|
|
@@ -818,6 +775,7 @@ with gr.Blocks(css=centered_col_css) as demo:
|
|
| 818 |
"Timestamp": timestamp,
|
| 819 |
"Name": user_info[0],
|
| 820 |
"Email": user_info[1],
|
|
|
|
| 821 |
"Specialty": str(user_info[2]),
|
| 822 |
"Subspecialty": str(user_info[3]),
|
| 823 |
"Years of Experience": user_info[4],
|
|
@@ -849,23 +807,32 @@ with gr.Blocks(css=centered_col_css) as demo:
|
|
| 849 |
def final_submit(data_subset_state, user_info, pairwise, comparisons_reasons, *args):
|
| 850 |
# --- Part 1: Submit the current results (Existing Logic) ---
|
| 851 |
row_dict = build_row_dict(data_subset_state, user_info, pairwise, comparisons_reasons, *args)
|
| 852 |
-
|
|
|
|
| 853 |
|
| 854 |
# --- Part 2: Recalculate remaining questions (Existing Logic + Modified Error Handling) ---
|
| 855 |
# try:
|
| 856 |
|
| 857 |
# --- Re-fetch data and filter questions (Same logic as before) ---
|
| 858 |
-
|
| 859 |
-
|
| 860 |
-
|
| 861 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 862 |
|
| 863 |
-
|
| 864 |
-
|
| 865 |
-
|
| 866 |
-
|
|
|
|
|
|
|
| 867 |
|
| 868 |
-
full_question_ids_list, data_by_filename = get_evaluator_questions(
|
| 869 |
remaining_count = len(full_question_ids_list)
|
| 870 |
|
| 871 |
# --- Part 3: Determine UI updates based on remaining count ---
|
|
@@ -952,7 +919,7 @@ with gr.Blocks(css=centered_col_css) as demo:
|
|
| 952 |
gr.update(visible=False), # page2 (Hide)
|
| 953 |
gr.update(visible=False), # confirm_modal (Hide)
|
| 954 |
gr.update(visible=True), # eval_progress_modal (Show)
|
| 955 |
-
f"Submission successful!
|
| 956 |
gr.update(visible=False), # final_page (Hide)
|
| 957 |
"",
|
| 958 |
chat_a,
|
|
@@ -989,9 +956,6 @@ with gr.Blocks(css=centered_col_css) as demo:
|
|
| 989 |
reset_ratings_B = [gr.update(value=None) for i in range(len(criteria))]
|
| 990 |
|
| 991 |
return (
|
| 992 |
-
# pages
|
| 993 |
-
gr.update(visible=True), # page0
|
| 994 |
-
gr.update(visible=False), # final_page
|
| 995 |
|
| 996 |
# states
|
| 997 |
# gr.update(value=None), # user_info_state
|
|
@@ -1037,7 +1001,7 @@ with gr.Blocks(css=centered_col_css) as demo:
|
|
| 1037 |
# Transition from Page 0 (Welcome) to Page 1.
|
| 1038 |
next_btn_0.click(
|
| 1039 |
fn=go_to_eval_progress_modal,
|
| 1040 |
-
inputs=[name, email, specialty_dd, subspecialty_dd, years_exp_radio, exp_explanation_tb, npi_id],
|
| 1041 |
outputs=[page0, page1, user_info_state, page0_error_box, chat_a, chat_b, page1_prompt, data_subset_state,eval_progress_modal,eval_progress_text],
|
| 1042 |
scroll_to_output=True
|
| 1043 |
)
|
|
@@ -1147,9 +1111,6 @@ with gr.Blocks(css=centered_col_css) as demo:
|
|
| 1147 |
fn=reset_everything_except_user_info,
|
| 1148 |
inputs=[],
|
| 1149 |
outputs=[
|
| 1150 |
-
# pages
|
| 1151 |
-
page0,
|
| 1152 |
-
final_page,
|
| 1153 |
|
| 1154 |
# states
|
| 1155 |
# user_info_state,
|
|
|
|
| 43 |
print(f"Error reading HTML file: {e}")
|
| 44 |
TxAgent_Project_Page_HTML = "<p>Error: Project page content could not be loaded.</p>"
|
| 45 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
# Load tool lists
|
| 47 |
fda_drug_labeling_tools_path = "fda_drug_labeling_tools.json"
|
| 48 |
monarch_tools_path = "monarch_tools.json"
|
|
|
|
| 161 |
#Prepare data
|
| 162 |
REPO_ID = "RichardZhu52/TxAgent_human_eval"
|
| 163 |
CROWDSOURCING_DATA_DIRECTORY = "crowdsourcing_eval_data_0430"
|
| 164 |
+
TXAGENT_RESULTS_SHEET_BASE_NAME = "TxAgent_Human_Eval_Results"
|
| 165 |
DISEASE_SPECIALTY_MAP_FILENAME = "disease_specialty_map.json"
|
| 166 |
+
QUESTION_MAP_FILENAME = "question_map.json"
|
| 167 |
|
| 168 |
+
def get_evaluator_questions(evaluator_id, all_files, evaluator_directory, question_map):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 169 |
|
| 170 |
# Filter to only the files in that directory
|
| 171 |
evaluator_files = [f for f in all_files if f.startswith(f"{evaluator_directory}/")]
|
|
|
|
| 182 |
model_name_key = os.path.basename(remote_path).replace('.json', '')
|
| 183 |
data_by_filename[model_name_key] = json.load(f)
|
| 184 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
#FINALLY, MAKE SURE THEY DIDNT ALREADY FILL IT OUT. Must go through every tuple of (question_ID, TxAgent, other model) where other model could be any of the other files in data_by_filename
|
| 186 |
model_names = [key for key in data_by_filename.keys() if key != 'txagent']
|
| 187 |
+
evaluator_question_ids = question_map.get(evaluator_id).get('question_ids')
|
| 188 |
full_question_ids_list = []
|
| 189 |
for other_model_name in model_names:
|
| 190 |
for q_id in evaluator_question_ids:
|
| 191 |
full_question_ids_list.append((q_id, other_model_name))
|
| 192 |
|
| 193 |
+
results_df = read_sheet_to_df(custom_sheet_name=str(TXAGENT_RESULTS_SHEET_BASE_NAME + f"_{str(evaluator_id)}"))
|
| 194 |
if (results_df is not None) and (not results_df.empty):
|
| 195 |
# collect all (question_ID, other_model) pairs already seen
|
| 196 |
matched_pairs = set()
|
| 197 |
for _, row in results_df.iterrows():
|
| 198 |
+
q = row["Question ID"]
|
| 199 |
+
# pick whichever response isn’t 'txagent'
|
| 200 |
+
a, b = row["ResponseA_Model"], row["ResponseB_Model"]
|
| 201 |
+
if a == "txagent" and b != "txagent":
|
| 202 |
+
matched_pairs.add((q, b))
|
| 203 |
+
elif b == "txagent" and a != "txagent":
|
| 204 |
+
matched_pairs.add((q, a))
|
|
|
|
| 205 |
|
| 206 |
# filter out any tuple whose (q_id, other_model) was already matched
|
| 207 |
full_question_ids_list = [
|
|
|
|
| 212 |
print(f"Filtered question IDs: {full_question_ids_list}")
|
| 213 |
print(f"Length of filtered question IDs: {len(full_question_ids_list)}")
|
| 214 |
|
|
|
|
| 215 |
return full_question_ids_list, data_by_filename
|
| 216 |
|
| 217 |
def go_to_page0_from_minus1():
|
| 218 |
return gr.update(visible=False), gr.update(visible=True)
|
| 219 |
|
| 220 |
+
def go_to_eval_progress_modal(name, email, evaluator_id, specialty_dd, subspecialty_dd, years_exp_radio, exp_explanation_tb, npi_id):
|
| 221 |
|
| 222 |
# ADDED: Validate that name and email are non-empty before proceeding
|
| 223 |
+
if not name or not email or not evaluator_id or not specialty_dd or not years_exp_radio:
|
| 224 |
+
return gr.update(visible=True), gr.update(visible=False), None, "Please fill out all the required fields (name, email, evaluator ID, specialty, years of experience). If you are not a licensed physician with a specific specialty, please choose the specialty that most closely aligns with your biomedical expertise.", gr.Chatbot(), gr.Chatbot(), gr.HTML(),gr.State(),gr.update(visible=False), ""
|
| 225 |
|
| 226 |
+
question_map_path = hf_hub_download(
|
| 227 |
+
repo_id=REPO_ID,
|
| 228 |
+
filename=QUESTION_MAP_FILENAME,
|
| 229 |
+
repo_type="dataset", # or omit if it's a Model/Space
|
| 230 |
+
# force_download=True, # ← always fetch new copy
|
| 231 |
+
revision="main" # branch/tag/commit, fetches the most recent version of the dataset each time this command is called
|
| 232 |
+
)
|
| 233 |
+
|
| 234 |
+
# Load the question map from the downloaded file
|
| 235 |
+
with open(question_map_path, 'r') as f:
|
| 236 |
+
question_map = json.load(f)
|
| 237 |
|
| 238 |
#retrieve data from HF
|
| 239 |
+
evaluator_directory = question_map.get(evaluator_id, {}).get('evaluator_name', None)
|
| 240 |
if evaluator_directory is None:
|
| 241 |
return gr.update(visible=True), gr.update(visible=False), None, "Invalid Evaluator ID, please try again.", gr.Chatbot(), gr.Chatbot(), gr.HTML(),gr.State(),gr.update(visible=False),""
|
| 242 |
all_files = list_repo_files(
|
|
|
|
| 245 |
revision="main",
|
| 246 |
)
|
| 247 |
|
| 248 |
+
full_question_ids_list, data_by_filename = get_evaluator_questions(evaluator_id, all_files, evaluator_directory, question_map)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 249 |
|
| 250 |
if len(full_question_ids_list) == 0:
|
| 251 |
return gr.update(visible=True), gr.update(visible=False), None, "Based on your submitted data, you have no more questions to evaluate. You may exit the page; we will follow-up if we require anything else from you. Thank you!", gr.Chatbot(), gr.Chatbot(), gr.HTML(),gr.State(),gr.update(visible=False),""
|
|
|
|
| 283 |
}
|
| 284 |
|
| 285 |
#update user_info
|
| 286 |
+
user_info = (name, email, specialty_dd, subspecialty_dd, years_exp_radio, exp_explanation_tb, npi_id, q_id, evaluator_id)
|
| 287 |
chat_A_value = format_chat(question_for_eval['models'][0]['reasoning_trace'], tool_database_labels)
|
| 288 |
chat_B_value = format_chat(question_for_eval['models'][1]['reasoning_trace'], tool_database_labels)
|
| 289 |
prompt_text = question_for_eval['question']
|
|
|
|
| 312 |
avatar_images=None, # Optional: omit user/assistant icons
|
| 313 |
rtl=False
|
| 314 |
)
|
| 315 |
+
return gr.update(visible=True), gr.update(visible=False), user_info,"", chat_a, chat_b, page1_prompt, question_for_eval, gr.update(visible=True), f"You are about to evaluate the next question. You have {len(full_question_ids_list)} question(s) remaining to evaluate."
|
| 316 |
|
| 317 |
#goes to page 1 from confirmation modal that tells users how many questions they have left to evaluate
|
| 318 |
def go_to_page1():
|
|
|
|
| 424 |
max-width: 800px; /* Adjust this width as desired */
|
| 425 |
width: 100%;
|
| 426 |
}
|
| 427 |
+
#participate-btn {
|
| 428 |
+
background-color: purple !important;
|
| 429 |
+
color: white !important;
|
| 430 |
+
border-color: purple !important;
|
| 431 |
+
}
|
| 432 |
+
#clear_btn {
|
| 433 |
+
background-color: #F08080 !important;
|
| 434 |
+
color: white !important;
|
| 435 |
+
border-color: #F08080 !important;
|
| 436 |
+
}
|
| 437 |
"""
|
| 438 |
with gr.Blocks(css=centered_col_css) as demo:
|
| 439 |
# States to save information between pages.
|
|
|
|
| 472 |
</div>
|
| 473 |
""")
|
| 474 |
with gr.Row():
|
| 475 |
+
participate_eval_btn = gr.Button(
|
| 476 |
+
value="🌟 Participate in TxAgent Evaluation 🌟",
|
| 477 |
+
variant="primary",
|
| 478 |
+
size="lg",
|
| 479 |
+
elem_id="participate-btn"
|
| 480 |
+
)
|
| 481 |
gr.HTML(TxAgent_Project_Page_HTML)
|
| 482 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 483 |
# Page 0: Welcome / Informational page.
|
| 484 |
with gr.Column(visible=False, elem_id="page0") as page0:
|
| 485 |
gr.Markdown("## Welcome to the TxAgent Evalution Study!")
|
|
|
|
| 499 |
gr.Markdown("## Please enter your information to get a question to evaluate. Please use the same email every time you log onto this evaluation portal, as we use your email to prevent showing repeat questions.")
|
| 500 |
name = gr.Textbox(label="Name (required)")
|
| 501 |
email = gr.Textbox(label="Email (required). Please use the same email every time you log onto this evaluation portal, as we use your email to prevent showing repeat questions.")
|
| 502 |
+
evaluator_id = gr.Textbox(label="Evaluator ID (required). This is the four-digit ID you received from us for the evaluation study. If you do not have an Evaluator ID or are unsure about your Evaluator ID, please contact us.")
|
| 503 |
specialty_dd = gr.Dropdown(choices=specialties_list, label="Primary Medical Specialty (required). Go to https://www.abms.org/member-boards/specialty-subspecialty-certificates/ for categorization)", multiselect=True)
|
| 504 |
subspecialty_dd = gr.Dropdown(choices=subspecialties_list, label="Subspecialty (if applicable). Go to https://www.abms.org/member-boards/specialty-subspecialty-certificates/ for categorization)", multiselect=True)
|
| 505 |
npi_id = gr.Textbox(label="National Provider Identifier ID (optional). Got to https://npiregistry.cms.hhs.gov/search to search for your NPI ID. If you do not have an NPI ID, please leave this blank.")
|
|
|
|
| 717 |
label=f"Score for Response B - {crit['label']}",
|
| 718 |
interactive=True)
|
| 719 |
with gr.Row():
|
| 720 |
+
clear_btn = gr.Button("Clear Selection", size="sm",elem_id="clear_btn")
|
| 721 |
clear_btn.click(fn=clear_selection, outputs=[rating_a,rating_b])
|
| 722 |
|
| 723 |
# wire each to re‐restrict the other on change
|
|
|
|
| 745 |
# Final Page: Thank you message.
|
| 746 |
with gr.Column(visible=False, elem_id="final_page") as final_page:
|
| 747 |
gr.Markdown("## You have no questions left to evaluate. Thank you for your participation!")
|
|
|
|
| 748 |
|
| 749 |
# Error Modal: For displaying validation errors.
|
| 750 |
with Modal("Error", visible=False, elem_id="error_modal") as error_modal:
|
|
|
|
| 775 |
"Timestamp": timestamp,
|
| 776 |
"Name": user_info[0],
|
| 777 |
"Email": user_info[1],
|
| 778 |
+
"Evaluator ID": user_info[8],
|
| 779 |
"Specialty": str(user_info[2]),
|
| 780 |
"Subspecialty": str(user_info[3]),
|
| 781 |
"Years of Experience": user_info[4],
|
|
|
|
| 807 |
def final_submit(data_subset_state, user_info, pairwise, comparisons_reasons, *args):
|
| 808 |
# --- Part 1: Submit the current results (Existing Logic) ---
|
| 809 |
row_dict = build_row_dict(data_subset_state, user_info, pairwise, comparisons_reasons, *args)
|
| 810 |
+
_, _, _, _, _, _, _, _, evaluator_id = user_info
|
| 811 |
+
append_to_sheet(user_data=None, custom_row_dict=row_dict, custom_sheet_name=str(TXAGENT_RESULTS_SHEET_BASE_NAME + f"_{evaluator_id}"), add_header_when_create_sheet=True)
|
| 812 |
|
| 813 |
# --- Part 2: Recalculate remaining questions (Existing Logic + Modified Error Handling) ---
|
| 814 |
# try:
|
| 815 |
|
| 816 |
# --- Re-fetch data and filter questions (Same logic as before) ---
|
| 817 |
+
question_map_path = hf_hub_download(
|
| 818 |
+
repo_id=REPO_ID,
|
| 819 |
+
filename=QUESTION_MAP_FILENAME,
|
| 820 |
+
repo_type="dataset", # or omit if it's a Model/Space
|
| 821 |
+
# force_download=True, # ← always fetch new copy
|
| 822 |
+
revision="main" # branch/tag/commit, fetches the most recent version of the dataset each time this command is called
|
| 823 |
+
)
|
| 824 |
+
|
| 825 |
+
with open(question_map_path, 'r') as f:
|
| 826 |
+
question_map = json.load(f)
|
| 827 |
|
| 828 |
+
evaluator_directory = question_map.get(evaluator_id, {}).get('evaluator_name', None)
|
| 829 |
+
all_files = list_repo_files(
|
| 830 |
+
repo_id=REPO_ID,
|
| 831 |
+
repo_type="dataset",
|
| 832 |
+
revision="main",
|
| 833 |
+
)
|
| 834 |
|
| 835 |
+
full_question_ids_list, data_by_filename = get_evaluator_questions(evaluator_id, all_files, evaluator_directory, question_map)
|
| 836 |
remaining_count = len(full_question_ids_list)
|
| 837 |
|
| 838 |
# --- Part 3: Determine UI updates based on remaining count ---
|
|
|
|
| 919 |
gr.update(visible=False), # page2 (Hide)
|
| 920 |
gr.update(visible=False), # confirm_modal (Hide)
|
| 921 |
gr.update(visible=True), # eval_progress_modal (Show)
|
| 922 |
+
f"Submission successful! You have {remaining_count} question(s) remaining to evaluate. You may exit the page and return later if you wish.", # eval_progress_text
|
| 923 |
gr.update(visible=False), # final_page (Hide)
|
| 924 |
"",
|
| 925 |
chat_a,
|
|
|
|
| 956 |
reset_ratings_B = [gr.update(value=None) for i in range(len(criteria))]
|
| 957 |
|
| 958 |
return (
|
|
|
|
|
|
|
|
|
|
| 959 |
|
| 960 |
# states
|
| 961 |
# gr.update(value=None), # user_info_state
|
|
|
|
| 1001 |
# Transition from Page 0 (Welcome) to Page 1.
|
| 1002 |
next_btn_0.click(
|
| 1003 |
fn=go_to_eval_progress_modal,
|
| 1004 |
+
inputs=[name, email, evaluator_id, specialty_dd, subspecialty_dd, years_exp_radio, exp_explanation_tb, npi_id],
|
| 1005 |
outputs=[page0, page1, user_info_state, page0_error_box, chat_a, chat_b, page1_prompt, data_subset_state,eval_progress_modal,eval_progress_text],
|
| 1006 |
scroll_to_output=True
|
| 1007 |
)
|
|
|
|
| 1111 |
fn=reset_everything_except_user_info,
|
| 1112 |
inputs=[],
|
| 1113 |
outputs=[
|
|
|
|
|
|
|
|
|
|
| 1114 |
|
| 1115 |
# states
|
| 1116 |
# user_info_state,
|
utils.py
CHANGED
|
@@ -161,7 +161,11 @@ def append_to_sheet(user_data=None, custom_row_dict=None, custom_sheet_name=None
|
|
| 161 |
# Access the first worksheet
|
| 162 |
sheet = spreadsheet.sheet1
|
| 163 |
|
| 164 |
-
if
|
|
|
|
|
|
|
|
|
|
|
|
|
| 165 |
# headers come from the keys of our row dict
|
| 166 |
if custom_row_dict is not None:
|
| 167 |
headers = list(custom_row_dict.keys())
|
|
|
|
| 161 |
# Access the first worksheet
|
| 162 |
sheet = spreadsheet.sheet1
|
| 163 |
|
| 164 |
+
# Check if the sheet has any rows yet
|
| 165 |
+
existing_values = sheet.get_all_values()
|
| 166 |
+
is_empty = (existing_values == [[]]) #indicates empty spreadsheet that was cleared in the past
|
| 167 |
+
|
| 168 |
+
if (is_new or is_empty) and add_header_when_create_sheet:
|
| 169 |
# headers come from the keys of our row dict
|
| 170 |
if custom_row_dict is not None:
|
| 171 |
headers = list(custom_row_dict.keys())
|