shgao committed on
Commit
2e80564
·
1 Parent(s): b3d2534
Files changed (1) hide show
  1. app.py +283 -271
app.py CHANGED
@@ -398,6 +398,37 @@ def validate_required_fields(name, email, evaluator_id, specialty_dd, years_exp_
398
  return None
399
 
400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
401
  def go_to_page0_from_minus1():
402
  return gr.update(visible=False), gr.update(visible=True)
403
 
@@ -420,12 +451,13 @@ def go_to_eval_progress_modal(name, email, evaluator_id, specialty_dd, subspecia
420
  validation_error, # page0_error_box
421
  gr.update(visible=False), # eval_progress_modal
422
  "", # page1_prompt
423
- "", # page1_reference_answer
424
  "", # page2_prompt
 
 
425
  "", # eval_progress_text
426
  None, # user_info_state
427
  None, # data_subset_state
428
- None, # question_progress_state
429
  None, # pairwise_state
430
  [], # chat_a_page1
431
  [], # chat_b_page1
@@ -439,8 +471,9 @@ def go_to_eval_progress_modal(name, email, evaluator_id, specialty_dd, subspecia
439
  gr.Info("Please wait for a few seconds as we are loading the data...", duration=5)
440
 
441
  # Get initial question and data
442
- user_info, chat_a, chat_b, page1_prompt, page1_reference_answer, question_for_eval, remaining_count, progress_state = get_next_eval_question(
443
- name, email, specialty_dd, subspecialty_dd, years_exp_radio, exp_explanation_tb, npi_id, evaluator_id, our_methods
 
444
  )
445
 
446
  if remaining_count == 0 or user_info is None:
@@ -457,12 +490,13 @@ def go_to_eval_progress_modal(name, email, evaluator_id, specialty_dd, subspecia
457
  message, # page0_error_box
458
  gr.update(visible=False), # eval_progress_modal
459
  "", # page1_prompt
460
- "", # page1_reference_answer
461
  "", # page2_prompt
 
 
462
  "", # eval_progress_text
463
  None, # user_info_state
464
  None, # data_subset_state
465
- None, # question_progress_state
466
  None, # pairwise_state
467
  [], # chat_a_page1
468
  [], # chat_b_page1
@@ -475,7 +509,7 @@ def go_to_eval_progress_modal(name, email, evaluator_id, specialty_dd, subspecia
475
 
476
 
477
  # Use advance_workflow to get all UI updates
478
- ui_updates = advance_workflow(progress_state, question_for_eval)
479
 
480
  print(f"\033[93mIn go_to_eval_progress_modal, using advance_workflow results: mode={progress_state.get('mode')}\033[0m")
481
 
@@ -488,12 +522,13 @@ def go_to_eval_progress_modal(name, email, evaluator_id, specialty_dd, subspecia
488
  "", # page0_error_box
489
  gr.update(visible=True), # eval_progress_modal
490
  ui_updates.get('page1_prompt', ""), # page1_prompt
491
- page1_reference_answer, # page1_reference_answer
492
  ui_updates.get('page2_prompt', ""), # page2_prompt
 
 
493
  f"You are about to evaluate the next question. You have {remaining_count} question(s) remaining to evaluate.", # eval_progress_text
494
  user_info, # user_info_state
495
- question_for_eval, # data_subset_state
496
- ui_updates.get('progress_state', progress_state), # question_progress_state
497
  progress_state.get('pairwise_results', {}), # pairwise_state
498
  ui_updates.get('chat_a_page1', []), # chat_a_page1
499
  ui_updates.get('chat_b_page1', []), # chat_b_page1
@@ -507,12 +542,12 @@ def go_to_eval_progress_modal(name, email, evaluator_id, specialty_dd, subspecia
507
  # Helper to fetch a specific question by ID for resuming progress
508
 
509
 
510
- # def proceed_from_eval_progress_modal(question_progress_state):
511
  # """
512
  # Proceed from eval progress modal to the appropriate page based on current workflow mode
513
  # """
514
  # # Determine which page to show based on the current mode in progress_state
515
- # if question_progress_state and question_progress_state.get('mode') == 'scoring':
516
  # # For scoring mode, show page2 and hide page1
517
  # return (
518
  # gr.update(visible=False), # eval_progress_modal
@@ -686,7 +721,24 @@ def initialize_question_progress(models_list):
686
  }
687
 
688
 
689
- def get_next_eval_question(name, email, specialty_dd, subspecialty_dd, years_exp_radio, exp_explanation_tb, npi_id, evaluator_id, our_methods, return_user_info=True, include_correct_answer=True):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
690
  """
691
  获取下一个评估问题及其初始状态。
692
  职责:
@@ -694,12 +746,37 @@ def get_next_eval_question(name, email, specialty_dd, subspecialty_dd, years_exp
694
  2. 加载问题数据
695
  3. 初始化/加载问题进度状态
696
  4. 调用 advance_to_next_step 获取 UI 渲染
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
697
  """
 
 
 
 
 
 
 
 
 
 
698
  # 1. 验证用户输入
699
  validation_error = validate_required_fields(
700
  name, email, evaluator_id, specialty_dd, years_exp_radio)
701
  if validation_error:
702
- return None, gr.update(visible=True), gr.update(visible=False), None, "Wrong info.", None, 0, None
703
 
704
  # 2. 获取评估者问题映射
705
  question_map_path = hf_hub_download(
@@ -718,7 +795,7 @@ def get_next_eval_question(name, email, specialty_dd, subspecialty_dd, years_exp
718
  evaluator_directory = question_map.get(evaluator_id, None)
719
  if evaluator_directory is None:
720
  print(f"\033[91mEvaluator ID {evaluator_id} not found in question map.\033[0m")
721
- return None, gr.update(visible=True), gr.update(visible=False), None, "Invalid Evaluator ID, please try again.", None, 0, None
722
 
723
 
724
  all_files = list_repo_files(
@@ -733,7 +810,7 @@ def get_next_eval_question(name, email, specialty_dd, subspecialty_dd, years_exp
733
  evaluator_id, all_files, evaluator_directory, our_methods)
734
 
735
  if len(full_question_ids_list) == 0:
736
- return None, None, None, None, None, None, 0, None
737
 
738
  # 确定当前问题 ID 并收集模型数据
739
  full_question_ids_list = sorted(
@@ -796,52 +873,42 @@ def get_next_eval_question(name, email, specialty_dd, subspecialty_dd, years_exp
796
  correct_answer = e.get("correct_answer")
797
  break
798
 
799
- question_for_eval = {
800
  "question": question_text,
801
  "id": q_id,
802
  "models_full": models_full
803
  }
804
  if include_correct_answer:
805
- question_for_eval["correct_answer"] = correct_answer
 
 
 
 
806
 
807
- # 创建用户信息对象
808
- user_info = {
809
- 'name': name,
810
- 'email': email,
811
- 'specialty': specialty_dd,
812
- 'subspecialty': subspecialty_dd,
813
- 'years_exp': years_exp_radio,
814
- 'exp_explanation': exp_explanation_tb,
815
- 'npi_id': npi_id,
816
- 'question_id': q_id,
817
- 'evaluator_id': evaluator_id
818
- } if return_user_info else None
819
 
820
  # 4. 调用 advance_workflow 获取初始 UI 更新
821
- ui_updates = advance_workflow(progress_state, question_for_eval)
822
-
823
- # 插入包含正确答案的对象到返回值中的合适位置
824
- page1_reference_answer = gr.Markdown(
825
- correct_answer) if include_correct_answer else None
826
-
827
- # 根据当前模式选择合适的内容
828
- # 如果是 scoring 模式,使用 chat_a_page2 和 chat_b_page2
829
- # 如果是 pairwise 模式,使用 chat_a_page1 和 chat_b_page1
830
- chat_a_content = ui_updates.get('chat_a_page2') if progress_state.get(
831
- 'mode') == 'scoring' else ui_updates.get('chat_a_page1')
832
- chat_b_content = ui_updates.get('chat_b_page2') if progress_state.get(
833
- 'mode') == 'scoring' else ui_updates.get('chat_b_page1')
834
- page_prompt = ui_updates.get('page2_prompt') if progress_state.get(
835
- 'mode') == 'scoring' else ui_updates.get('page1_prompt')
836
-
837
- # 返回用户信息和 UI 更新,使用上面选择的内容
838
  return (
839
- user_info,
840
- chat_a_content, # 使用适合当前模式的内容
841
  chat_b_content, # 使用适合当前模式的内容
842
  page_prompt, # 使用适合当前模式的提示
843
- page1_reference_answer,
844
- question_for_eval,
845
  len(full_question_ids_list),
846
  ui_updates['progress_state']
847
  )
@@ -1498,24 +1565,28 @@ def submit_pairwise_scoring(progress_state, data_subset_state, user_info, *ratin
1498
  # Determine modal visibility based on completion status
1499
  all_scoring_done = (len(progress_state['scoring_done_pairs']) ==
1500
  len(progress_state['all_pairs']))
1501
- next_question_modal_visibility = gr.update(visible=all_scoring_done)
1502
 
1503
  return [
1504
- ui_updates.get('page1_visible'),
1505
- ui_updates.get('page2_visible'),
1506
- ui_updates.get('page1_prompt'),
1507
- ui_updates.get('page2_prompt'),
1508
- ui_updates.get('progress_state'),
1509
- ui_updates.get('chat_a_page1'),
1510
- ui_updates.get('chat_b_page1'),
1511
- ui_updates.get('chat_a_page2'),
1512
- ui_updates.get('chat_b_page2'),
1513
- *ui_updates.get('pairwise_radios'),
1514
- *ui_updates.get('pairwise_reasons'),
 
 
 
 
 
1515
  *ui_updates.get('ratings_A'),
1516
  *ui_updates.get('ratings_B'),
1517
  *ui_updates.get('pairwise_results_for_display'),
1518
- next_question_modal_visibility
1519
  ]
1520
 
1521
  # Initialize pairwise_scores as method-keyed dict if it doesn't exist
@@ -1555,62 +1626,119 @@ def submit_pairwise_scoring(progress_state, data_subset_state, user_info, *ratin
1555
  # Determine modal visibility based on completion status
1556
  all_scoring_done = (len(progress_state['scoring_done_pairs']) ==
1557
  len(progress_state['all_pairs']))
1558
- next_question_modal_visibility = gr.update(visible=all_scoring_done)
1559
-
1560
- # advance_workflow handles all UI updates properly
1561
- # Return UI updates using advance_workflow results directly
1562
- return [
1563
- ui_updates.get('page1_visible'), # 5
1564
- ui_updates.get('page2_visible'), # 6
1565
- ui_updates.get('page1_prompt'), # 4
1566
- ui_updates.get('page2_prompt'), # 25
1567
- ui_updates.get('progress_state'), # 1
1568
- ui_updates.get('chat_a_page1'), # 2
1569
- ui_updates.get('chat_b_page1'), # 3
1570
- ui_updates.get('chat_a_page2'), # 23
1571
- ui_updates.get('chat_b_page2'), # 24
1572
- *ui_updates.get('pairwise_radios'), # 7-14
1573
- *ui_updates.get('pairwise_reasons'), # 15-22
1574
- *ui_updates.get('ratings_A'),
1575
- *ui_updates.get('ratings_B'),
1576
- *ui_updates.get('pairwise_results_for_display'), # 26-33
1577
- next_question_modal_visibility # 34
1578
- ]
1579
-
1580
-
1581
- def proceed_to_next_question(user_info):
1582
- # Fetch next question state
1583
- user_info_new, chat_a, chat_b, page1_prompt, page1_reference_answer, question_for_eval, remaining_count, progress_state = get_next_eval_question(
1584
- user_info['name'], user_info['email'], user_info['specialty'], user_info['subspecialty'],
1585
- user_info['years_exp'], user_info['exp_explanation'], user_info['npi_id'], user_info['evaluator_id'], our_methods
1586
- )
1587
-
1588
- # 根据当前模式选择合适的内容,确保正确显示
1589
- chat_a_content = chat_a
1590
- chat_b_content = chat_b
1591
- page_prompt_content = page1_prompt
1592
-
1593
- # Check the current mode to determine which page should be visible
1594
- current_mode = progress_state.get('mode')
1595
- page1_visible = gr.update(visible=(current_mode == 'pairwise'))
1596
- page2_visible = gr.update(visible=(current_mode == 'scoring'))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1597
 
1598
- print(
1599
- f"\033[93mIn proceed_to_next_question, mode is {current_mode}, setting page1_visible={page1_visible['visible']}, page2_visible={page2_visible['visible']}\033[0m")
1600
 
1601
- # Return exactly the elements bound in next_question_btn.click:
1602
- return [
1603
- user_info_new,
1604
- chat_a_content, # 使用适合当前模式的内容
1605
- chat_b_content, # 使用适合当前模式的内容
1606
- page_prompt_content, # 使用适合当前模式的提示
1607
- page1_reference_answer,
1608
- question_for_eval, # data_subset_state slot
1609
- progress_state, # question_progress_state slot
1610
- page1_visible, # page1 visibility based on mode
1611
- page2_visible, # page2 visibility based on mode
1612
- gr.update(visible=False) # next_question_modal hidden
1613
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1614
 
1615
  # --- Define Callback Functions for Confirmation Flow ---
1616
  def build_row_dict(
@@ -1656,64 +1784,6 @@ def build_row_dict(
1656
 
1657
  return row
1658
 
1659
- def final_submit(data_subset_state, user_info, pairwise, comparison_reasons, nonsense_btn_clicked, *args):
1660
- # --- Part 1: Submit the current results (Existing Logic) ---
1661
- row_dict = build_row_dict(data_subset_state, user_info,
1662
- pairwise, comparison_reasons, nonsense_btn_clicked, *args)
1663
- append_to_sheet(user_data=None, custom_row_dict=row_dict, custom_sheet_name=str(
1664
- TXAGENT_RESULTS_SHEET_BASE_NAME + f"_{user_info['evaluator_id']}"), add_header_when_create_sheet=True)
1665
-
1666
- # Get next question using dictionary values
1667
- user_info_new, chat_a, chat_b, page1_prompt, page1_reference_answer, question_for_eval, remaining_count, progress_state = get_next_eval_question(
1668
- user_info['name'],
1669
- user_info['email'],
1670
- user_info['specialty'],
1671
- user_info['subspecialty'],
1672
- user_info['years_exp'],
1673
- user_info['exp_explanation'],
1674
- user_info['npi_id'],
1675
- user_info['evaluator_id'],
1676
- our_methods
1677
- )
1678
-
1679
- if remaining_count == 0:
1680
- return [
1681
- gr.update(visible=False), # page0 (Hide)
1682
- gr.update(visible=False), # page2 (Hide)
1683
- gr.update(visible=False), # confirm_modal
1684
- gr.update(visible=False),
1685
- "",
1686
- gr.update(visible=True), # final_page (Show)
1687
- "",
1688
- None,
1689
- None,
1690
- None,
1691
- None,
1692
- None,
1693
- user_info_new,
1694
- progress_state
1695
- ]
1696
- return [
1697
- gr.update(visible=False), # page0 (Hide)
1698
- gr.update(visible=False), # page2 (Hide)
1699
- gr.update(visible=False), # confirm_modal (Hide)
1700
- gr.update(visible=True), # eval_progress_modal (Show)
1701
- # eval_progress_text
1702
- f"Submission successful! You have {remaining_count} question(s) remaining to evaluate. You may exit the page and return later if you wish.",
1703
- gr.update(visible=False), # final_page (Hide)
1704
- "",
1705
- chat_a,
1706
- chat_b,
1707
- page1_prompt,
1708
- page1_reference_answer,
1709
- question_for_eval,
1710
- user_info_new,
1711
- progress_state
1712
- ]
1713
-
1714
- def cancel_submission():
1715
- # Cancel final submission: just hide the confirmation modal.
1716
- return gr.update(visible=False)
1717
 
1718
  def reset_everything_except_user_info():
1719
 
@@ -1829,7 +1899,7 @@ with gr.Blocks(css=centered_col_css) as demo:
1829
  nonsense_btn_clicked = gr.State(False)
1830
  unqualified_A_state = gr.State()
1831
  data_subset_state = gr.State()
1832
- question_progress_state = gr.State()
1833
 
1834
  # Load specialty data
1835
  specialties_path = "specialties.json"
@@ -2129,13 +2199,13 @@ with gr.Blocks(css=centered_col_css) as demo:
2129
  # wire each to re‐restrict the other on change
2130
  rating_a.change(
2131
  fn=restrict_choices,
2132
- inputs=[question_progress_state, index_component,
2133
  rating_a, rating_b],
2134
  outputs=[rating_a, rating_b]
2135
  )
2136
  rating_b.change(
2137
  fn=restrict_choices,
2138
- inputs=[question_progress_state, index_component,
2139
  rating_a, rating_b],
2140
  outputs=[rating_a, rating_b]
2141
  )
@@ -2165,18 +2235,18 @@ with gr.Blocks(css=centered_col_css) as demo:
2165
  ok_btn.click(lambda: gr.update(visible=False), None, error_modal)
2166
 
2167
  # Confirmation Modal: Ask for final submission confirmation.
2168
- with Modal("Confirm Submission", visible=False, elem_id="confirm_modal") as confirm_modal:
2169
- gr.Markdown(
2170
- "Are you sure you want to submit? Once submitted, you cannot edit your responses.")
2171
- with gr.Row():
2172
- yes_btn = gr.Button("Yes, please submit")
2173
- cancel_btn = gr.Button("Cancel")
2174
 
2175
  # Add modal for proceeding to next question
2176
- with Modal("Next Question", visible=False, elem_id="next_question_modal") as next_question_modal:
2177
- gr.Markdown(
2178
- "You have completed this question. Click below to proceed to the next question.")
2179
- next_question_btn = gr.Button("Next Question")
2180
 
2181
 
2182
 
@@ -2196,8 +2266,8 @@ with gr.Blocks(css=centered_col_css) as demo:
2196
  subspecialty_dd, years_exp_radio, exp_explanation_tb, npi_id],
2197
  outputs=[
2198
  page0, page1, page2, page0_error_box, eval_progress_modal,
2199
- page1_prompt, page1_reference_answer, page2_prompt, eval_progress_text,
2200
- user_info_state, data_subset_state, question_progress_state, pairwise_state,
2201
  chat_a_page1, chat_b_page1, chat_a_page2, chat_b_page2,
2202
  *ratings_A, *ratings_B,
2203
  *pairwise_results_for_display
@@ -2207,7 +2277,7 @@ with gr.Blocks(css=centered_col_css) as demo:
2207
 
2208
  # eval_progress_proceed_btn.click(
2209
  # fn=proceed_from_eval_progress_modal,
2210
- # inputs=[question_progress_state],
2211
  # outputs=[eval_progress_modal, page0, page1, page2],
2212
  # scroll_to_output=True
2213
  # )
@@ -2230,12 +2300,12 @@ with gr.Blocks(css=centered_col_css) as demo:
2230
  # Transition from Page 1 (Pairwise) to the combined Rating Page (Page 2).
2231
  next_btn_1.click(
2232
  fn=submit_pairwise_comparison,
2233
- inputs=[question_progress_state, data_subset_state,
2234
  user_info_state, *pairwise_radios, *pairwise_reasons],
2235
  outputs=[
2236
  page1, page2,
2237
  page1_prompt, page2_prompt,
2238
- question_progress_state,
2239
  chat_a_page1, chat_b_page1, chat_a_page2, chat_b_page2,
2240
  *pairwise_radios,
2241
  *pairwise_reasons,
@@ -2253,28 +2323,33 @@ with gr.Blocks(css=centered_col_css) as demo:
2253
  # )
2254
 
2255
  # Wire up the modal button to proceed_to_next_question and reset all UI for the new question
2256
- next_question_btn.click(
2257
- fn=proceed_to_next_question,
2258
- inputs=[user_info_state],
2259
- outputs=[
2260
- user_info_state,
2261
- chat_a_page1, chat_b_page1, page1_prompt, page1_reference_answer,
2262
- data_subset_state, question_progress_state,
2263
- page1, page2, next_question_modal
2264
- ],
2265
- scroll_to_output=True
2266
- )
2267
 
2268
  submit_btn.click(
2269
  fn=submit_pairwise_scoring,
2270
- inputs=[question_progress_state,
2271
  data_subset_state, user_info_state, *ratings_A, *ratings_B],
2272
  outputs=[
2273
  page1, # gr.update(visible=False)
2274
  page2, # gr.update(visible=True)
2275
  page1_prompt, # None (page1_prompt)
2276
  page2_prompt, # page2_prompt_val
2277
- question_progress_state, # progress_state
 
 
 
 
 
2278
  chat_a_page1, # None (page1 chat_a)
2279
  chat_b_page1, # None (page1 chat_b)
2280
  chat_a_page2, # chat_a_val (不是None!)
@@ -2283,72 +2358,9 @@ with gr.Blocks(css=centered_col_css) as demo:
2283
  *pairwise_reasons, # reset texts - 修复:不应该使用ui_updates
2284
  *ratings_A, *ratings_B,
2285
  *pairwise_results_for_display, # pairwise results display
2286
- next_question_modal # next question modal visibility
2287
- ],
2288
- scroll_to_output=True
2289
- )
2290
-
2291
- # Finalize submission if user confirms.
2292
- question_submission_event = yes_btn.click(
2293
- fn=final_submit,
2294
- inputs=[data_subset_state, user_info_state, pairwise_state,
2295
- comparison_reasons, nonsense_btn_clicked, *ratings_A, *ratings_B],
2296
- outputs=[
2297
- page0, # Controlled by final_submit return value 1
2298
- page2, # Controlled by final_submit return value 2
2299
- confirm_modal, # Controlled by final_submit return value 3
2300
- eval_progress_modal, # Controlled by final_submit return value 4
2301
- eval_progress_text, # Controlled by final_submit return value 5
2302
- final_page, # Controlled by final_submit return value 6
2303
- page0_error_box,
2304
- chat_a_page1,
2305
- chat_b_page1,
2306
- page1_prompt,
2307
- page1_reference_answer,
2308
- data_subset_state,
2309
- user_info_state,
2310
- question_progress_state
2311
  ],
2312
  scroll_to_output=True
2313
  )
2314
 
2315
- # Cancel final submission.
2316
- cancel_btn.click(
2317
- fn=cancel_submission,
2318
- inputs=None,
2319
- outputs=confirm_modal
2320
- )
2321
-
2322
- # Reset everything and evaluate another question button
2323
- question_submission_event.then(
2324
- fn=reset_everything_except_user_info,
2325
- inputs=[],
2326
- outputs=[
2327
- # states
2328
- # user_info_state,
2329
- pairwise_state,
2330
- scores_A_state,
2331
- comparison_reasons,
2332
- unqualified_A_state,
2333
- # data_subset_state,
2334
-
2335
- # page0 elements that need to be reset
2336
- page0_error_box,
2337
-
2338
- # page1 elements that need to be reset
2339
- page1_error_box,
2340
-
2341
- # page2 elements that need to be reset
2342
- page2_prompt,
2343
- page2_reference_answer,
2344
- chat_a_page2,
2345
- chat_b_page2,
2346
- result_text,
2347
-
2348
- # lists of gradio elements that need to be unrolled
2349
- *ratings_A,
2350
- *ratings_B
2351
- ]
2352
- )
2353
-
2354
  demo.launch(share=True, allowed_paths=["."])
 
398
  return None
399
 
400
 
401
def create_user_info(name, email, specialty_dd, subspecialty_dd, years_exp_radio, exp_explanation_tb, npi_id, evaluator_id, question_id=None):
    """
    Bundle the evaluator's individual form values into one user_info dict.

    Args:
        name: User's name
        email: User's email
        specialty_dd: Primary medical specialty (stored under 'specialty')
        subspecialty_dd: Medical subspecialty (stored under 'subspecialty')
        years_exp_radio: Years of experience (stored under 'years_exp')
        exp_explanation_tb: Experience explanation (stored under 'exp_explanation')
        npi_id: NPI ID
        evaluator_id: Evaluator ID
        question_id: Question ID; defaults to None when not yet known

    Returns:
        dict: Mapping with the keys 'name', 'email', 'specialty',
        'subspecialty', 'years_exp', 'exp_explanation', 'npi_id',
        'evaluator_id' and 'question_id', in that order.
    """
    # Keys and values are listed in matching positions so the resulting
    # dictionary keeps the same insertion order as a literal would.
    field_names = (
        'name',
        'email',
        'specialty',
        'subspecialty',
        'years_exp',
        'exp_explanation',
        'npi_id',
        'evaluator_id',
        'question_id',
    )
    field_values = (
        name,
        email,
        specialty_dd,
        subspecialty_dd,
        years_exp_radio,
        exp_explanation_tb,
        npi_id,
        evaluator_id,
        question_id,
    )
    return dict(zip(field_names, field_values))
430
+
431
+
432
  def go_to_page0_from_minus1():
433
  return gr.update(visible=False), gr.update(visible=True)
434
 
 
451
  validation_error, # page0_error_box
452
  gr.update(visible=False), # eval_progress_modal
453
  "", # page1_prompt
 
454
  "", # page2_prompt
455
+ "", # page1_reference_answer
456
+ "", # page2_reference_answer
457
  "", # eval_progress_text
458
  None, # user_info_state
459
  None, # data_subset_state
460
+ None, # progress_state
461
  None, # pairwise_state
462
  [], # chat_a_page1
463
  [], # chat_b_page1
 
471
  gr.Info("Please wait for a few seconds as we are loading the data...", duration=5)
472
 
473
  # Get initial question and data
474
+ user_info = create_user_info(name, email, specialty_dd, subspecialty_dd, years_exp_radio, exp_explanation_tb, npi_id, evaluator_id)
475
+ user_info, chat_a, chat_b, page1_prompt, data_subset_state, remaining_count, progress_state = get_next_eval_question(
476
+ user_info, our_methods
477
  )
478
 
479
  if remaining_count == 0 or user_info is None:
 
490
  message, # page0_error_box
491
  gr.update(visible=False), # eval_progress_modal
492
  "", # page1_prompt
 
493
  "", # page2_prompt
494
+ "", # page1_reference_answer
495
+ "", # page2_reference_answer
496
  "", # eval_progress_text
497
  None, # user_info_state
498
  None, # data_subset_state
499
+ None, # progress_state
500
  None, # pairwise_state
501
  [], # chat_a_page1
502
  [], # chat_b_page1
 
509
 
510
 
511
  # Use advance_workflow to get all UI updates
512
+ ui_updates = advance_workflow(progress_state, data_subset_state)
513
 
514
  print(f"\033[93mIn go_to_eval_progress_modal, using advance_workflow results: mode={progress_state.get('mode')}\033[0m")
515
 
 
522
  "", # page0_error_box
523
  gr.update(visible=True), # eval_progress_modal
524
  ui_updates.get('page1_prompt', ""), # page1_prompt
 
525
  ui_updates.get('page2_prompt', ""), # page2_prompt
526
+ data_subset_state['reference_answer'], # page1_reference_answer
527
+ data_subset_state['reference_answer'], # page2_reference_answer
528
  f"You are about to evaluate the next question. You have {remaining_count} question(s) remaining to evaluate.", # eval_progress_text
529
  user_info, # user_info_state
530
+ data_subset_state, # data_subset_state
531
+ ui_updates.get('progress_state', progress_state), # progress_state
532
  progress_state.get('pairwise_results', {}), # pairwise_state
533
  ui_updates.get('chat_a_page1', []), # chat_a_page1
534
  ui_updates.get('chat_b_page1', []), # chat_b_page1
 
542
  # Helper to fetch a specific question by ID for resuming progress
543
 
544
 
545
+ # def proceed_from_eval_progress_modal(progress_state):
546
  # """
547
  # Proceed from eval progress modal to the appropriate page based on current workflow mode
548
  # """
549
  # # Determine which page to show based on the current mode in progress_state
550
+ # if progress_state and progress_state.get('mode') == 'scoring':
551
  # # For scoring mode, show page2 and hide page1
552
  # return (
553
  # gr.update(visible=False), # eval_progress_modal
 
721
  }
722
 
723
 
724
+ def _create_reference_answer_component(correct_answer, include_correct_answer=True):
725
+ """
726
+ Helper function to create reference answer component.
727
+
728
+ This centralizes the reference answer creation logic for consistency
729
+ across different functions.
730
+
731
+ Args:
732
+ correct_answer: The correct answer text
733
+ include_correct_answer: Whether to include the correct answer
734
+
735
+ Returns:
736
+ gr.Markdown component with correct answer or None
737
+ """
738
+ return gr.Markdown(correct_answer) if include_correct_answer and correct_answer else None
739
+
740
+
741
+ def get_next_eval_question(user_info, our_methods, return_user_info=True, include_correct_answer=True):
742
  """
743
  获取下一个评估问题及其初始状态。
744
  职责:
 
746
  2. 加载问题数据
747
  3. 初始化/加载问题进度状态
748
  4. 调用 advance_to_next_step 获取 UI 渲染
749
+
750
+ Args:
751
+ user_info (dict): User information dictionary containing:
752
+ - name: User's name
753
+ - email: User's email
754
+ - specialty: Primary medical specialty
755
+ - subspecialty: Medical subspecialty
756
+ - years_exp: Years of experience
757
+ - exp_explanation: Experience explanation
758
+ - npi_id: NPI ID
759
+ - evaluator_id: Evaluator ID
760
+ - question_id: Question ID (optional)
761
+ our_methods: List of our methods
762
+ return_user_info: Whether to return user info
763
+ include_correct_answer: Whether to include correct answer
764
  """
765
+ # Extract individual fields from user_info for compatibility
766
+ name = user_info.get('name')
767
+ email = user_info.get('email')
768
+ specialty_dd = user_info.get('specialty')
769
+ subspecialty_dd = user_info.get('subspecialty')
770
+ years_exp_radio = user_info.get('years_exp')
771
+ exp_explanation_tb = user_info.get('exp_explanation')
772
+ npi_id = user_info.get('npi_id')
773
+ evaluator_id = user_info.get('evaluator_id')
774
+
775
  # 1. 验证用户输入
776
  validation_error = validate_required_fields(
777
  name, email, evaluator_id, specialty_dd, years_exp_radio)
778
  if validation_error:
779
+ return None, gr.update(visible=True), gr.update(visible=False), "Wrong info.", None, 0, None
780
 
781
  # 2. 获取评估者问题映射
782
  question_map_path = hf_hub_download(
 
795
  evaluator_directory = question_map.get(evaluator_id, None)
796
  if evaluator_directory is None:
797
  print(f"\033[91mEvaluator ID {evaluator_id} not found in question map.\033[0m")
798
+ return None, gr.update(visible=True), gr.update(visible=False), "Invalid Evaluator ID, please try again.", None, 0, None
799
 
800
 
801
  all_files = list_repo_files(
 
810
  evaluator_id, all_files, evaluator_directory, our_methods)
811
 
812
  if len(full_question_ids_list) == 0:
813
+ return None, None, None, None, None, 0, None
814
 
815
  # 确定当前问题 ID 并收集模型数据
816
  full_question_ids_list = sorted(
 
873
  correct_answer = e.get("correct_answer")
874
  break
875
 
876
+ data_subset_state = {
877
  "question": question_text,
878
  "id": q_id,
879
  "models_full": models_full
880
  }
881
  if include_correct_answer:
882
+ data_subset_state["correct_answer"] = correct_answer
883
+ # Store reference answer component data for later extraction
884
+ data_subset_state["reference_answer"] = _create_reference_answer_component(correct_answer, include_correct_answer)
885
+ else:
886
+ data_subset_state["reference_answer"] = _create_reference_answer_component(None, include_correct_answer)
887
 
888
+ # 创建用户信息对象 (update question_id if not already set)
889
+ if return_user_info:
890
+ updated_user_info = user_info.copy()
891
+ updated_user_info['question_id'] = q_id
892
+ else:
893
+ updated_user_info = None
 
 
 
 
 
 
894
 
895
  # 4. 调用 advance_workflow 获取初始 UI 更新
896
+ ui_updates = advance_workflow(progress_state, data_subset_state)
897
+
898
+ # 使用 advance_workflow 返回的模式适配内容,通过统一的键映射自动选择
899
+ # advance_workflow 内部通过 extract_ui_content_by_mode 已经处理了模式选择和内容准备
900
+ chat_a_content = ui_updates.get('chat_a_page1') or ui_updates.get('chat_a_page2', [])
901
+ chat_b_content = ui_updates.get('chat_b_page1') or ui_updates.get('chat_b_page2', [])
902
+ page_prompt = ui_updates.get('page1_prompt') or ui_updates.get('page2_prompt', "")
903
+
904
+
905
+ # 返回用户信息和 UI 更新,使用 advance_workflow 提供的内容
 
 
 
 
 
 
 
906
  return (
907
+ updated_user_info,
908
+ chat_a_content, # 由 advance_workflow 提供的模式适配内容
909
  chat_b_content, # 使用适合当前模式的内容
910
  page_prompt, # 使用适合当前模式的提示
911
+ data_subset_state,
 
912
  len(full_question_ids_list),
913
  ui_updates['progress_state']
914
  )
 
1565
  # Determine modal visibility based on completion status
1566
  all_scoring_done = (len(progress_state['scoring_done_pairs']) ==
1567
  len(progress_state['all_pairs']))
1568
+ # next_question_modal_visibility = gr.update(visible=all_scoring_done)
1569
 
1570
  return [
1571
+ ui_updates.get('page1_visible'), # 5
1572
+ ui_updates.get('page2_visible'), # 6
1573
+ ui_updates.get('page1_prompt'), # 4
1574
+ ui_updates.get('page2_prompt'), # 25
1575
+ data_subset_state['reference_answer'], # page1_reference_answer
1576
+ data_subset_state['reference_answer'], # page2_reference_answer
1577
+ user_info, # user_info_state
1578
+ data_subset_state, # data_subset_state
1579
+ ui_updates.get('progress_state'), # 1
1580
+ progress_state.get('pairwise_results', {}), # pairwise_state
1581
+ ui_updates.get('chat_a_page1'), # 2
1582
+ ui_updates.get('chat_b_page1'), # 3
1583
+ ui_updates.get('chat_a_page2'), # 23
1584
+ ui_updates.get('chat_b_page2'), # 24
1585
+ *ui_updates.get('pairwise_radios'), # 7-14
1586
+ *ui_updates.get('pairwise_reasons'), # 15-22
1587
  *ui_updates.get('ratings_A'),
1588
  *ui_updates.get('ratings_B'),
1589
  *ui_updates.get('pairwise_results_for_display'),
 
1590
  ]
1591
 
1592
  # Initialize pairwise_scores as method-keyed dict if it doesn't exist
 
1626
  # Determine modal visibility based on completion status
1627
  all_scoring_done = (len(progress_state['scoring_done_pairs']) ==
1628
  len(progress_state['all_pairs']))
1629
+ # next_question_modal_visibility = gr.update(visible=all_scoring_done)
1630
+
1631
+
1632
+ if not all_scoring_done:
1633
+ # advance_workflow handles all UI updates properly
1634
+ # Return UI updates using advance_workflow results directly
1635
+ return [
1636
+ ui_updates.get('page1_visible'), # 5
1637
+ ui_updates.get('page2_visible'), # 6
1638
+ ui_updates.get('page1_prompt'), # 4
1639
+ ui_updates.get('page2_prompt'), # 25
1640
+ data_subset_state['reference_answer'], # page1_reference_answer
1641
+ data_subset_state['reference_answer'], # page2_reference_answer
1642
+ user_info, # user_info_state
1643
+ data_subset_state, # data_subset_state
1644
+ ui_updates.get('progress_state'), # 1
1645
+ progress_state.get('pairwise_results', {}), # pairwise_state
1646
+ ui_updates.get('chat_a_page1'), # 2
1647
+ ui_updates.get('chat_b_page1'), # 3
1648
+ ui_updates.get('chat_a_page2'), # 23
1649
+ ui_updates.get('chat_b_page2'), # 24
1650
+ *ui_updates.get('pairwise_radios'), # 7-14
1651
+ *ui_updates.get('pairwise_reasons'), # 15-22
1652
+ *ui_updates.get('ratings_A'),
1653
+ *ui_updates.get('ratings_B'),
1654
+ *ui_updates.get('pairwise_results_for_display'),
1655
+ ]
1656
+
1657
+ user_info, chat_a, chat_b, page1_prompt, data_subset_state, remaining_count, progress_state = get_next_eval_question(
1658
+ user_info, our_methods
1659
+ )
1660
+
1661
+ if remaining_count == 0: # code TODO
1662
+ gr.Info("You have no more questions to evaluate. You may exit the page; we will follow-up if we require anything else from you. Thank you!")
1663
+ return [
1664
+ ui_updates.get('page1_visible'), # 5
1665
+ ui_updates.get('page2_visible'), # 6
1666
+ ui_updates.get('page1_prompt'), # 4
1667
+ ui_updates.get('page2_prompt'), # 25
1668
+ data_subset_state['reference_answer'], # page1_reference_answer
1669
+ data_subset_state['reference_answer'], # page2_reference_answer
1670
+ user_info, # user_info_state
1671
+ data_subset_state, # data_subset_state
1672
+ ui_updates.get('progress_state'), # 1
1673
+ progress_state.get('pairwise_results', {}), # pairwise_state
1674
+ ui_updates.get('chat_a_page1'), # 2
1675
+ ui_updates.get('chat_b_page1'), # 3
1676
+ ui_updates.get('chat_a_page2'), # 23
1677
+ ui_updates.get('chat_b_page2'), # 24
1678
+ *ui_updates.get('pairwise_radios'), # 7-14
1679
+ *ui_updates.get('pairwise_reasons'), # 15-22
1680
+ *ui_updates.get('ratings_A'),
1681
+ *ui_updates.get('ratings_B'),
1682
+ *ui_updates.get('pairwise_results_for_display'), # 26-33
1683
+ # next_question_modal_visibility # 34
1684
+ ]
1685
+ # Use advance_workflow to get all UI updates
1686
+ ui_updates = advance_workflow(progress_state, data_subset_state)
1687
+ print(f"\033[93mIn submit_pairwise_scoring, using advance_workflow results: mode={progress_state.get('mode')}\033[0m")
1688
+ gr.Info(f"You are about to evaluate the next question. You have {remaining_count} question(s) remaining to evaluate.") # eval_progress_text
1689
+ return (
1690
+ ui_updates.get('page1_visible'), # 5
1691
+ ui_updates.get('page2_visible'), # 6
1692
+ ui_updates.get('page1_prompt'), # 4
1693
+ ui_updates.get('page2_prompt'), # 25
1694
+ data_subset_state['reference_answer'], # page1_reference_answer
1695
+ data_subset_state['reference_answer'], # page2_reference_answer
1696
+ user_info, # user_info_state
1697
+ data_subset_state, # data_subset_state
1698
+ ui_updates.get('progress_state'), # 1
1699
+ progress_state.get('pairwise_results', {}), # pairwise_state
1700
+ ui_updates.get('chat_a_page1'), # 2
1701
+ ui_updates.get('chat_b_page1'), # 3
1702
+ ui_updates.get('chat_a_page2'), # 23
1703
+ ui_updates.get('chat_b_page2'), # 24
1704
+ *ui_updates.get('pairwise_radios'), # 7-14
1705
+ *ui_updates.get('pairwise_reasons'), # 15-22
1706
+ *ui_updates.get('ratings_A'),
1707
+ *ui_updates.get('ratings_B'),
1708
+ *ui_updates.get('pairwise_results_for_display'),
1709
+ # next_question_modal_visibility
1710
+ )
1711
 
 
 
1712
 
1713
+ # def proceed_to_next_question(user_info):
1714
+ # """
1715
+ # Refactored to reuse code from go_to_eval_progress_modal by implementing it using advance_workflow.
1716
+ # This eliminates code duplication and ensures consistent UI behavior.
1717
+ # """
1718
+ # # Fetch next question state
1719
+ # user_info_new, chat_a, chat_b, page1_prompt, data_subset_state, remaining_count, progress_state = get_next_eval_question(
1720
+ # user_info['name'], user_info['email'], user_info['specialty'], user_info['subspecialty'],
1721
+ # user_info['years_exp'], user_info['exp_explanation'], user_info['npi_id'], user_info['evaluator_id'], our_methods
1722
+ # )
1723
+
1724
+ # # Use advance_workflow to get all UI updates (same pattern as go_to_eval_progress_modal)
1725
+ # ui_updates = advance_workflow(progress_state, data_subset_state)
1726
+
1727
+ # print(f"\033[93mIn proceed_to_next_question, using advance_workflow results: mode={progress_state.get('mode')}\033[0m")
1728
+
1729
+ # # Return exactly the elements bound in next_question_btn.click:
1730
+ # return [
1731
+ # user_info_new,
1732
+ # ui_updates.get('chat_a_page1', chat_a), # use the content appropriate for the current mode
1733
+ # ui_updates.get('chat_b_page1', chat_b), # use the content appropriate for the current mode
1734
+ # ui_updates.get('page1_prompt', page1_prompt), # use the prompt appropriate for the current mode
1735
+ # page1_reference_answer,
1736
+ # data_subset_state, # data_subset_state slot
1737
+ # ui_updates.get('progress_state', progress_state), # progress_state slot
1738
+ # ui_updates.get('page1_visible', gr.update(visible=True)), # page1 visibility based on mode
1739
+ # ui_updates.get('page2_visible', gr.update(visible=False)), # page2 visibility based on mode
1740
+ # gr.update(visible=False) # next_question_modal hidden
1741
+ # ]
1742
 
1743
  # --- Define Callback Functions for Confirmation Flow ---
1744
  def build_row_dict(
 
1784
 
1785
  return row
1786
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1787
 
1788
  def reset_everything_except_user_info():
1789
 
 
1899
  nonsense_btn_clicked = gr.State(False)
1900
  unqualified_A_state = gr.State()
1901
  data_subset_state = gr.State()
1902
+ progress_state = gr.State()
1903
 
1904
  # Load specialty data
1905
  specialties_path = "specialties.json"
 
2199
  # wire each to re‐restrict the other on change
2200
  rating_a.change(
2201
  fn=restrict_choices,
2202
+ inputs=[progress_state, index_component,
2203
  rating_a, rating_b],
2204
  outputs=[rating_a, rating_b]
2205
  )
2206
  rating_b.change(
2207
  fn=restrict_choices,
2208
+ inputs=[progress_state, index_component,
2209
  rating_a, rating_b],
2210
  outputs=[rating_a, rating_b]
2211
  )
 
2235
  ok_btn.click(lambda: gr.update(visible=False), None, error_modal)
2236
 
2237
  # Confirmation Modal: Ask for final submission confirmation.
2238
+ # with Modal("Confirm Submission", visible=False, elem_id="confirm_modal") as confirm_modal:
2239
+ # gr.Markdown(
2240
+ # "Are you sure you want to submit? Once submitted, you cannot edit your responses.")
2241
+ # with gr.Row():
2242
+ # yes_btn = gr.Button("Yes, please submit")
2243
+ # cancel_btn = gr.Button("Cancel")
2244
 
2245
  # Add modal for proceeding to next question
2246
+ # with Modal("Next Question", visible=False, elem_id="next_question_modal") as next_question_modal:
2247
+ # gr.Markdown(
2248
+ # "You have completed this question. Click below to proceed to the next question.")
2249
+ # next_question_btn = gr.Button("Next Question")
2250
 
2251
 
2252
 
 
2266
  subspecialty_dd, years_exp_radio, exp_explanation_tb, npi_id],
2267
  outputs=[
2268
  page0, page1, page2, page0_error_box, eval_progress_modal,
2269
+ page1_prompt, page2_prompt, page1_reference_answer, page2_reference_answer, eval_progress_text,
2270
+ user_info_state, data_subset_state, progress_state, pairwise_state,
2271
  chat_a_page1, chat_b_page1, chat_a_page2, chat_b_page2,
2272
  *ratings_A, *ratings_B,
2273
  *pairwise_results_for_display
 
2277
 
2278
  # eval_progress_proceed_btn.click(
2279
  # fn=proceed_from_eval_progress_modal,
2280
+ # inputs=[progress_state],
2281
  # outputs=[eval_progress_modal, page0, page1, page2],
2282
  # scroll_to_output=True
2283
  # )
 
2300
  # Transition from Page 1 (Pairwise) to the combined Rating Page (Page 2).
2301
  next_btn_1.click(
2302
  fn=submit_pairwise_comparison,
2303
+ inputs=[progress_state, data_subset_state,
2304
  user_info_state, *pairwise_radios, *pairwise_reasons],
2305
  outputs=[
2306
  page1, page2,
2307
  page1_prompt, page2_prompt,
2308
+ progress_state,
2309
  chat_a_page1, chat_b_page1, chat_a_page2, chat_b_page2,
2310
  *pairwise_radios,
2311
  *pairwise_reasons,
 
2323
  # )
2324
 
2325
  # Wire up the modal button to proceed_to_next_question and reset all UI for the new question
2326
+ # next_question_btn.click(
2327
+ # fn=proceed_to_next_question,
2328
+ # inputs=[user_info_state],
2329
+ # outputs=[
2330
+ # user_info_state,
2331
+ # chat_a_page1, chat_b_page1, page1_prompt, page1_reference_answer,
2332
+ # data_subset_state, progress_state,
2333
+ # page1, page2, next_question_modal
2334
+ # ],
2335
+ # scroll_to_output=True
2336
+ # )
2337
 
2338
  submit_btn.click(
2339
  fn=submit_pairwise_scoring,
2340
+ inputs=[progress_state,
2341
  data_subset_state, user_info_state, *ratings_A, *ratings_B],
2342
  outputs=[
2343
  page1, # gr.update(visible=False)
2344
  page2, # gr.update(visible=True)
2345
  page1_prompt, # None (page1_prompt)
2346
  page2_prompt, # page2_prompt_val
2347
+ page1_reference_answer, # page1_reference_answer
2348
+ page2_reference_answer, # page2_reference_answer
2349
+ user_info_state,
2350
+ data_subset_state,
2351
+ progress_state, # progress_state
2352
+ pairwise_state,
2353
  chat_a_page1, # None (page1 chat_a)
2354
  chat_b_page1, # None (page1 chat_b)
2355
  chat_a_page2, # chat_a_val (not None!)
 
2358
  *pairwise_reasons, # reset texts - fix: should not use ui_updates
2359
  *ratings_A, *ratings_B,
2360
  *pairwise_results_for_display, # pairwise results display
2361
+ # next_question_modal_visibility # next question modal visibility
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2362
  ],
2363
  scroll_to_output=True
2364
  )
2365
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2366
  demo.launch(share=True, allowed_paths=["."])