shgao committed on
Commit
c4fe0cf
·
1 Parent(s): d1eab08
Files changed (1) hide show
  1. app.py +29 -27
app.py CHANGED
@@ -734,7 +734,7 @@ def get_next_eval_question(user_info, our_methods, return_user_info=True, includ
734
  with open(question_map_path, 'r') as f:
735
  question_map = json.load(f)
736
 
737
- print(f"\033[91m{question_map}\033[0m")
738
 
739
  # 获取评估者目录
740
  evaluator_directory = question_map.get(evaluator_id, None)
@@ -1091,7 +1091,7 @@ def advance_workflow(progress_state, data_subset_state, current_pairwise=None, c
1091
  }
1092
 
1093
  # Validate input for scoring
1094
- if current_scoring is not None and progress_state.get('mode') == 'scoring':
1095
  ratings_A, ratings_B = current_scoring
1096
  if any(rating is None for rating in ratings_A) or any(rating is None for rating in ratings_B):
1097
  gr.Warning("Error: Please provide ratings for all criteria for both models.",
@@ -1187,9 +1187,10 @@ def submit_pairwise_scoring(progress_state, data_subset_state, user_info, *combi
1187
 
1188
 
1189
  # Validate input
1190
- if any(answer is None for answer in pairwise):
 
1191
  # Return current state with no changes - let advance_workflow handle the structure
1192
- ui_updates = advance_workflow(progress_state, data_subset_state, current_pairwise=pairwise)
1193
  return [
1194
  gr.update(visible=False), # page0
1195
  gr.update(visible=True), # page1
@@ -1210,30 +1211,31 @@ def submit_pairwise_scoring(progress_state, data_subset_state, user_info, *combi
1210
  *([gr.update() for _ in range(len_criteria)]), # ratings_B_page1 (keep current values)
1211
  ]
1212
 
1213
- # Validate input - check if all ratings are provided
1214
- if any(rating is None for rating in ratings_A) or any(rating is None for rating in ratings_B):
1215
- # Return current state with no changes - let advance_workflow handle the structure
1216
- ui_updates = advance_workflow(progress_state, data_subset_state, current_scoring=[ratings_A, ratings_B])
 
1217
 
1218
- return [
1219
- gr.update(visible=False), # page0
1220
- gr.update(visible=True), # page1
1221
- "", # page0_error_box
1222
- ui_updates.get('page1_prompt'), # page1_prompt
1223
- user_info, # user_info_state
1224
- data_subset_state, # data_subset_state
1225
- ui_updates.get('progress_state'), # progress_state
1226
- progress_state.get('pairwise_results', {}), # pairwise_state
1227
- ui_updates.get('chat_a_answer'), # chat_a_answer
1228
- ui_updates.get('chat_b_answer'), # chat_b_answer
1229
- ui_updates.get('chat_a_reasoning'), # chat_a_reasoning
1230
- ui_updates.get('chat_b_reasoning'), # chat_b_reasoning
1231
- ui_updates.get('pairwise_header'), # pairwise_header
1232
- *([gr.update() for _ in range(len_criteria)]), # pairwise_inputs (keep current values)
1233
- *([gr.update() for _ in range(len_criteria)]), # comparison_reasons_inputs (keep current values)
1234
- *([gr.update() for _ in range(len_criteria)]), # ratings_A_page1 (keep current values)
1235
- *([gr.update() for _ in range(len_criteria)]), # ratings_B_page1 (keep current values)
1236
- ]
1237
 
1238
  # Initialize pairwise_scores as method-keyed dict if it doesn't exist
1239
  if 'pairwise_scores' not in progress_state:
 
734
  with open(question_map_path, 'r') as f:
735
  question_map = json.load(f)
736
 
737
+ # print(f"\033[91m{question_map}\033[0m")
738
 
739
  # 获取评估者目录
740
  evaluator_directory = question_map.get(evaluator_id, None)
 
1091
  }
1092
 
1093
  # Validate input for scoring
1094
+ if current_scoring is not None and (any(answer is None for answer in current_scoring[0]) or any(answer is None for answer in current_scoring[1])):
1095
  ratings_A, ratings_B = current_scoring
1096
  if any(rating is None for rating in ratings_A) or any(rating is None for rating in ratings_B):
1097
  gr.Warning("Error: Please provide ratings for all criteria for both models.",
 
1187
 
1188
 
1189
  # Validate input
1190
+ if any(answer is None for answer in pairwise) or any(rating is None for rating in ratings_A) or any(rating is None for rating in ratings_B):
1191
+ print("Error: Missing pairwise comparison answers.")
1192
  # Return current state with no changes - let advance_workflow handle the structure
1193
+ ui_updates = advance_workflow(progress_state, data_subset_state, current_pairwise=pairwise, current_scoring=[ratings_A, ratings_B])
1194
  return [
1195
  gr.update(visible=False), # page0
1196
  gr.update(visible=True), # page1
 
1211
  *([gr.update() for _ in range(len_criteria)]), # ratings_B_page1 (keep current values)
1212
  ]
1213
 
1214
+ # # Validate input - check if all ratings are provided
1215
+ # if any(rating is None for rating in ratings_A) or any(rating is None for rating in ratings_B):
1216
+ # print("Error: Missing ratings for one or more criteria.")
1217
+ # # Return current state with no changes - let advance_workflow handle the structure
1218
+ # ui_updates = advance_workflow(progress_state, data_subset_state, current_scoring=[ratings_A, ratings_B])
1219
 
1220
+ # return [
1221
+ # gr.update(visible=False), # page0
1222
+ # gr.update(visible=True), # page1
1223
+ # "", # page0_error_box
1224
+ # ui_updates.get('page1_prompt'), # page1_prompt
1225
+ # user_info, # user_info_state
1226
+ # data_subset_state, # data_subset_state
1227
+ # ui_updates.get('progress_state'), # progress_state
1228
+ # progress_state.get('pairwise_results', {}), # pairwise_state
1229
+ # ui_updates.get('chat_a_answer'), # chat_a_answer
1230
+ # ui_updates.get('chat_b_answer'), # chat_b_answer
1231
+ # ui_updates.get('chat_a_reasoning'), # chat_a_reasoning
1232
+ # ui_updates.get('chat_b_reasoning'), # chat_b_reasoning
1233
+ # ui_updates.get('pairwise_header'), # pairwise_header
1234
+ # *([gr.update() for _ in range(len_criteria)]), # pairwise_inputs (keep current values)
1235
+ # *([gr.update() for _ in range(len_criteria)]), # comparison_reasons_inputs (keep current values)
1236
+ # *([gr.update() for _ in range(len_criteria)]), # ratings_A_page1 (keep current values)
1237
+ # *([gr.update() for _ in range(len_criteria)]), # ratings_B_page1 (keep current values)
1238
+ # ]
1239
 
1240
  # Initialize pairwise_scores as method-keyed dict if it doesn't exist
1241
  if 'pairwise_scores' not in progress_state: