Spaces:
Sleeping
Sleeping
update
Browse files
app.py
CHANGED
@@ -734,7 +734,7 @@ def get_next_eval_question(user_info, our_methods, return_user_info=True, includ
|
|
734 |
with open(question_map_path, 'r') as f:
|
735 |
question_map = json.load(f)
|
736 |
|
737 |
-
print(f"\033[91m{question_map}\033[0m")
|
738 |
|
739 |
# 获取评估者目录
|
740 |
evaluator_directory = question_map.get(evaluator_id, None)
|
@@ -1091,7 +1091,7 @@ def advance_workflow(progress_state, data_subset_state, current_pairwise=None, c
|
|
1091 |
}
|
1092 |
|
1093 |
# Validate input for scoring
|
1094 |
-
if current_scoring is not None and
|
1095 |
ratings_A, ratings_B = current_scoring
|
1096 |
if any(rating is None for rating in ratings_A) or any(rating is None for rating in ratings_B):
|
1097 |
gr.Warning("Error: Please provide ratings for all criteria for both models.",
|
@@ -1187,9 +1187,10 @@ def submit_pairwise_scoring(progress_state, data_subset_state, user_info, *combi
|
|
1187 |
|
1188 |
|
1189 |
# Validate input
|
1190 |
-
if any(answer is None for answer in pairwise):
|
|
|
1191 |
# Return current state with no changes - let advance_workflow handle the structure
|
1192 |
-
ui_updates = advance_workflow(progress_state, data_subset_state, current_pairwise=pairwise)
|
1193 |
return [
|
1194 |
gr.update(visible=False), # page0
|
1195 |
gr.update(visible=True), # page1
|
@@ -1210,30 +1211,31 @@ def submit_pairwise_scoring(progress_state, data_subset_state, user_info, *combi
|
|
1210 |
*([gr.update() for _ in range(len_criteria)]), # ratings_B_page1 (keep current values)
|
1211 |
]
|
1212 |
|
1213 |
-
# Validate input - check if all ratings are provided
|
1214 |
-
if any(rating is None for rating in ratings_A) or any(rating is None for rating in ratings_B):
|
1215 |
-
|
1216 |
-
|
|
|
1217 |
|
1218 |
-
|
1219 |
-
|
1220 |
-
|
1221 |
-
|
1222 |
-
|
1223 |
-
|
1224 |
-
|
1225 |
-
|
1226 |
-
|
1227 |
-
|
1228 |
-
|
1229 |
-
|
1230 |
-
|
1231 |
-
|
1232 |
-
|
1233 |
-
|
1234 |
-
|
1235 |
-
|
1236 |
-
|
1237 |
|
1238 |
# Initialize pairwise_scores as method-keyed dict if it doesn't exist
|
1239 |
if 'pairwise_scores' not in progress_state:
|
|
|
734 |
with open(question_map_path, 'r') as f:
|
735 |
question_map = json.load(f)
|
736 |
|
737 |
+
# print(f"\033[91m{question_map}\033[0m")
|
738 |
|
739 |
# 获取评估者目录
|
740 |
evaluator_directory = question_map.get(evaluator_id, None)
|
|
|
1091 |
}
|
1092 |
|
1093 |
# Validate input for scoring
|
1094 |
+
if current_scoring is not None and (any(answer is None for answer in current_scoring[0]) or any(answer is None for answer in current_scoring[1])):
|
1095 |
ratings_A, ratings_B = current_scoring
|
1096 |
if any(rating is None for rating in ratings_A) or any(rating is None for rating in ratings_B):
|
1097 |
gr.Warning("Error: Please provide ratings for all criteria for both models.",
|
|
|
1187 |
|
1188 |
|
1189 |
# Validate input
|
1190 |
+
if any(answer is None for answer in pairwise) or any(rating is None for rating in ratings_A) or any(rating is None for rating in ratings_B):
|
1191 |
+
print("Error: Missing pairwise comparison answers.")
|
1192 |
# Return current state with no changes - let advance_workflow handle the structure
|
1193 |
+
ui_updates = advance_workflow(progress_state, data_subset_state, current_pairwise=pairwise, current_scoring=[ratings_A, ratings_B])
|
1194 |
return [
|
1195 |
gr.update(visible=False), # page0
|
1196 |
gr.update(visible=True), # page1
|
|
|
1211 |
*([gr.update() for _ in range(len_criteria)]), # ratings_B_page1 (keep current values)
|
1212 |
]
|
1213 |
|
1214 |
+
# # Validate input - check if all ratings are provided
|
1215 |
+
# if any(rating is None for rating in ratings_A) or any(rating is None for rating in ratings_B):
|
1216 |
+
# print("Error: Missing ratings for one or more criteria.")
|
1217 |
+
# # Return current state with no changes - let advance_workflow handle the structure
|
1218 |
+
# ui_updates = advance_workflow(progress_state, data_subset_state, current_scoring=[ratings_A, ratings_B])
|
1219 |
|
1220 |
+
# return [
|
1221 |
+
# gr.update(visible=False), # page0
|
1222 |
+
# gr.update(visible=True), # page1
|
1223 |
+
# "", # page0_error_box
|
1224 |
+
# ui_updates.get('page1_prompt'), # page1_prompt
|
1225 |
+
# user_info, # user_info_state
|
1226 |
+
# data_subset_state, # data_subset_state
|
1227 |
+
# ui_updates.get('progress_state'), # progress_state
|
1228 |
+
# progress_state.get('pairwise_results', {}), # pairwise_state
|
1229 |
+
# ui_updates.get('chat_a_answer'), # chat_a_answer
|
1230 |
+
# ui_updates.get('chat_b_answer'), # chat_b_answer
|
1231 |
+
# ui_updates.get('chat_a_reasoning'), # chat_a_reasoning
|
1232 |
+
# ui_updates.get('chat_b_reasoning'), # chat_b_reasoning
|
1233 |
+
# ui_updates.get('pairwise_header'), # pairwise_header
|
1234 |
+
# *([gr.update() for _ in range(len_criteria)]), # pairwise_inputs (keep current values)
|
1235 |
+
# *([gr.update() for _ in range(len_criteria)]), # comparison_reasons_inputs (keep current values)
|
1236 |
+
# *([gr.update() for _ in range(len_criteria)]), # ratings_A_page1 (keep current values)
|
1237 |
+
# *([gr.update() for _ in range(len_criteria)]), # ratings_B_page1 (keep current values)
|
1238 |
+
# ]
|
1239 |
|
1240 |
# Initialize pairwise_scores as method-keyed dict if it doesn't exist
|
1241 |
if 'pairwise_scores' not in progress_state:
|