Spaces:

agenticx
/

TxAgentRAOEval

Sleeping

App Files Community

shgao committed on Jul 16

Commit

c4fe0cf

1 Parent(s): d1eab08

update

Browse files

Files changed (1) hide show

app.py +29 -27

app.py CHANGED Viewed

@@ -734,7 +734,7 @@ def get_next_eval_question(user_info, our_methods, return_user_info=True, includ
     with open(question_map_path, 'r') as f:
         question_map = json.load(f)
-    print(f"\033[91m{question_map}\033[0m")
     # 获取评估者目录
     evaluator_directory = question_map.get(evaluator_id, None)
@@ -1091,7 +1091,7 @@ def advance_workflow(progress_state, data_subset_state, current_pairwise=None, c
         }
     # Validate input for scoring
-    if current_scoring is not None and progress_state.get('mode') == 'scoring':
         ratings_A, ratings_B = current_scoring
         if any(rating is None for rating in ratings_A) or any(rating is None for rating in ratings_B):
             gr.Warning("Error: Please provide ratings for all criteria for both models.",
@@ -1187,9 +1187,10 @@ def submit_pairwise_scoring(progress_state, data_subset_state, user_info, *combi
     # Validate input
-    if any(answer is None for answer in pairwise):
         # Return current state with no changes - let advance_workflow handle the structure
-        ui_updates = advance_workflow(progress_state, data_subset_state, current_pairwise=pairwise)
         return [
             gr.update(visible=False),                                  # page0
             gr.update(visible=True),                                   # page1
@@ -1210,30 +1211,31 @@ def submit_pairwise_scoring(progress_state, data_subset_state, user_info, *combi
             *([gr.update() for _ in range(len_criteria)]),             # ratings_B_page1 (keep current values)
         ]
-    # Validate input - check if all ratings are provided
-    if any(rating is None for rating in ratings_A) or any(rating is None for rating in ratings_B):
-        # Return current state with no changes - let advance_workflow handle the structure
-        ui_updates = advance_workflow(progress_state, data_subset_state, current_scoring=[ratings_A, ratings_B])
-        return [
-            gr.update(visible=False),                                  # page0
-            gr.update(visible=True),                                   # page1
-            "",                                                        # page0_error_box
-            ui_updates.get('page1_prompt'),                            # page1_prompt
-            user_info,                                                  # user_info_state
-            data_subset_state,                                          # data_subset_state
-            ui_updates.get('progress_state'),                           # progress_state
-            progress_state.get('pairwise_results', {}),                 # pairwise_state
-            ui_updates.get('chat_a_answer'),                           # chat_a_answer
-            ui_updates.get('chat_b_answer'),                           # chat_b_answer
-            ui_updates.get('chat_a_reasoning'),                        # chat_a_reasoning
-            ui_updates.get('chat_b_reasoning'),                        # chat_b_reasoning
-            ui_updates.get('pairwise_header'),                         # pairwise_header
-            *([gr.update() for _ in range(len_criteria)]),             # pairwise_inputs (keep current values)
-            *([gr.update() for _ in range(len_criteria)]),             # comparison_reasons_inputs (keep current values)
-            *([gr.update() for _ in range(len_criteria)]),             # ratings_A_page1 (keep current values)
-            *([gr.update() for _ in range(len_criteria)]),             # ratings_B_page1 (keep current values)
-        ]
     # Initialize pairwise_scores as method-keyed dict if it doesn't exist
     if 'pairwise_scores' not in progress_state:

     with open(question_map_path, 'r') as f:
         question_map = json.load(f)
+    # print(f"\033[91m{question_map}\033[0m")
     # 获取评估者目录
     evaluator_directory = question_map.get(evaluator_id, None)
         }
     # Validate input for scoring
+    if current_scoring is not None and (any(answer is None for answer in current_scoring[0]) or any(answer is None for answer in current_scoring[1])):
         ratings_A, ratings_B = current_scoring
         if any(rating is None for rating in ratings_A) or any(rating is None for rating in ratings_B):
             gr.Warning("Error: Please provide ratings for all criteria for both models.",
     # Validate input
+    if any(answer is None for answer in pairwise) or any(rating is None for rating in ratings_A) or any(rating is None for rating in ratings_B):
+        print("Error: Missing pairwise comparison answers.")
         # Return current state with no changes - let advance_workflow handle the structure
+        ui_updates = advance_workflow(progress_state, data_subset_state, current_pairwise=pairwise, current_scoring=[ratings_A, ratings_B])
         return [
             gr.update(visible=False),                                  # page0
             gr.update(visible=True),                                   # page1
             *([gr.update() for _ in range(len_criteria)]),             # ratings_B_page1 (keep current values)
         ]
+    # # Validate input - check if all ratings are provided
+    # if any(rating is None for rating in ratings_A) or any(rating is None for rating in ratings_B):
+    #     print("Error: Missing ratings for one or more criteria.")
+    #     # Return current state with no changes - let advance_workflow handle the structure
+    #     ui_updates = advance_workflow(progress_state, data_subset_state, current_scoring=[ratings_A, ratings_B])
+    #     return [
+    #         gr.update(visible=False),                                  # page0
+    #         gr.update(visible=True),                                   # page1
+    #         "",                                                        # page0_error_box
+    #         ui_updates.get('page1_prompt'),                            # page1_prompt
+    #         user_info,                                                  # user_info_state
+    #         data_subset_state,                                          # data_subset_state
+    #         ui_updates.get('progress_state'),                           # progress_state
+    #         progress_state.get('pairwise_results', {}),                 # pairwise_state
+    #         ui_updates.get('chat_a_answer'),                           # chat_a_answer
+    #         ui_updates.get('chat_b_answer'),                           # chat_b_answer
+    #         ui_updates.get('chat_a_reasoning'),                        # chat_a_reasoning
+    #         ui_updates.get('chat_b_reasoning'),                        # chat_b_reasoning
+    #         ui_updates.get('pairwise_header'),                         # pairwise_header
+    #         *([gr.update() for _ in range(len_criteria)]),             # pairwise_inputs (keep current values)
+    #         *([gr.update() for _ in range(len_criteria)]),             # comparison_reasons_inputs (keep current values)
+    #         *([gr.update() for _ in range(len_criteria)]),             # ratings_A_page1 (keep current values)
+    #         *([gr.update() for _ in range(len_criteria)]),             # ratings_B_page1 (keep current values)
+    #     ]
     # Initialize pairwise_scores as method-keyed dict if it doesn't exist
     if 'pairwise_scores' not in progress_state: