Spaces:
Sleeping
update
Browse files
app.py
CHANGED
|
@@ -13,8 +13,8 @@ from PIL import Image
|
|
| 13 |
REPO_ID = "agenticx/TxAgentEvalData"
|
| 14 |
EVALUATOR_MAP_DICT = "evaluator_map_dict.json"
|
| 15 |
TXAGENT_RESULTS_SHEET_BASE_NAME = "TxAgent_Human_Eval_Results_CROWDSOURCED"
|
| 16 |
-
our_methods = ['
|
| 17 |
-
baseline_methods = ['
|
| 18 |
# Load tool lists from 'tool_lists' subdirectory
|
| 19 |
tools_dir = os.path.join(os.getcwd(), 'tool_lists')
|
| 20 |
|
|
@@ -718,7 +718,8 @@ def get_next_eval_question(user_info, our_methods, return_user_info=True, includ
|
|
| 718 |
validation_error = validate_required_fields(
|
| 719 |
name, email, evaluator_id, specialty_dd, years_exp_radio)
|
| 720 |
if validation_error:
|
| 721 |
-
return None, gr.update(visible=True), gr.update(visible=False), "Wrong info.", None, 0, None
|
|
|
|
| 722 |
|
| 723 |
# 2. 获取评估者问题映射
|
| 724 |
question_map_path = hf_hub_download(
|
|
@@ -732,6 +733,8 @@ def get_next_eval_question(user_info, our_methods, return_user_info=True, includ
|
|
| 732 |
# 加载问题映射
|
| 733 |
with open(question_map_path, 'r') as f:
|
| 734 |
question_map = json.load(f)
|
|
|
|
|
|
|
| 735 |
|
| 736 |
# 获取评估者目录
|
| 737 |
evaluator_directory = question_map.get(evaluator_id, None)
|
|
@@ -752,7 +755,7 @@ def get_next_eval_question(user_info, our_methods, return_user_info=True, includ
|
|
| 752 |
evaluator_id, all_files, evaluator_directory, our_methods)
|
| 753 |
|
| 754 |
if len(full_question_ids_list) == 0:
|
| 755 |
-
return None, None, None, None, None, 0, None
|
| 756 |
|
| 757 |
# 确定当前问题 ID 并收集模型数据
|
| 758 |
full_question_ids_list = sorted(
|
|
|
|
| 13 |
REPO_ID = "agenticx/TxAgentEvalData"
|
| 14 |
EVALUATOR_MAP_DICT = "evaluator_map_dict.json"
|
| 15 |
TXAGENT_RESULTS_SHEET_BASE_NAME = "TxAgent_Human_Eval_Results_CROWDSOURCED"
|
| 16 |
+
our_methods = ['txagent']
|
| 17 |
+
baseline_methods = ['Qwen3-8B']
|
| 18 |
# Load tool lists from 'tool_lists' subdirectory
|
| 19 |
tools_dir = os.path.join(os.getcwd(), 'tool_lists')
|
| 20 |
|
|
|
|
| 718 |
validation_error = validate_required_fields(
|
| 719 |
name, email, evaluator_id, specialty_dd, years_exp_radio)
|
| 720 |
if validation_error:
|
| 721 |
+
# return None, gr.update(visible=True), gr.update(visible=False), "Wrong info.", None, 0, None
|
| 722 |
+
return None, gr.update(visible=True), gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), "Wrong info.", None, 0, None
|
| 723 |
|
| 724 |
# 2. 获取评估者问题映射
|
| 725 |
question_map_path = hf_hub_download(
|
|
|
|
| 733 |
# 加载问题映射
|
| 734 |
with open(question_map_path, 'r') as f:
|
| 735 |
question_map = json.load(f)
|
| 736 |
+
|
| 737 |
+
print(f"\033[91m{question_map}\033[0m")
|
| 738 |
|
| 739 |
# 获取评估者目录
|
| 740 |
evaluator_directory = question_map.get(evaluator_id, None)
|
|
|
|
| 755 |
evaluator_id, all_files, evaluator_directory, our_methods)
|
| 756 |
|
| 757 |
if len(full_question_ids_list) == 0:
|
| 758 |
+
return None, None, None, None, None, 0, None, None, None
|
| 759 |
|
| 760 |
# 确定当前问题 ID 并收集模型数据
|
| 761 |
full_question_ids_list = sorted(
|