"""Build a model leaderboard from evaluation result files and write it to CSV."""

import csv
from pathlib import Path
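# Directory layout assumed by this script (inferred from the paths used below):
#   <project_root>/data/raw_results/<model_name>/race_result.txt
#   <project_root>/data/fact_results/<model_name>/fact_result.txt
#   <project_root>/data/leaderboard.csv   (output)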


def parse_race_result(race_result_file):
    """Parse race_result.txt and return the per-dimension scores as percentages."""
    scores = {}

    with open(race_result_file, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if ':' in line:
                key, value = line.split(':', 1)
                key = key.strip()
                try:
                    value = float(value.strip())
                except ValueError:
                    continue  # skip lines whose value is not numeric

                # Raw scores are fractions in [0, 1]; convert them to percentages.
                if key == 'Comprehensiveness':
                    scores['comprehensiveness'] = value * 100
                elif key == 'Insight':
                    scores['insight'] = value * 100
                elif key == 'Instruction Following':
                    scores['instruction_following'] = value * 100
                elif key == 'Readability':
                    scores['readability'] = value * 100
                elif key == 'Overall Score':
                    scores['overall_score'] = value * 100

    return scores
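# Example race_result.txt accepted by the parser above (layout inferred from the
# parsing logic; the values shown are illustrative fractions in [0, 1]):
#
#   Comprehensiveness: 0.4532
#   Insight: 0.4210
#   Instruction Following: 0.4987
#   Readability: 0.4755
#   Overall Score: 0.4583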


def parse_fact_result(fact_result_file):
    """Parse fact_result.txt and return the citation-related metrics."""
    citation_scores = {}

    if not fact_result_file.exists():
        return citation_scores

    with open(fact_result_file, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if ':' in line:
                key, value = line.split(':', 1)
                key = key.strip()
                try:
                    value = float(value.strip())
                except ValueError:
                    continue  # skip lines whose value is not numeric

                if key == 'valid_rate':
                    citation_scores['citation_accuracy'] = value * 100
                elif key == 'supported_per_task':
                    # Preferred source for effective citations: supported citations per task.
                    citation_scores['effective_citations'] = value
                elif key == 'total_valid_citations' and 'effective_citations' not in citation_scores:
                    # Fall back to the total count when the per-task figure is absent.
                    citation_scores['effective_citations'] = value

    return citation_scores
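# Example fact_result.txt (keys inferred from the parser above; valid_rate is
# assumed to be a fraction in [0, 1], the other values are illustrative):
#
#   valid_rate: 0.7812
#   total_valid_citations: 214
#   supported_per_task: 35.6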


def process_model_data(model_dir):
    """Process a single model directory and return its leaderboard entry."""
    model_name = model_dir.name
    race_result_file = model_dir / "race_result.txt"

    if not race_result_file.exists():
        print(f"Warning: race_result.txt not found in the directory for model {model_name}")
        return None

    print(f"Processing model: {model_name}")

    try:
        scores = parse_race_result(race_result_file)

        if not scores:
            print(" - Warning: no valid scores could be parsed")
            return None

        # Citation metrics live in a parallel tree: data/fact_results/<model_name>/fact_result.txt
        project_root = Path(__file__).parent.parent
        fact_results_dir = project_root / "data" / "fact_results"
        fact_result_file = fact_results_dir / model_name / "fact_result.txt"

        citation_scores = parse_fact_result(fact_result_file)

        if citation_scores:
            # Format each citation metric only if present; either key may be absent.
            accuracy = citation_scores.get('citation_accuracy')
            citations = citation_scores.get('effective_citations')
            accuracy_text = f"{accuracy:.2f}%" if accuracy is not None else "N/A"
            citations_text = f"{citations:.2f}" if citations is not None else "N/A"
            print(f" - Overall score: {scores['overall_score']:.2f}, "
                  f"citation accuracy: {accuracy_text}, effective citations: {citations_text}")
        else:
            print(f" - Overall score: {scores['overall_score']:.2f}, citation data: not found")

        result = {
            'model': model_name,
            'overall_score': scores['overall_score'],
            'comprehensiveness': scores['comprehensiveness'],
            'insight': scores['insight'],
            'instruction_following': scores['instruction_following'],
            'readability': scores['readability'],
            'citation_accuracy': citation_scores.get('citation_accuracy'),
            'effective_citations': citation_scores.get('effective_citations')
        }

        return result

    except Exception as e:
        print(f" - Error while processing files: {e}")
        return None


def rank_leaderboard():
    """Build the leaderboard and save it as CSV."""
    project_root = Path(__file__).parent.parent
    input_dir = project_root / "data" / "raw_results"
    output_file = project_root / "data" / "leaderboard.csv"

    if not input_dir.exists():
        print(f"Input directory does not exist: {input_dir}")
        return

    model_dirs = [d for d in input_dir.iterdir() if d.is_dir()]
    print(f"Found {len(model_dirs)} model directories")

    if not model_dirs:
        print("No model directories found")
        return

    model_results = []
    for model_dir in model_dirs:
        try:
            result = process_model_data(model_dir)
            if result:
                model_results.append(result)
        except Exception as e:
            print(f"Error while processing directory {model_dir.name}: {e}")
            continue

    # Rank models by overall score, best first.
    model_results.sort(key=lambda x: x['overall_score'], reverse=True)

    with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
        fieldnames = ['model', 'overall_score', 'comprehensiveness', 'insight',
                      'instruction_following', 'readability',
                      'citation_accuracy', 'effective_citations']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

        writer.writeheader()

        for result in model_results:
            # Numeric fields are written with two decimals; missing citation data becomes "-".
            row = {
                'model': result['model'],
                'overall_score': f"{result['overall_score']:.2f}",
                'comprehensiveness': f"{result['comprehensiveness']:.2f}",
                'insight': f"{result['insight']:.2f}",
                'instruction_following': f"{result['instruction_following']:.2f}",
                'readability': f"{result['readability']:.2f}",
                'citation_accuracy': f"{result['citation_accuracy']:.2f}" if result['citation_accuracy'] is not None else "-",
                'effective_citations': f"{result['effective_citations']:.2f}" if result['effective_citations'] is not None else "-"
            }
            writer.writerow(row)

    print(f"\nLeaderboard saved to: {output_file}")
    print(f"Processed {len(model_results)} models")


if __name__ == "__main__":
    rank_leaderboard()
    print("Leaderboard computation complete!")