""" |
|
๋ฒค์น๋งํฌ ๋ฌธ์ ID ๋ชฉ๋ก ์กฐํ ๋๊ตฌ |
|
|
|
์ง์ํ๋ ๋ฒค์น๋งํฌ์ ๋ชจ๋ ๋ฌธ์ ID๋ฅผ ํ์ธํ ์ ์์ต๋๋ค. |
|
HumanEval+, MBPP+, LiveCodeBench ์ง์ |
|
""" |
|
|
|
import os |
|
import sys |
|
import json |
|
import argparse |
|
from pathlib import Path |
|
from datetime import datetime |
|
|
|
|
|
# Project root from the original environment; appended so project modules resolve.
sys.path.append('/home/ubuntu/RLVR/TestTime-RLVR-v2')
|
|
|
|
|
def load_jsonl(file_path): |
|
"""JSONL ํ์ผ ๋ก๋""" |
|
if not os.path.exists(file_path): |
|
return [] |
|
|
|
with open(file_path, 'r', encoding='utf-8') as f: |
|
return [json.loads(line.strip()) for line in f if line.strip()] |
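
# A sketch of the record shape load_jsonl is expected to yield here; the exact
# fields vary by dataset and the values below are illustrative, not verbatim:
#   {"task_id": "HumanEval/0", "prompt": "def has_close_elements(...", ...}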
|
|
|
|
|
def list_humaneval_problems(data_path): |
|
"""HumanEval+ ๋ฌธ์ ๋ชฉ๋ก""" |
|
print("๐ HumanEval+ ๋ฌธ์ ๋ชฉ๋ก") |
|
print("="*60) |
|
|
|
problems = load_jsonl(data_path) |
|
if not problems: |
|
print("โ ๋ฐ์ดํฐ ํ์ผ์ ์ฐพ์ ์ ์์ต๋๋ค.") |
|
return [] |
|
|
|
task_ids = [] |
|
print(f"๐ ์ด {len(problems)}๊ฐ ๋ฌธ์ ๋ฐ๊ฒฌ") |
|
|
|
for i, problem in enumerate(problems): |
|
task_id = problem.get('task_id', f'Unknown_{i}') |
|
task_ids.append(task_id) |
|
|
|
|
|
print("\n๐ ์ ์ฒด ๋ฌธ์ ID ๋ชฉ๋ก:") |
|
    # Print the IDs ten per row.
    for j in range(0, len(task_ids), 10):
|
batch = task_ids[j:j+10] |
|
print(f" {', '.join(batch)}") |
|
|
|
return task_ids |
|
|
|
|
|
def list_mbpp_problems(data_path): |
|
"""MBPP+ ๋ฌธ์ ๋ชฉ๋ก""" |
|
print("๐ MBPP+ ๋ฌธ์ ๋ชฉ๋ก") |
|
print("="*60) |
|
|
|
problems = load_jsonl(data_path) |
|
if not problems: |
|
print("โ ๋ฐ์ดํฐ ํ์ผ์ ์ฐพ์ ์ ์์ต๋๋ค.") |
|
return [] |
|
|
|
task_ids = [] |
|
print(f"๐ ์ด {len(problems)}๊ฐ ๋ฌธ์ ๋ฐ๊ฒฌ") |
|
|
|
for i, problem in enumerate(problems): |
|
task_id = problem.get('task_id', f'Unknown_{i}') |
|
task_ids.append(task_id) |
|
|
|
|
|
print("\n๐ ์ ์ฒด ๋ฌธ์ ID ๋ชฉ๋ก:") |
|
for j in range(0, len(task_ids), 10): |
|
batch = task_ids[j:j+10] |
|
print(f" {', '.join(batch)}") |
|
|
|
return task_ids |
|
|
|
|
|
def list_lcb_problems(data_path): |
|
"""LiveCodeBench ๋ฌธ์ ๋ชฉ๋ก""" |
|
print("๐ LiveCodeBench ๋ฌธ์ ๋ชฉ๋ก") |
|
print("="*60) |
|
|
|
|
|
    # Recursively collect every JSONL file under the LiveCodeBench directory.
    lcb_files = list(Path(data_path).glob("**/*.jsonl")) if os.path.exists(data_path) else []
|
|
|
if not lcb_files: |
|
print("โ LiveCodeBench ๋ฐ์ดํฐ๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค.") |
|
return [] |
|
|
|
all_task_ids = [] |
|
print(f"๐ {len(lcb_files)}๊ฐ ํ์ผ ๋ฐ๊ฒฌ") |
|
|
|
    # Preview at most five files, ten problems each, to keep the output short.
    for file_path in lcb_files[:5]:
|
print(f"\n๐ ํ์ผ: {file_path.name}") |
|
problems = load_jsonl(file_path) |
|
|
|
for i, problem in enumerate(problems[:10]): |
|
task_id = problem.get('task_id', problem.get('id', f'LCB_{i}')) |
|
all_task_ids.append(task_id) |
|
prompt_preview = problem.get('prompt', problem.get('description', ''))[:80].replace('\n', ' ') |
|
print(f" {len(all_task_ids):3d}. {task_id} - {prompt_preview}...") |
|
|
|
if len(problems) > 10: |
|
print(f" ... ({len(problems)-10}๊ฐ ๋ฌธ์ ๋ ์์)") |
|
|
|
if len(lcb_files) > 5: |
|
print(f"\n... ({len(lcb_files)-5}๊ฐ ํ์ผ ๋ ์์)") |
|
|
|
return all_task_ids |
|
|
|
|
|
def save_problem_list(task_ids, benchmark, output_dir): |
|
"""๋ฌธ์ ๋ชฉ๋ก์ ๋ฒค์น๋งํฌ๋ณ ๋๋ ํ ๋ฆฌ์ ์ ์ฅ""" |
|
|
|
benchmark_dir = os.path.join(output_dir, benchmark) |
|
os.makedirs(benchmark_dir, exist_ok=True) |
|
|
|
|
|
all_problems_file = os.path.join(benchmark_dir, f"{benchmark}_all_problems.json") |
|
output_data = { |
|
'benchmark': benchmark, |
|
'total_problems': len(task_ids), |
|
'task_ids': task_ids, |
|
'generated_at': datetime.now().isoformat(), |
|
'data_source': f'{benchmark}_plus_dataset' |
|
} |
|
|
|
with open(all_problems_file, 'w', encoding='utf-8') as f: |
|
json.dump(output_data, f, indent=2, ensure_ascii=False) |
|
|
|
print(f"\n๐พ ๋ฌธ์ ๋ชฉ๋ก์ด ์ ์ฅ๋์์ต๋๋ค:") |
|
print(f" ์ ์ฒด ๋ชฉ๋ก: {all_problems_file}") |
|
|
|
|
|
def main(): |
|
    parser = argparse.ArgumentParser(description='List benchmark problem IDs')
    parser.add_argument('--benchmark', type=str, default='all',
                        choices=['all', 'humaneval', 'mbpp', 'lcb'],
                        help='Benchmark to list (all = every benchmark)')
    parser.add_argument('--save', action='store_true',
                        help='Save the results as JSON files')
    parser.add_argument('--output_dir', type=str,
                        default='/home/ubuntu/RLVR/TestTime-RLVR-v2/tmp',
                        help='Output directory')
|
|
|
args = parser.parse_args() |
|
|
|
|
|
    # Resolve dataset paths relative to the repository root (this script's parent directory).
    base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
humaneval_path = f'{base_dir}/evaluation/code_eval/data/HumanEvalPlus.jsonl' |
|
mbpp_path = f'{base_dir}/evaluation/code_eval/data/MbppPlus.jsonl' |
|
lcb_path = f'{base_dir}/evaluation/code_eval/coding/LiveCodeBench' |
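
    # Expected data layout under the repository root, inferred from the paths above:
    #   evaluation/code_eval/data/HumanEvalPlus.jsonl
    #   evaluation/code_eval/data/MbppPlus.jsonl
    #   evaluation/code_eval/coding/LiveCodeBench/**/*.jsonl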
|
|
|
os.makedirs(args.output_dir, exist_ok=True) |
|
|
|
print("๐ TestTime RLVR ๋ฒค์น๋งํฌ ๋ฌธ์ ๋ชฉ๋ก ์กฐํ ๋๊ตฌ") |
|
print("="*80) |
|
|
|
all_results = {} |
|
|
|
if args.benchmark in ['all', 'humaneval']: |
|
print("\n") |
|
task_ids = list_humaneval_problems(humaneval_path) |
|
all_results['humaneval'] = task_ids |
|
|
|
if args.save and task_ids: |
|
save_problem_list(task_ids, 'humaneval', args.output_dir) |
|
|
|
if args.benchmark in ['all', 'mbpp']: |
|
print("\n") |
|
task_ids = list_mbpp_problems(mbpp_path) |
|
all_results['mbpp'] = task_ids |
|
|
|
if args.save and task_ids: |
|
save_problem_list(task_ids, 'mbpp', args.output_dir) |
|
|
|
if args.benchmark in ['all', 'lcb']: |
|
print("\n") |
|
task_ids = list_lcb_problems(lcb_path) |
|
all_results['lcb'] = task_ids |
|
|
|
if args.save and task_ids: |
|
            save_problem_list(task_ids, 'lcb', args.output_dir)
|
|
|
|
|
print("\n" + "="*80) |
|
print("๐ ๋ฒค์น๋งํฌ ์์ฝ") |
|
print("="*80) |
|
|
|
    total_problems = 0
    for benchmark, task_ids in all_results.items():
        if task_ids:
            print(f"{benchmark.upper()}: {len(task_ids)} problems")
            total_problems += len(task_ids)
            print(f"  Sample IDs: {', '.join(task_ids[:5])}")
            if len(task_ids) > 5:
                print(f"  ... ({len(task_ids)} in total)")
|
|
|
print(f"\n๐ฏ ์ ์ฒด ์ฌ์ฉ ๊ฐ๋ฅํ ๋ฌธ์ : {total_problems}๊ฐ") |
|
|
|
|
|
print("\n" + "="*80) |
|
print("๐ก ์ฌ์ฉ๋ฒ") |
|
print("="*80) |
|
print("ํ
์คํธ ์คํ ์์:") |
|
|
|
if 'humaneval' in all_results and all_results['humaneval']: |
|
sample_id = all_results['humaneval'][0] |
|
print(f" python test_complete_pipeline.py --benchmark humaneval --problem_id \"{sample_id}\"") |
|
|
|
if 'mbpp' in all_results and all_results['mbpp']: |
|
sample_id = all_results['mbpp'][0] |
|
print(f" python test_complete_pipeline.py --benchmark mbpp --problem_id \"{sample_id}\"") |
|
|
|
print("\nํน์ ๋ฌธ์ ๋ง ํ์ธ:") |
|
print(" python list_benchmark_problems.py --benchmark mbpp") |
|
print(" python list_benchmark_problems.py --benchmark humaneval --save") |
|
|
|
|
|
if __name__ == '__main__': |
|
main() |