neural-mesh-v2 / test /simple_integration_test.py
hjkim00's picture
Restore all essential files - code, configs, and MBPP/HumanEval data
24c2665 verified
raw
history blame
6.96 kB
#!/usr/bin/env python3
"""
Simple TTRLVR + AZR integration test.

Exercises the most basic components:
1. Task Generator test
2. Data Converter test
3. Pipeline basic execution test (Iterative Trainer setup)
"""
import os
import sys
import tempfile
import shutil
from pathlib import Path
# Path setup: make the project packages importable.
sys.path.append('/home/ubuntu/RLVR/TestTime-RLVR-v2')
def test_task_generator():
    """Smoke-test the Task Generator.

    Builds a ``TestTimeTaskGenerator`` from a default ``TestTimeConfig``
    (with ``model_name`` overridden) and a ``TestTimeLogger``, feeds it one
    hand-written IPO triple, and checks that the returned mapping has the
    ``induction``, ``deduction`` and ``abduction`` keys. For every non-empty
    task list, the first task must carry the ``uid``, ``ipo_group_id`` and
    ``basic_accuracy`` fields.

    Returns:
        bool: True when every check passes; False when any import, call or
        assertion raises (the error and its traceback are printed).
    """
    print("๐Ÿงช Testing Task Generator...")
    try:
        # Project imports live inside the try so a missing package is
        # reported as a failed test instead of crashing the script.
        from absolute_zero_reasoner.testtime.config import TestTimeConfig
        from absolute_zero_reasoner.testtime.logger import TestTimeLogger
        from absolute_zero_reasoner.testtime.task_generator import TestTimeTaskGenerator

        cfg = TestTimeConfig()
        cfg.model_name = "Qwen/Qwen2.5-7B"
        log = TestTimeLogger()
        generator = TestTimeTaskGenerator(cfg, log)

        # Test IPO triple
        sample_triple = {
            'id': 'test_triple_0',
            'input': '[1, 2, 3]',
            'actual_output': '[2, 4, 6]',
            'program': 'def test_func(lst):\n return [x * 2 for x in lst]',
            'full_input_str': 'test_func([1, 2, 3])',
            'source_program_id': 'program_0',
            'ipo_index': 0
        }

        # Task generation
        tasks = generator.generate_tasks([sample_triple], "TestProblem", 1)

        # Verification: all three task families must be present.
        for family in ('induction', 'deduction', 'abduction'):
            assert family in tasks

        total_tasks = sum(map(len, tasks.values()))
        print(f"โœ… Task Generator: Generated {total_tasks} tasks")

        # AZR metadata check on the first task of each non-empty family.
        for task_type, task_list in tasks.items():
            if not task_list:
                continue
            first_task = task_list[0]
            for field in ('uid', 'ipo_group_id', 'basic_accuracy'):
                assert field in first_task
            print(f"โœ… Task Generator: {task_type} has AZR metadata")
    except Exception as exc:
        print(f"โŒ Task Generator test failed: {exc}")
        import traceback
        traceback.print_exc()
        return False
    return True
def test_data_converter():
    """Smoke-test the conversion of tasks into AZR training data.

    Creates a ``CompleteTestTimePipeline`` from a default config and logger,
    passes one mock induction task to its ``_save_azr_training_data`` method
    together with a temporary directory, and checks that the returned mapping
    has an ``induction`` entry pointing at an existing file. That file is then
    read back with pandas and must contain exactly one row and the
    ``prompt``, ``uid`` and ``ipo_group_id`` columns.

    Returns:
        bool: True when every check passes; False when any import, call or
        assertion raises (the error and its traceback are printed).
    """
    print("\n๐Ÿงช Testing Data Converter...")
    try:
        from absolute_zero_reasoner.testtime.complete_pipeline import CompleteTestTimePipeline
        from absolute_zero_reasoner.testtime.config import TestTimeConfig
        from absolute_zero_reasoner.testtime.logger import TestTimeLogger

        cfg = TestTimeConfig()
        log = TestTimeLogger()
        pipeline = CompleteTestTimePipeline(cfg, log)

        # Mock task data
        program_src = 'def test_func(lst):\n return [x * 2 for x in lst]'
        induction_task = {
            'task_id': 'induction_0',
            'task_type': 'induction',
            'prompt': 'Test prompt',
            'uid': 'TestProblem_round_1_induction_0',
            'ipo_group_id': 'TestProblem_program_0_ipo_0',
            'source_program_id': 'program_0',
            'ipo_index': 0,
            'ipo_triple': {
                'input': '[1, 2, 3]',
                'output': '[2, 4, 6]',
                'program': program_src
            },
            'ground_truth': program_src,
            'extra_info': {'metric': 'code_f'},
            'basic_accuracy': 1.0,
            'original_problem_id': 'TestProblem',
            'round': 1
        }
        mock_tasks = {'induction': [induction_task]}

        # Temporary directory; removed again when the block exits.
        with tempfile.TemporaryDirectory() as work_dir:
            # Data conversion test
            saved_files = pipeline._save_azr_training_data(
                mock_tasks, "TestProblem", 1, work_dir
            )

            # File creation check
            assert 'induction' in saved_files
            induction_path = saved_files['induction']
            assert os.path.exists(induction_path)

            # Parquet read-back check
            import pandas as pd
            frame = pd.read_parquet(induction_path)
            assert len(frame) == 1
            for column in ('prompt', 'uid', 'ipo_group_id'):
                assert column in frame.columns

            print("โœ… Data Converter: Parquet file created and validated")
        return True
    except Exception as exc:
        print(f"โŒ Data Converter test failed: {exc}")
        import traceback
        traceback.print_exc()
        return False
def test_iterative_trainer_basic():
    """Check the basic setup of ``IterativeTrainer``.

    Constructs an ``IterativeTrainer`` from a default ``TestTimeConfig`` and
    ``TestTimeLogger`` and verifies two attributes against the values this
    test expects: ``current_model_path`` and ``checkpoint_dir``.

    Only the names actually used are imported, so an unrelated missing symbol
    in the config module cannot fail this test. Each assertion carries a
    message, so the printed failure line says which attribute was wrong.

    Returns:
        bool: True when both checks pass; False when any import, call or
        assertion raises (the error and its traceback are printed).
    """
    print("\n๐Ÿงช Testing Iterative Trainer Setup...")
    try:
        from utils.iterative_trainer import IterativeTrainer
        from absolute_zero_reasoner.testtime.config import TestTimeConfig
        from absolute_zero_reasoner.testtime.logger import TestTimeLogger

        config = TestTimeConfig()
        logger = TestTimeLogger()
        trainer = IterativeTrainer(config, logger)

        # Basic configuration check
        expected_model_path = "Qwen/Qwen2.5-7B"
        expected_checkpoint_dir = "/data/RLVR/checkpoints/ttrlvr_azr"
        assert trainer.current_model_path == expected_model_path, (
            f"current_model_path is {trainer.current_model_path!r}, "
            f"expected {expected_model_path!r}"
        )
        assert trainer.checkpoint_dir == expected_checkpoint_dir, (
            f"checkpoint_dir is {trainer.checkpoint_dir!r}, "
            f"expected {expected_checkpoint_dir!r}"
        )

        print("โœ… Iterative Trainer: Basic setup successful")
        return True
    except Exception as e:
        print(f"โŒ Iterative Trainer test failed: {e}")
        import traceback
        traceback.print_exc()
        return False
def main():
    """Run every simple integration test and print a summary.

    Each test function is called in turn; an exception escaping a test is
    printed and counted as a failure. After all tests have run, a PASS/FAIL
    line per test and an overall pass rate are printed.

    Returns:
        int: 0 when every test passed, 1 otherwise (used as the exit code).
    """
    print("๐Ÿš€ TTRLVR + AZR Simple Integration Test")
    print("=" * 60)

    suite = (
        ("Task Generator", test_task_generator),
        ("Data Converter", test_data_converter),
        ("Iterative Trainer", test_iterative_trainer_basic),
    )

    outcomes = []
    for label, runner in suite:
        try:
            ok = runner()
        except Exception as exc:
            # The test functions catch their own errors; this is a last
            # line of defence so one crash cannot abort the whole run.
            print(f"๐Ÿ’ฅ {label} crashed: {exc}")
            ok = False
        outcomes.append((label, ok))

    # Results summary
    print("\n" + "=" * 60)
    print("๐Ÿ“Š Test Results:")
    for label, ok in outcomes:
        status = "โœ… PASS" if ok else "โŒ FAIL"
        print(f" {status} {label}")

    total = len(outcomes)
    passed = sum(1 for _, ok in outcomes if ok)
    print(f"\nOverall: {passed}/{total} tests passed ({passed/total*100:.1f}%)")

    if passed != total:
        print(f"\nโš ๏ธ {total-passed} tests failed")
        return 1

    print("\n๐ŸŽ‰ All simple integration tests passed!")
    return 0
# Script entry point: run the suite and use its result as the exit status.
if __name__ == '__main__':
    sys.exit(main())