#!/usr/bin/env python3
"""
Simple TTRLVR + AZR integration test.

Smoke-tests the most basic components:
1. Task Generator
2. Data Converter
3. Iterative Trainer basic setup

Run as a script; the process exit code is 0 when every check passes and
1 otherwise.
"""

import os
import shutil
import sys
import tempfile
import traceback
from pathlib import Path

# Only the script entry point is exported. The test_* functions return
# booleans for main() to tally (they do not raise on failure), so they should
# not be star-imported into a pytest module and collected as tests.
__all__ = ["main"]

# Path setup: make the project packages importable.
sys.path.append('/home/ubuntu/RLVR/TestTime-RLVR-v2')


def _report_failure(label, error):
    """Print the failure line for *label* and the active traceback.

    Must be called from inside an ``except`` block so that
    ``traceback.print_exc()`` can see the exception being handled.
    """
    print(f"❌ {label} test failed: {error}")
    traceback.print_exc()


def test_task_generator():
    """Basic Task Generator test.

    Builds a ``TestTimeTaskGenerator``, generates tasks from one hand-written
    IPO triple and checks that all three task types are present and that the
    first task of each non-empty type carries the AZR metadata keys.

    Returns:
        True when every check passes, False when anything raises (the error
        and traceback are printed).
    """
    print("🧪 Testing Task Generator...")

    try:
        # Project imports stay inside the try block so that a missing or
        # broken package is reported as a failed test, not an import crash.
        from absolute_zero_reasoner.testtime.config import TestTimeConfig
        from absolute_zero_reasoner.testtime.logger import TestTimeLogger
        from absolute_zero_reasoner.testtime.task_generator import TestTimeTaskGenerator

        config = TestTimeConfig()
        config.model_name = "Qwen/Qwen2.5-7B"
        logger = TestTimeLogger()

        task_generator = TestTimeTaskGenerator(config, logger)

        # Test IPO (input / program / output) triple.
        test_ipo_triples = [{
            'id': 'test_triple_0',
            'input': '[1, 2, 3]',
            'actual_output': '[2, 4, 6]',
            'program': 'def test_func(lst):\n return [x * 2 for x in lst]',
            'full_input_str': 'test_func([1, 2, 3])',
            'source_program_id': 'program_0',
            'ipo_index': 0
        }]

        # Generate tasks.
        tasks = task_generator.generate_tasks(test_ipo_triples, "TestProblem", 1)

        # Validate that every task type was produced.
        for expected_type in ('induction', 'deduction', 'abduction'):
            assert expected_type in tasks, (
                f"task type {expected_type!r} missing from generated tasks"
            )

        total_tasks = sum(len(task_list) for task_list in tasks.values())
        print(f"✅ Task Generator: Generated {total_tasks} tasks")

        # Check the AZR metadata on the first task of each non-empty type.
        for task_type, task_list in tasks.items():
            if task_list:
                task = task_list[0]
                for key in ('uid', 'ipo_group_id', 'basic_accuracy'):
                    assert key in task, (
                        f"{task_type} task is missing AZR metadata key {key!r}"
                    )
                print(f"✅ Task Generator: {task_type} has AZR metadata")

        return True

    except Exception as e:
        _report_failure("Task Generator", e)
        return False


def test_data_converter():
    """Basic Data Converter test.

    Passes one mock induction task to
    ``CompleteTestTimePipeline._save_azr_training_data`` with a temporary
    output directory, then reads back the file reported for 'induction' as
    parquet and checks its row count and columns.

    Returns:
        True when every check passes, False when anything raises (the error
        and traceback are printed).
    """
    print("\n🧪 Testing Data Converter...")

    try:
        from absolute_zero_reasoner.testtime.complete_pipeline import CompleteTestTimePipeline
        from absolute_zero_reasoner.testtime.config import TestTimeConfig
        from absolute_zero_reasoner.testtime.logger import TestTimeLogger

        config = TestTimeConfig()
        logger = TestTimeLogger()
        pipeline = CompleteTestTimePipeline(config, logger)

        # Mock task data.
        mock_tasks = {
            'induction': [{
                'task_id': 'induction_0',
                'task_type': 'induction',
                'prompt': 'Test prompt',
                'uid': 'TestProblem_round_1_induction_0',
                'ipo_group_id': 'TestProblem_program_0_ipo_0',
                'source_program_id': 'program_0',
                'ipo_index': 0,
                'ipo_triple': {
                    'input': '[1, 2, 3]',
                    'output': '[2, 4, 6]',
                    'program': 'def test_func(lst):\n return [x * 2 for x in lst]'
                },
                'ground_truth': 'def test_func(lst):\n return [x * 2 for x in lst]',
                'extra_info': {'metric': 'code_f'},
                'basic_accuracy': 1.0,
                'original_problem_id': 'TestProblem',
                'round': 1
            }]
        }

        # Temporary directory; removed automatically when the block exits.
        with tempfile.TemporaryDirectory() as temp_dir:
            # Data conversion under test.
            saved_files = pipeline._save_azr_training_data(
                mock_tasks, "TestProblem", 1, temp_dir
            )

            # Check that the file was created.
            assert 'induction' in saved_files, (
                "no output file reported for 'induction'"
            )
            assert os.path.exists(saved_files['induction']), (
                f"reported file does not exist: {saved_files['induction']}"
            )

            # Check that the parquet file can be read back. pandas is
            # imported here so that its absence is reported as a failure.
            import pandas as pd
            df = pd.read_parquet(saved_files['induction'])

            assert len(df) == 1, f"expected 1 row, found {len(df)}"
            for column in ('prompt', 'uid', 'ipo_group_id'):
                assert column in df.columns, (
                    f"column {column!r} missing from parquet output"
                )

            print("✅ Data Converter: Parquet file created and validated")

        return True

    except Exception as e:
        _report_failure("Data Converter", e)
        return False


def test_iterative_trainer_basic():
    """Basic Iterative Trainer setup test.

    Constructs an ``IterativeTrainer`` from a default ``TestTimeConfig`` and
    checks its initial model path and checkpoint directory.

    Returns:
        True when every check passes, False when anything raises (the error
        and traceback are printed).
    """
    print("\n🧪 Testing Iterative Trainer Setup...")

    try:
        from utils.iterative_trainer import IterativeTrainer
        from absolute_zero_reasoner.testtime.config import TestTimeConfig
        from absolute_zero_reasoner.testtime.logger import TestTimeLogger

        config = TestTimeConfig()
        logger = TestTimeLogger()

        trainer = IterativeTrainer(config, logger)

        # Check the basic settings.
        assert trainer.current_model_path == "Qwen/Qwen2.5-7B", (
            f"unexpected current_model_path: {trainer.current_model_path!r}"
        )
        assert trainer.checkpoint_dir == "/data/RLVR/checkpoints/ttrlvr_azr", (
            f"unexpected checkpoint_dir: {trainer.checkpoint_dir!r}"
        )

        print("✅ Iterative Trainer: Basic setup successful")
        return True

    except Exception as e:
        _report_failure("Iterative Trainer", e)
        return False


def main(tests=None):
    """Run the integration tests and print a summary.

    Args:
        tests: Optional iterable of ``(name, callable)`` pairs to run instead
            of the built-in list. Each callable takes no arguments and
            returns a truthy value on success. Defaults to the three
            component tests defined in this file.

    Returns:
        0 when every test passed, 1 otherwise. An empty selection is reported
        as 0/0 and returns 0.
    """
    print("🚀 TTRLVR + AZR Simple Integration Test")
    print("=" * 60)

    if tests is None:
        tests = [
            ("Task Generator", test_task_generator),
            ("Data Converter", test_data_converter),
            ("Iterative Trainer", test_iterative_trainer_basic)
        ]

    results = []
    for test_name, test_func in tests:
        try:
            outcome = test_func()
        except Exception as e:
            # A test that raises counts as failed; keep running the rest.
            print(f"💥 {test_name} crashed: {e}")
            outcome = False
        results.append((test_name, outcome))

    # Result summary.
    print("\n" + "=" * 60)
    print("📊 Test Results:")

    for test_name, result in results:
        status = "✅ PASS" if result else "❌ FAIL"
        print(f" {status} {test_name}")

    passed = sum(1 for _, result in results if result)
    total = len(results)
    # Guard the division so an empty selection does not raise.
    percentage = passed / total * 100 if total else 0.0

    print(f"\nOverall: {passed}/{total} tests passed ({percentage:.1f}%)")

    if passed == total:
        print("\n🎉 All simple integration tests passed!")
        return 0

    print(f"\n⚠️ {total-passed} tests failed")
    return 1


if __name__ == '__main__':
    sys.exit(main())