File size: 5,612 Bytes
24c2665
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
#!/usr/bin/env python3
"""
Standalone test script for VeRL training.
Runs only Step 5 on existing data to verify that training works correctly.
"""
import os
import sys
sys.path.append('/home/ubuntu/RLVR/TestTime-RLVR-v2')
sys.path.append('/home/ubuntu/RLVR/TestTime-RLVR-v2/test')
sys.path.append('/home/ubuntu/RLVR/verl')

from utils.iterative_trainer import IterativeTrainer
from absolute_zero_reasoner.testtime.config import TestTimeConfig
from absolute_zero_reasoner.testtime.logger import TestTimeLogger
import torch

def test_verl_training():
    """Run only the VeRL training step (Step 5) against existing data.

    Builds a ``TestTimeConfig`` and a ``TestTimeLogger``, checks that the
    hard-coded training-data directory is present, constructs an
    ``IterativeTrainer`` and calls ``run_verl_training_only`` on it. Progress,
    the outcome and GPU memory figures are reported through the logger.

    The data directory is validated *before* the trainer is constructed, so
    a missing directory aborts the run without leaving behind a trainer that
    never reaches the cleanup in the ``finally`` clause.

    Returns:
        None. An exception raised inside the training call is caught,
        logged, and its traceback printed; it is not re-raised. Errors from
        building the config, logger or trainer propagate to the caller.
    """
    # Basic settings.
    config = TestTimeConfig(
        model_name="Qwen/Qwen2.5-7B",
        batch_size=8,
        temperature=0.7,
        top_k=50,
        top_p=0.9,
        max_new_tokens=2048,
        save_model=True
    )

    # Logger setup.
    logger = TestTimeLogger(
        log_dir="/home/ubuntu/RLVR/TestTime-RLVR-v2/test_verl_logs",
        log_level="INFO"
    )

    # Training data path.
    training_data_path = "/home/ubuntu/RLVR/TestTime-RLVR-v2/tmp/batch_results/ttrlvr_azr_20250805_142357/mbpp/Mbpp_2/round_1/azr_training_data"

    logger.log_info("="*80)
    logger.log_info("πŸ§ͺ VeRL Training Test - Step 5 Only")
    logger.log_info("="*80)
    logger.log_info(f"πŸ“ Training data: {training_data_path}")

    # Check the data files first. isdir (not exists) is used so that a
    # regular file at this path is reported as missing data instead of
    # making os.listdir raise outside the try block below.
    if not os.path.isdir(training_data_path):
        logger.log_error(f"❌ Training data directory not found: {training_data_path}")
        return

    files = os.listdir(training_data_path)
    logger.log_info(f"πŸ“„ Found {len(files)} files:")
    for name in sorted(files):
        if name.endswith('.parquet'):
            file_path = os.path.join(training_data_path, name)
            file_size = os.path.getsize(file_path) / 1024 / 1024  # MB
            logger.log_info(f"   - {name}: {file_size:.2f} MB")

    # Trainer initialisation. Done only after the data check has passed, so
    # the early return above never abandons a constructed trainer.
    trainer = IterativeTrainer(
        config=config,
        logger=logger,
        verl_config_path="/home/ubuntu/RLVR/TestTime-RLVR-v2/test/configs/ttrlvr_azr_ppo_4gpu.yaml",
        save_every_round=True
    )

    # GPU memory status check (device 0 only).
    if torch.cuda.is_available():
        logger.log_info(f"πŸ–₯️  GPU available: {torch.cuda.get_device_name(0)}")
        logger.log_info(f"πŸ“Š GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")

        # Current GPU usage, converted from bytes to GiB.
        allocated = torch.cuda.memory_allocated() / 1024**3
        reserved = torch.cuda.memory_reserved() / 1024**3
        logger.log_info(f"πŸ“Š Current usage - Allocated: {allocated:.2f} GB, Reserved: {reserved:.2f} GB")

    # Run VeRL training only (Step 5).
    try:
        logger.log_info("\n" + "="*80)
        logger.log_info("πŸš€ Starting VeRL training test...")
        logger.log_info("="*80 + "\n")

        # Call the run_verl_training_only method.
        result = trainer.run_verl_training_only(
            training_data_path=training_data_path,
            round_num=1,
            experiment_name="verl_test_step5_only"
        )

        if result['success']:
            logger.log_info("\n" + "="*80)
            logger.log_info("βœ… VeRL training test completed successfully!")
            logger.log_info("="*80)

            # Result summary; both keys are optional in the result mapping.
            if 'duration' in result:
                logger.log_info(f"⏱️  Training duration: {result['duration']:.2f} seconds")
            if 'model_path' in result:
                logger.log_info(f"πŸ’Ύ Model saved to: {result['model_path']}")

            # Check for llm_responses output next to the training data.
            llm_responses_dir = os.path.join(training_data_path, "llm_responses")
            if os.path.exists(llm_responses_dir):
                response_files = [f for f in os.listdir(llm_responses_dir) if f.endswith('.jsonl')]
                logger.log_info(f"πŸ“ Generated {len(response_files)} response files")
                for f in sorted(response_files)[:5]:  # show only the first 5
                    logger.log_info(f"   - {f}")
            else:
                logger.log_warning("⚠️  No llm_responses directory found")

        else:
            logger.log_error("\n" + "="*80)
            logger.log_error(f"❌ VeRL training failed: {result.get('error', 'Unknown error')}")
            logger.log_error("="*80)

    except Exception as e:
        # Top-level boundary of the test script: report and keep going so
        # the cleanup below always runs.
        logger.log_error(f"\nπŸ’₯ Test failed with exception: {e}")
        import traceback
        traceback.print_exc()

    finally:
        # Cleanup.
        logger.log_info("\n🧹 Cleaning up...")
        if hasattr(trainer, 'cleanup'):
            trainer.cleanup()

        # Final GPU memory status.
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            allocated = torch.cuda.memory_allocated() / 1024**3
            reserved = torch.cuda.memory_reserved() / 1024**3
            logger.log_info(f"πŸ“Š Final GPU usage - Allocated: {allocated:.2f} GB, Reserved: {reserved:.2f} GB")

        logger.log_info("\nβœ… Test script completed")


if __name__ == "__main__":
    # Environment variables applied before the test starts.
    env_overrides = {
        "CUDA_VISIBLE_DEVICES": "0,1,2,3",  # use 4 GPUs
        "TOKENIZERS_PARALLELISM": "false",
        "NCCL_DEBUG": "WARN",
    }
    for key, value in env_overrides.items():
        os.environ[key] = value

    # Banner printed to stdout, framed by blank lines and 80-char rules.
    rule = "=" * 80
    print(f"\n{rule}")
    print("πŸ§ͺ VeRL Training Test - Testing Step 5 Only")
    print("πŸ“ Using existing data from previous run")
    print(f"{rule}\n")

    test_verl_training()