#!/usr/bin/env python3
"""
VeRL νμ΅ λ¨λ
ν
μ€νΈ μ€ν¬λ¦½νΈ
κΈ°μ‘΄ λ°μ΄ν°λ‘ Step 5λ§ μ€ννμ¬ νμ΅μ΄ μ λλ‘ λλμ§ νμΈ
"""
import os
import sys
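# Make the TestTime-RLVR-v2 repo, its test helpers, and the local VeRL checkout importable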
sys.path.append('/home/ubuntu/RLVR/TestTime-RLVR-v2')
sys.path.append('/home/ubuntu/RLVR/TestTime-RLVR-v2/test')
sys.path.append('/home/ubuntu/RLVR/verl')
from utils.iterative_trainer import IterativeTrainer
from absolute_zero_reasoner.testtime.config import TestTimeConfig
from absolute_zero_reasoner.testtime.logger import TestTimeLogger
import torch
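# torch is used here only to report GPU availability and memory usage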
def test_verl_training():
"""κΈ°μ‘΄ λ°μ΄ν°λ‘ VeRL νμ΅λ§ ν
μ€νΈ"""
# κΈ°λ³Έ μ€μ
config = TestTimeConfig(
model_name="Qwen/Qwen2.5-7B",
batch_size=8,
temperature=0.7,
top_k=50,
top_p=0.9,
max_new_tokens=2048,
save_model=True
)
    # Logger setup
    logger = TestTimeLogger(
        log_dir="/home/ubuntu/RLVR/TestTime-RLVR-v2/test_verl_logs",
        log_level="INFO"
    )

    # Initialize the trainer
    trainer = IterativeTrainer(
        config=config,
        logger=logger,
        verl_config_path="/home/ubuntu/RLVR/TestTime-RLVR-v2/test/configs/ttrlvr_azr_ppo_4gpu.yaml",
        save_every_round=True
    )
    # Training data path from a previous run
    training_data_path = "/home/ubuntu/RLVR/TestTime-RLVR-v2/tmp/batch_results/ttrlvr_azr_20250805_142357/mbpp/Mbpp_2/round_1/azr_training_data"

    logger.log_info("="*80)
    logger.log_info("VeRL Training Test - Step 5 Only")
    logger.log_info("="*80)
    logger.log_info(f"Training data: {training_data_path}")
    # Check the data files
    if os.path.exists(training_data_path):
        files = os.listdir(training_data_path)
        logger.log_info(f"Found {len(files)} files:")
        for f in sorted(files):
            if f.endswith('.parquet'):
                file_path = os.path.join(training_data_path, f)
                file_size = os.path.getsize(file_path) / 1024 / 1024  # MB
                logger.log_info(f"  - {f}: {file_size:.2f} MB")
    else:
        logger.log_error(f"Training data directory not found: {training_data_path}")
        return
    # Check GPU memory status
    if torch.cuda.is_available():
        logger.log_info(f"GPU available: {torch.cuda.get_device_name(0)}")
        logger.log_info(f"GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")

        # Current GPU usage
        allocated = torch.cuda.memory_allocated() / 1024**3
        reserved = torch.cuda.memory_reserved() / 1024**3
        logger.log_info(f"Current usage - Allocated: {allocated:.2f} GB, Reserved: {reserved:.2f} GB")
    # Run only the VeRL training step (Step 5)
    try:
        logger.log_info("\n" + "="*80)
        logger.log_info("Starting VeRL training test...")
        logger.log_info("="*80 + "\n")

        # Call the run_verl_training_only method
        result = trainer.run_verl_training_only(
            training_data_path=training_data_path,
            round_num=1,
            experiment_name="verl_test_step5_only"
        )
        if result['success']:
            logger.log_info("\n" + "="*80)
            logger.log_info("VeRL training test completed successfully!")
            logger.log_info("="*80)

            # Results summary
            if 'duration' in result:
                logger.log_info(f"Training duration: {result['duration']:.2f} seconds")
            if 'model_path' in result:
                logger.log_info(f"Model saved to: {result['model_path']}")

            # Check the generated llm_responses
            llm_responses_dir = os.path.join(training_data_path, "llm_responses")
            if os.path.exists(llm_responses_dir):
                response_files = [f for f in os.listdir(llm_responses_dir) if f.endswith('.jsonl')]
                logger.log_info(f"Generated {len(response_files)} response files")
                for f in sorted(response_files)[:5]:  # Show only the first 5
                    logger.log_info(f"  - {f}")
            else:
                logger.log_warning("No llm_responses directory found")
        else:
            logger.log_error("\n" + "="*80)
            logger.log_error(f"VeRL training failed: {result.get('error', 'Unknown error')}")
            logger.log_error("="*80)
    except Exception as e:
        logger.log_error(f"\nTest failed with exception: {e}")
        import traceback
        traceback.print_exc()

    finally:
        # Cleanup
        logger.log_info("\nCleaning up...")
        if hasattr(trainer, 'cleanup'):
            trainer.cleanup()
        # Final GPU memory status
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            allocated = torch.cuda.memory_allocated() / 1024**3
            reserved = torch.cuda.memory_reserved() / 1024**3
            logger.log_info(f"Final GPU usage - Allocated: {allocated:.2f} GB, Reserved: {reserved:.2f} GB")

        logger.log_info("\nTest script completed")
if __name__ == "__main__":
    # Environment variables
    os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3"  # Use 4 GPUs
    os.environ["TOKENIZERS_PARALLELISM"] = "false"
    os.environ["NCCL_DEBUG"] = "WARN"

    print("\n" + "="*80)
    print("VeRL Training Test - Testing Step 5 Only")
    print("Using existing data from previous run")
    print("="*80 + "\n")

    test_verl_training()