#!/usr/bin/env python3
"""
Standalone test script for VeRL training.
Runs only Step 5 on existing data to verify that training works correctly.
"""
import os
import sys

# Make the local TestTime-RLVR-v2 and verl checkouts importable
sys.path.append('/home/ubuntu/RLVR/TestTime-RLVR-v2')
sys.path.append('/home/ubuntu/RLVR/TestTime-RLVR-v2/test')
sys.path.append('/home/ubuntu/RLVR/verl')

from utils.iterative_trainer import IterativeTrainer
from absolute_zero_reasoner.testtime.config import TestTimeConfig
from absolute_zero_reasoner.testtime.logger import TestTimeLogger
import torch


def test_verl_training():
    """Test only the VeRL training step (Step 5) with existing data."""
    # Basic configuration
    config = TestTimeConfig(
        model_name="Qwen/Qwen2.5-7B",
        batch_size=8,
        temperature=0.7,
        top_k=50,
        top_p=0.9,
        max_new_tokens=2048,
        save_model=True
    )
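    # NOTE (assumption): temperature/top_k/top_p/max_new_tokens are presumably the
    # sampling settings used when generating rollouts, and save_model=True keeps the
    # trained checkpoint around so it can be inspected after the test finishes.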

    # Logger setup
    logger = TestTimeLogger(
        log_dir="/home/ubuntu/RLVR/TestTime-RLVR-v2/test_verl_logs",
        log_level="INFO"
    )

    # Initialize the trainer
    trainer = IterativeTrainer(
        config=config,
        logger=logger,
        verl_config_path="/home/ubuntu/RLVR/TestTime-RLVR-v2/test/configs/ttrlvr_azr_ppo_4gpu.yaml",
        save_every_round=True
    )

    # Training data path
    training_data_path = "/home/ubuntu/RLVR/TestTime-RLVR-v2/tmp/batch_results/ttrlvr_azr_20250805_142357/mbpp/Mbpp_2/round_1/azr_training_data"

    logger.log_info("="*80)
    logger.log_info("πŸ§ͺ VeRL Training Test - Step 5 Only")
    logger.log_info("="*80)
    logger.log_info(f"πŸ“ Training data: {training_data_path}")

    # Check the training data files
    if os.path.exists(training_data_path):
        files = os.listdir(training_data_path)
        logger.log_info(f"πŸ“„ Found {len(files)} files:")
        for f in sorted(files):
            if f.endswith('.parquet'):
                file_path = os.path.join(training_data_path, f)
                file_size = os.path.getsize(file_path) / 1024 / 1024  # MB
                logger.log_info(f" - {f}: {file_size:.2f} MB")
    else:
        logger.log_error(f"❌ Training data directory not found: {training_data_path}")
        return
# GPU λ©”λͺ¨λ¦¬ μƒνƒœ 확인
if torch.cuda.is_available():
logger.log_info(f"πŸ–₯️ GPU available: {torch.cuda.get_device_name(0)}")
logger.log_info(f"πŸ“Š GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")
# ν˜„μž¬ GPU μ‚¬μš©λŸ‰
allocated = torch.cuda.memory_allocated() / 1024**3
reserved = torch.cuda.memory_reserved() / 1024**3
logger.log_info(f"πŸ“Š Current usage - Allocated: {allocated:.2f} GB, Reserved: {reserved:.2f} GB")

    # Run only VeRL training (Step 5)
    try:
        logger.log_info("\n" + "="*80)
        logger.log_info("πŸš€ Starting VeRL training test...")
        logger.log_info("="*80 + "\n")

        # Call the run_verl_training_only method
        result = trainer.run_verl_training_only(
            training_data_path=training_data_path,
            round_num=1,
            experiment_name="verl_test_step5_only"
        )
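        # NOTE (assumption): run_verl_training_only is expected to return a dict
        # with at least a 'success' flag; 'duration', 'model_path', and 'error'
        # are optional keys that the checks below read when present. The exact
        # schema is defined by IterativeTrainer.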
        if result['success']:
            logger.log_info("\n" + "="*80)
            logger.log_info("βœ… VeRL training test completed successfully!")
            logger.log_info("="*80)

            # Result summary
            if 'duration' in result:
                logger.log_info(f"⏱️ Training duration: {result['duration']:.2f} seconds")
            if 'model_path' in result:
                logger.log_info(f"πŸ’Ύ Model saved to: {result['model_path']}")

            # Check llm_responses
            llm_responses_dir = os.path.join(training_data_path, "llm_responses")
            if os.path.exists(llm_responses_dir):
                response_files = [f for f in os.listdir(llm_responses_dir) if f.endswith('.jsonl')]
                logger.log_info(f"πŸ“ Generated {len(response_files)} response files")
                for f in sorted(response_files)[:5]:  # show only the first 5
                    logger.log_info(f" - {f}")
            else:
                logger.log_warning("⚠️ No llm_responses directory found")
        else:
            logger.log_error("\n" + "="*80)
            logger.log_error(f"❌ VeRL training failed: {result.get('error', 'Unknown error')}")
            logger.log_error("="*80)
    except Exception as e:
        logger.log_error(f"\nπŸ’₯ Test failed with exception: {e}")
        import traceback
        traceback.print_exc()
    finally:
        # Cleanup
        logger.log_info("\n🧹 Cleaning up...")
        if hasattr(trainer, 'cleanup'):
            trainer.cleanup()

        # Final GPU memory status
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            allocated = torch.cuda.memory_allocated() / 1024**3
            reserved = torch.cuda.memory_reserved() / 1024**3
            logger.log_info(f"πŸ“Š Final GPU usage - Allocated: {allocated:.2f} GB, Reserved: {reserved:.2f} GB")

        logger.log_info("\nβœ… Test script completed")


if __name__ == "__main__":
    # Environment variables
    os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3"  # use 4 GPUs
    os.environ["TOKENIZERS_PARALLELISM"] = "false"
    os.environ["NCCL_DEBUG"] = "WARN"

    print("\n" + "="*80)
    print("πŸ§ͺ VeRL Training Test - Testing Step 5 Only")
    print("πŸ“ Using existing data from previous run")
    print("="*80 + "\n")

    test_verl_training()
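
# Usage sketch (the absolute paths and GPU indices above are machine-specific;
# adjust them to your own checkout and hardware before running):
#   python test_verl_training.py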