neural-mesh-v2 / test_verl_training.py

Restore all essential files - code, configs, and MBPP/HumanEval data

24c2665 verified 26 days ago

5.61 kB

	#!/usr/bin/env python3
	"""
	VeRL 학습 단독 테스트 스크립트
	기존 데이터로 Step 5만 실행하여 학습이 제대로 되는지 확인
	"""
	import os
	import sys
	# Extend the module search path so the project-local imports below can be
	# resolved when this file is run directly as a script.
	# NOTE(review): these are absolute, machine-specific paths; the script will
	# fail at import time on any host where they do not exist - confirm whether
	# they should be derived from this file's location or from the environment.
	sys.path.append('/home/ubuntu/RLVR/TestTime-RLVR-v2')
	sys.path.append('/home/ubuntu/RLVR/TestTime-RLVR-v2/test')
	sys.path.append('/home/ubuntu/RLVR/verl')

	# Project-local imports; they must stay below the sys.path.append calls above.
	from utils.iterative_trainer import IterativeTrainer
	from absolute_zero_reasoner.testtime.config import TestTimeConfig
	from absolute_zero_reasoner.testtime.logger import TestTimeLogger
	import torch

	def _log_training_files(logger, training_data_path):
	    """Log the parquet files found in *training_data_path*.

	    The reported file count covers every directory entry, while only
	    entries ending in ``.parquet`` are listed with their size.

	    Returns:
	        True when *training_data_path* is an existing directory, False
	        (after logging an error) otherwise.
	    """
	    # isdir rather than exists: a regular file at this path would pass an
	    # existence check and then make os.listdir raise NotADirectoryError.
	    if not os.path.isdir(training_data_path):
	        logger.log_error(f"❌ Training data directory not found: {training_data_path}")
	        return False

	    entries = os.listdir(training_data_path)
	    logger.log_info(f"📄 Found {len(entries)} files:")
	    for name in sorted(entries):
	        if name.endswith('.parquet'):
	            size_mb = os.path.getsize(os.path.join(training_data_path, name)) / 1024 / 1024
	            logger.log_info(f" - {name}: {size_mb:.2f} MB")
	    return True


	def _log_gpu_memory(logger, label):
	    """Log allocated and reserved CUDA memory (bytes / 1024**3), prefixed by *label*."""
	    allocated = torch.cuda.memory_allocated() / 1024**3
	    reserved = torch.cuda.memory_reserved() / 1024**3
	    logger.log_info(f"📊 {label} - Allocated: {allocated:.2f} GB, Reserved: {reserved:.2f} GB")


	def _log_training_result(logger, result, training_data_path):
	    """Log the outcome of a training run described by the *result* mapping.

	    On success the optional ``duration`` and ``model_path`` entries are
	    reported, followed by up to five ``.jsonl`` files found in the
	    ``llm_responses`` sub-directory of *training_data_path*. On failure the
	    ``error`` entry (or a placeholder) is logged.
	    """
	    if not result['success']:
	        logger.log_error("\n" + "="*80)
	        logger.log_error(f"❌ VeRL training failed: {result.get('error', 'Unknown error')}")
	        logger.log_error("="*80)
	        return

	    logger.log_info("\n" + "="*80)
	    logger.log_info("✅ VeRL training test completed successfully!")
	    logger.log_info("="*80)

	    # Result summary
	    if 'duration' in result:
	        logger.log_info(f"⏱️ Training duration: {result['duration']:.2f} seconds")
	    if 'model_path' in result:
	        logger.log_info(f"💾 Model saved to: {result['model_path']}")

	    # Check for llm_responses output
	    llm_responses_dir = os.path.join(training_data_path, "llm_responses")
	    if os.path.isdir(llm_responses_dir):
	        response_files = [name for name in os.listdir(llm_responses_dir) if name.endswith('.jsonl')]
	        logger.log_info(f"📝 Generated {len(response_files)} response files")
	        for name in sorted(response_files)[:5]:  # show only the first five
	            logger.log_info(f" - {name}")
	    else:
	        logger.log_warning("⚠️ No llm_responses directory found")


	def _cleanup(trainer, logger):
	    """Call the trainer's ``cleanup`` hook if it has one and log final GPU memory."""
	    logger.log_info("\n🧹 Cleaning up...")
	    if hasattr(trainer, 'cleanup'):
	        trainer.cleanup()

	    # Final GPU memory state
	    if torch.cuda.is_available():
	        torch.cuda.empty_cache()
	        _log_gpu_memory(logger, "Final GPU usage")


	def test_verl_training():
	    """Run only the VeRL training step (Step 5) against existing data.

	    Builds the config, logger and trainer with hard-coded settings and
	    paths, logs the contents of the training data directory and the GPU
	    state, then calls ``trainer.run_verl_training_only`` and logs its
	    result. Exceptions raised by the training call or by result reporting
	    are logged and their traceback printed; they are not re-raised.

	    The trainer is cleaned up on every exit path once it has been
	    constructed, including the early return taken when the training data
	    directory is missing.

	    Returns:
	        None.
	    """
	    # Base configuration
	    config = TestTimeConfig(
	        model_name="Qwen/Qwen2.5-7B",
	        batch_size=8,
	        temperature=0.7,
	        top_k=50,
	        top_p=0.9,
	        max_new_tokens=2048,
	        save_model=True
	    )

	    # Logger setup
	    logger = TestTimeLogger(
	        log_dir="/home/ubuntu/RLVR/TestTime-RLVR-v2/test_verl_logs",
	        log_level="INFO"
	    )

	    # Trainer initialization
	    trainer = IterativeTrainer(
	        config=config,
	        logger=logger,
	        verl_config_path="/home/ubuntu/RLVR/TestTime-RLVR-v2/test/configs/ttrlvr_azr_ppo_4gpu.yaml",
	        save_every_round=True
	    )

	    # Training data path
	    training_data_path = "/home/ubuntu/RLVR/TestTime-RLVR-v2/tmp/batch_results/ttrlvr_azr_20250805_142357/mbpp/Mbpp_2/round_1/azr_training_data"

	    # Everything after the trainer exists runs under try/finally so that an
	    # early return or an unexpected error can no longer skip the cleanup.
	    try:
	        logger.log_info("="*80)
	        logger.log_info("🧪 VeRL Training Test - Step 5 Only")
	        logger.log_info("="*80)
	        logger.log_info(f"📁 Training data: {training_data_path}")

	        # Verify the data files
	        if not _log_training_files(logger, training_data_path):
	            return

	        # Check GPU memory state
	        if torch.cuda.is_available():
	            logger.log_info(f"🖥️ GPU available: {torch.cuda.get_device_name(0)}")
	            logger.log_info(f"📊 GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")

	            # Current GPU usage
	            _log_gpu_memory(logger, "Current usage")

	        # Run VeRL training only (Step 5)
	        try:
	            logger.log_info("\n" + "="*80)
	            logger.log_info("🚀 Starting VeRL training test...")
	            logger.log_info("="*80 + "\n")

	            result = trainer.run_verl_training_only(
	                training_data_path=training_data_path,
	                round_num=1,
	                experiment_name="verl_test_step5_only"
	            )

	            _log_training_result(logger, result, training_data_path)

	        except Exception as e:
	            # Top-level boundary of a diagnostic script: report and carry on
	            # to cleanup instead of crashing.
	            logger.log_error(f"\n💥 Test failed with exception: {e}")
	            import traceback
	            traceback.print_exc()

	    finally:
	        _cleanup(trainer, logger)

	    logger.log_info("\n✅ Test script completed")


	if __name__ == "__main__":
	    # Set environment variables before the test starts.
	    os.environ.update({
	        "CUDA_VISIBLE_DEVICES": "0,1,2,3",  # use 4 GPUs
	        "TOKENIZERS_PARALLELISM": "false",
	        "NCCL_DEBUG": "WARN",
	    })

	    # Banner announcing what this script exercises.
	    rule = "=" * 80
	    banner_lines = [
	        "\n" + rule,
	        "🧪 VeRL Training Test - Testing Step 5 Only",
	        "📁 Using existing data from previous run",
	        rule + "\n",
	    ]
	    print("\n".join(banner_lines))

	    test_verl_training()