|
|
|
""" |
|
VeRL νμ΅ λ¨λ
ν
μ€νΈ μ€ν¬λ¦½νΈ |
|
κΈ°μ‘΄ λ°μ΄ν°λ‘ Step 5λ§ μ€ννμ¬ νμ΅μ΄ μ λλ‘ λλμ§ νμΈ |
|
""" |
|
import os |
|
import sys |
|
sys.path.append('/home/ubuntu/RLVR/TestTime-RLVR-v2') |
|
sys.path.append('/home/ubuntu/RLVR/TestTime-RLVR-v2/test') |
|
sys.path.append('/home/ubuntu/RLVR/verl') |
|
|
|
from utils.iterative_trainer import IterativeTrainer |
|
from absolute_zero_reasoner.testtime.config import TestTimeConfig |
|
from absolute_zero_reasoner.testtime.logger import TestTimeLogger |
|
import torch |
|
|
|
def _log_parquet_files(logger, data_dir):
    """Log the entry count of *data_dir* and the size in MB of each .parquet file in it."""
    entries = os.listdir(data_dir)
    logger.log_info(f"π Found {len(entries)} files:")
    for name in sorted(entries):
        if name.endswith('.parquet'):
            size_mb = os.path.getsize(os.path.join(data_dir, name)) / 1024 / 1024
            logger.log_info(f" - {name}: {size_mb:.2f} MB")


def _log_gpu_memory_usage(logger, label):
    """Log the CUDA memory currently allocated and reserved (in GB), prefixed with *label*."""
    allocated = torch.cuda.memory_allocated() / 1024**3
    reserved = torch.cuda.memory_reserved() / 1024**3
    logger.log_info(f"{label} - Allocated: {allocated:.2f} GB, Reserved: {reserved:.2f} GB")


def _log_response_files(logger, data_dir):
    """Log how many .jsonl files exist under ``<data_dir>/llm_responses`` and name the first five."""
    responses_dir = os.path.join(data_dir, "llm_responses")
    if not os.path.isdir(responses_dir):
        logger.log_warning("β οΈ No llm_responses directory found")
        return
    response_files = [name for name in os.listdir(responses_dir) if name.endswith('.jsonl')]
    logger.log_info(f"π Generated {len(response_files)} response files")
    for name in sorted(response_files)[:5]:
        logger.log_info(f" - {name}")


def test_verl_training():
    """Run only the VeRL training step (Step 5) against existing training data.

    Builds the config and logger, verifies that the hard-coded training-data
    directory exists, constructs an ``IterativeTrainer`` and calls its
    ``run_verl_training_only`` method, then logs the outcome.

    The function returns ``None`` in every case. Exceptions raised while
    training are logged and their traceback printed rather than re-raised;
    trainer cleanup and CUDA cache release run regardless of the outcome.
    """
    config = TestTimeConfig(
        model_name="Qwen/Qwen2.5-7B",
        batch_size=8,
        temperature=0.7,
        top_k=50,
        top_p=0.9,
        max_new_tokens=2048,
        save_model=True
    )

    logger = TestTimeLogger(
        log_dir="/home/ubuntu/RLVR/TestTime-RLVR-v2/test_verl_logs",
        log_level="INFO"
    )

    training_data_path = "/home/ubuntu/RLVR/TestTime-RLVR-v2/tmp/batch_results/ttrlvr_azr_20250805_142357/mbpp/Mbpp_2/round_1/azr_training_data"

    logger.log_info("=" * 80)
    logger.log_info("π§ͺ VeRL Training Test - Step 5 Only")
    logger.log_info("=" * 80)
    logger.log_info(f"π Training data: {training_data_path}")

    # Validate the data directory before building the trainer, so a missing
    # path exits without leaving behind a trainer that is never cleaned up.
    # isdir (not exists) because the path is passed to os.listdir next.
    if not os.path.isdir(training_data_path):
        logger.log_error(f"β Training data directory not found: {training_data_path}")
        return
    _log_parquet_files(logger, training_data_path)

    trainer = IterativeTrainer(
        config=config,
        logger=logger,
        verl_config_path="/home/ubuntu/RLVR/TestTime-RLVR-v2/test/configs/ttrlvr_azr_ppo_4gpu.yaml",
        save_every_round=True
    )

    # Report the first CUDA device and the memory in use before training starts.
    if torch.cuda.is_available():
        logger.log_info(f"π₯οΈ GPU available: {torch.cuda.get_device_name(0)}")
        logger.log_info(f"π GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")
        _log_gpu_memory_usage(logger, "π Current usage")

    try:
        logger.log_info("\n" + "=" * 80)
        logger.log_info("π Starting VeRL training test...")
        logger.log_info("=" * 80 + "\n")

        result = trainer.run_verl_training_only(
            training_data_path=training_data_path,
            round_num=1,
            experiment_name="verl_test_step5_only"
        )

        # .get() so a result without a 'success' key is reported through the
        # normal failure branch, matching the .get('error', ...) used there.
        if result.get('success'):
            logger.log_info("\n" + "=" * 80)
            logger.log_info("✅ VeRL training test completed successfully!")
            logger.log_info("=" * 80)

            # Both keys are optional in the result; log them only when present.
            if 'duration' in result:
                logger.log_info(f"β±οΈ Training duration: {result['duration']:.2f} seconds")
            if 'model_path' in result:
                logger.log_info(f"πΎ Model saved to: {result['model_path']}")

            _log_response_files(logger, training_data_path)
        else:
            logger.log_error("\n" + "=" * 80)
            logger.log_error(f"β VeRL training failed: {result.get('error', 'Unknown error')}")
            logger.log_error("=" * 80)

    except Exception as e:
        # Top-level boundary of the script: report the failure and fall
        # through to cleanup instead of propagating.
        logger.log_error(f"\nπ₯ Test failed with exception: {e}")
        import traceback
        traceback.print_exc()

    finally:
        logger.log_info("\nπ§Ή Cleaning up...")
        # cleanup() is optional on the trainer, so probe for it first.
        if hasattr(trainer, 'cleanup'):
            trainer.cleanup()

        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            _log_gpu_memory_usage(logger, "π Final GPU usage")

        logger.log_info("\n✅ Test script completed")
|
|
|
|
|
if __name__ == "__main__": |
|
|
|
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3" |
|
os.environ["TOKENIZERS_PARALLELISM"] = "false" |
|
os.environ["NCCL_DEBUG"] = "WARN" |
|
|
|
print("\n" + "="*80) |
|
print("π§ͺ VeRL Training Test - Testing Step 5 Only") |
|
print("π Using existing data from previous run") |
|
print("="*80 + "\n") |
|
|
|
test_verl_training() |