# Training configuration (mmengine / xtuner style) for a reward model built on
# the internlm2_5-7b checkpoint, trained with a ranking loss on a streamed
# preference dataset under DeepSpeed ZeRO stage 1 with bf16 enabled.
#
# Scalars are declared first; the composite dicts further down reference them
# so each value is written exactly once. Every module-level name is a config
# key, so no helper variables are introduced.

# --------------------------------------------------------------------------
# Model and tokenizer
# --------------------------------------------------------------------------
pretrained_model_name_or_path = '/cpfs01/shared/alillm_hs/zouyicheng/xtuner/model/internlm2_5-7b'
use_varlen_attn = True
loss_type = 'ranking'
penalty_type = 'none'
reward_token_id = 92527

# --------------------------------------------------------------------------
# Data
# --------------------------------------------------------------------------
data_path = '/cpfs01/shared/alillm_hs/zouyicheng/rm_pretrain/data/train'
data_num = 510000000
avg_num_per_pack = 5
max_length = 16384
max_packed_length = 32768
max_response_length = 5120
sampler = 'mmengine.dataset.DefaultSampler'
sequence_parallel_size = 1

# --------------------------------------------------------------------------
# Optimisation and schedule
# --------------------------------------------------------------------------
batch_size = 1
accumulative_counts = 2
dataloader_num_workers = 0
max_epochs = 1
optim_type = 'torch.optim.AdamW'
lr = 1.45e-05
betas = (0.9, 0.95)
weight_decay = 0
max_norm = 1
warmup_ratio = 0.03

# --------------------------------------------------------------------------
# Checkpointing
# --------------------------------------------------------------------------
save_steps = 200
save_total_limit = 10

# --------------------------------------------------------------------------
# Runtime
# --------------------------------------------------------------------------
runner_type = 'FlexibleRunner'
launcher = 'pytorch'
log_level = 'INFO'
load_from = None
resume = True
work_dir = './work_dirs/RM_PT_internlm2_5_7b_DATA_510m_single_mix_Node_57_LR_1_45e_5'
randomness = dict(seed=None, deterministic=False)
log_processor = dict(by_epoch=False)
env_cfg = dict(
    cudnn_benchmark=False,
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
    dist_cfg=dict(backend='nccl'),
)
visualizer = dict(
    type='mmengine.visualization.Visualizer',
    vis_backends=[dict(type='mmengine.visualization.TensorboardVisBackend')],
)

# --------------------------------------------------------------------------
# Tokenizer and model
# --------------------------------------------------------------------------
tokenizer = dict(
    type='transformers.AutoTokenizer.from_pretrained',
    pretrained_model_name_or_path=pretrained_model_name_or_path,
    trust_remote_code=True,
    padding_side='left',
)

model = dict(
    type='xtuner.model.reward.RewardModel',
    use_varlen_attn=use_varlen_attn,
    loss_type=loss_type,
    penalty_type=penalty_type,
    llm=dict(
        type='transformers.AutoModel.from_pretrained',
        pretrained_model_name_or_path=pretrained_model_name_or_path,
        trust_remote_code=True,
    ),
)

# --------------------------------------------------------------------------
# Dataset and dataloader
# --------------------------------------------------------------------------
train_dataset = dict(
    type='xtuner.dataset.preference_dataset.build_preference_dataset_stream',
    dataset=dict(
        type='datasets.load_dataset',
        path=data_path,
        streaming=True,
    ),
    tokenizer=tokenizer,
    dataset_map_fn=None,
    if_pretrain=True,
    is_dpo=False,
    is_reward=True,
    reward_token_id=reward_token_id,
    data_num=data_num,
    avg_num_per_pack=avg_num_per_pack,
    max_length=max_length,
    max_packed_length=max_packed_length,
    max_response_length=max_response_length,
    shuffle_before_pack=True,
    use_varlen_attn=use_varlen_attn,
    num_proc=32,
    work_dir_name='RM_PT_internlm2_5_7b_DATA_510m_single_mix_Node_57_LR_1_45e_5',
)

train_dataloader = dict(
    batch_size=batch_size,
    num_workers=dataloader_num_workers,
    drop_last=True,
    dataset=train_dataset,
    collate_fn=dict(
        type='xtuner.dataset.collate_fns.preference_collate_fn.preference_collate_fn',
        use_varlen_attn=use_varlen_attn,
    ),
)

# --------------------------------------------------------------------------
# Optimizer, LR schedule and training loop
# --------------------------------------------------------------------------
optim_wrapper = dict(
    type='DeepSpeedOptimWrapper',
    optimizer=dict(
        type=optim_type,
        lr=lr,
        betas=betas,
        weight_decay=weight_decay,
    ),
)

# Linear warmup over the first `warmup_ratio` of training, then cosine
# annealing until the end of the last epoch. Both phases are declared in
# epochs with convert_to_iter_based=True.
param_scheduler = [
    dict(
        type='mmengine.optim.LinearLR',
        # NOTE(review): mmengine's LinearLR treats start_factor as a multiplier
        # on lr; 1.45e-06 equals lr * 0.1 numerically -- confirm this tiny
        # factor is intended rather than an absolute starting LR.
        start_factor=1.45e-06,
        by_epoch=True,
        begin=0,
        end=warmup_ratio,
        convert_to_iter_based=True,
    ),
    dict(
        type='mmengine.optim.CosineAnnealingLR',
        eta_min=1.45e-06,
        by_epoch=True,
        begin=warmup_ratio,
        end=max_epochs,
        convert_to_iter_based=True,
    ),
]

train_cfg = dict(type='xtuner.engine.runner.TrainLoop', max_epochs=max_epochs)

# --------------------------------------------------------------------------
# DeepSpeed strategy
# --------------------------------------------------------------------------
strategy = dict(
    type='xtuner.engine.DeepSpeedStrategy',
    # The 'auto' entries are left unresolved in the DeepSpeed config itself;
    # the concrete values are supplied by the sibling keys below.
    config=dict(
        gradient_accumulation_steps='auto',
        gradient_clipping='auto',
        train_micro_batch_size_per_gpu='auto',
        zero_allow_untested_optimizer=True,
        zero_force_ds_cpu_optimizer=False,
        zero_optimization=dict(stage=1, overlap_comm=True),
        bf16=dict(enabled=True),
        fp16=dict(enabled=False, initial_scale_power=16),
    ),
    exclude_frozen_parameters=True,
    gradient_accumulation_steps=accumulative_counts,
    gradient_clipping=max_norm,
    sequence_parallel_size=sequence_parallel_size,
    train_micro_batch_size_per_gpu=batch_size,
)

# --------------------------------------------------------------------------
# Hooks
# --------------------------------------------------------------------------
custom_hooks = [
    # Runs on the same iteration interval as checkpoint saving.
    dict(
        type='xtuner.engine.hooks.DataResumeHook',
        by_epoch=False,
        interval=save_steps,
    ),
    dict(type='xtuner.engine.hooks.VarlenAttnArgsToMessageHubHook'),
]

default_hooks = dict(
    timer=dict(type='mmengine.hooks.IterTimerHook'),
    logger=dict(
        type='mmengine.hooks.LoggerHook',
        log_metric_by_epoch=False,
        interval=10,
    ),
    param_scheduler=dict(type='mmengine.hooks.ParamSchedulerHook'),
    checkpoint=dict(
        type='mmengine.hooks.CheckpointHook',
        by_epoch=False,
        interval=save_steps,
        max_keep_ckpts=save_total_limit,
    ),
    sampler_seed=dict(type='mmengine.hooks.DistSamplerSeedHook'),
)