---
# Run configuration for a supervised fine-tuning job (stage: sft) that does
# full-parameter fine-tuning of Qwen/Qwen2.5-7B-Instruct on the
# mlfoundations-dev/stratos_unverified_mix dataset.
#
# This file is a plain top-level mapping of `key: value` settings. Stray "|"
# table-separator characters must not appear on or between entries: a lone "|"
# is a YAML block-scalar header and prevents the file from loading as a
# mapping.
#
# NOTE(review): numeric and boolean settings are deliberately kept as quoted
# strings (e.g. '16384', 'True'). Presumably the consumer receives every
# setting as a string and converts it itself -- confirm before switching any
# of them to native YAML numbers or booleans.

assistant_tag: assistant
bf16: 'True'
content_tag: value
cutoff_len: '16384'
dataset: mlfoundations-dev/stratos_unverified_mix
dataset_dir: ONLINE
ddp_timeout: '180000000'
deepspeed: /opt/ml/code/zero3_offload.json
do_train: 'True'
enable_liger_kernel: 'False'
finetuning_type: full
formatting: sharegpt
# NOTE(review): 96 = per_device_train_batch_size (1)
# x gradient_accumulation_steps (6) x 16; presumably 16 devices in total
# (the hub_model_id suffix suggests 2 nodes) -- verify against the launcher.
global_batch_size: '96'
gradient_accumulation_steps: '6'
hub_model_id: mlfoundations-dev/stratos_unverified_mix_2nodes
# Quoted so the value has the same type under every parser: a bare 1e-05 is
# read as a string by YAML 1.1 loaders such as PyYAML (no "." in the
# mantissa) but as a float by YAML 1.2 core-schema loaders. Quoting keeps it
# a string, matching the other numeric settings in this file.
learning_rate: '1e-05'
logging_steps: '1'
lr_scheduler_type: cosine
max_samples: '1000000'
messages: conversations
model_name_or_path: Qwen/Qwen2.5-7B-Instruct
num_train_epochs: '3.0'
output_dir: /opt/ml/model
overwrite_cache: 'True'
per_device_train_batch_size: '1'
plot_loss: 'True'
preprocessing_num_workers: '16'
push_to_db: 'True'
push_to_hub: 'True'
report_to: wandb
role_tag: from
run_name: stratos_unverified_mix
save_steps: '100'
stage: sft
template: qwen25
user_tag: user
warmup_ratio: '0.1'