# Full-parameter supervised fine-tuning (SFT) run for HoangHa/Pensez-v0.1-init.
# NOTE(review): key names look like LLaMA-Factory training arguments — confirm
# against the tool that consumes this file.

# Model
model_name_or_path: HoangHa/Pensez-v0.1-init
trust_remote_code: true
enable_liger_kernel: true
use_unsloth_gc: true

# Method
stage: sft
do_train: true
finetuning_type: full
# Path to the DeepSpeed JSON config; presumably resolved relative to the
# directory the trainer is launched from — verify.
deepspeed: examples/deepspeed/ds_z3_offload_config.json

# Dataset
dataset: pensez
template: qwen
cutoff_len: 16384
overwrite_cache: true
preprocessing_num_workers: 16
neat_packing: true

# Output
output_dir: saves/fr-8b/full/sft
logging_steps: 1
save_strategy: "epoch"
plot_loss: true
overwrite_output_dir: true

# Training hyperparameters
per_device_train_batch_size: 2
gradient_accumulation_steps: 1
# Keep the decimal point and the signed exponent: YAML 1.1 loaders only
# recognise this exact shape as a float (a bare `1e-5` may load as a string).
learning_rate: 1.0e-5
num_train_epochs: 5.0
lr_scheduler_type: cosine
warmup_ratio: 0.05
bf16: true
ddp_timeout: 180000000
neftune_noise_alpha: 5

# Experiment tracking
report_to: wandb
run_name: fr-pensez