HoangHa
/

Pensez-v0.1-e5

Text Generation

text-generation-inference

Inference Endpoints

Model card Files Files and versions Community

Pensez-v0.1-e5 / fr_full_sft.yaml

HoangHa's picture

Update fr_full_sft.yaml

31104dc verified 8 days ago

history blame contribute delete

978 Bytes

	### model
	# Checkpoint that training starts from, given as a "user/repo"-style identifier
	# (presumably resolved against the Hugging Face Hub — confirm it is reachable from the training host).
	model_name_or_path: HoangHa/Pensez-v0.1-init
	# NOTE(review): presumably permits code shipped inside the checkpoint repo to execute
	# at load time — keep enabled only for sources you trust.
	trust_remote_code: true
	# Two opt-in optimizations; presumably Liger fused kernels and Unsloth-style gradient
	# checkpointing — TODO confirm both packages are installed in the training environment.
	enable_liger_kernel: true
	use_unsloth_gc: true

	### method
	# Training mode: the "sft" stage with training enabled and finetuning_type "full"
	# (presumably supervised fine-tuning of all weights rather than an adapter — verify against the trainer docs).
	stage: sft
	do_train: true
	finetuning_type: full
	# DeepSpeed config file, given as a relative path — presumably resolved from the
	# directory the trainer is launched in. The selected file is the "z3_offload" variant,
	# which is not among the alternatives listed in the trailing comment.
	deepspeed: examples/deepspeed/ds_z3_offload_config.json # other choices: [ds_z0_config.json, ds_z2_config.json, ds_z3_config.json]

	### dataset
	# Dataset is referenced by name only; presumably looked up in the trainer's dataset
	# registry — TODO confirm an entry called "pensez" exists there.
	dataset: pensez
	# Prompt/chat template name; presumably must match the base model's family — verify.
	template: qwen
	# Presumably the maximum sequence length in tokens — TODO confirm units and that
	# the hardware can hold sequences this long at the configured batch size.
	cutoff_len: 16384
	# Sample cap is disabled, so no max_samples limit is set by this file.
	# max_samples: 1000
	# Presumably forces dataset preprocessing to be redone instead of reusing a cached copy.
	overwrite_cache: true
	# Number of worker processes requested for preprocessing.
	preprocessing_num_workers: 16
	# NOTE(review): presumably packs several samples into one sequence while keeping
	# them from attending to each other — confirm against the trainer docs.
	neat_packing: true

	### output
	# Relative directory for training outputs.
	# NOTE(review): the "fr-8b" path segment is only a label — confirm it matches the model actually being trained.
	output_dir: saves/fr-8b/full/sft
	# Log on every step.
	logging_steps: 1
	# Step-based checkpointing is disabled in favour of the epoch-based strategy below.
	# save_steps: 500
	save_strategy: "epoch"
	plot_loss: true
	# NOTE(review): presumably lets a rerun write into an existing output_dir — make sure
	# earlier checkpoints there are not needed before launching.
	overwrite_output_dir: true

	### train
	# Per-device batch of 2 with no gradient accumulation (1) gives 2 sequences per device
	# per optimizer step; the global batch size depends on the device count, which this file does not set.
	per_device_train_batch_size: 2
	gradient_accumulation_steps: 1
	# Peak learning rate 1.0e-5 over 5 epochs, using a cosine schedule with a 0.05 warmup ratio.
	learning_rate: 1.0e-5
	num_train_epochs: 5.0
	lr_scheduler_type: cosine
	warmup_ratio: 0.05
	# Train in bfloat16; presumably requires hardware with bf16 support — TODO confirm.
	bf16: true
	# Very large distributed timeout (presumably in seconds), effectively disabling it — verify units.
	ddp_timeout: 180000000
	# NOTE(review): presumably the NEFTune embedding-noise strength — confirm it is intended for this run.
	neftune_noise_alpha: 5

	### eval
	# Every evaluation setting below is commented out, so this file configures no
	# validation split and no evaluation schedule.
	# val_size: 0.1
	# per_device_eval_batch_size: 1
	# eval_strategy: steps
	# eval_steps: 500

	# Experiment tracking (these two keys are not eval-specific, despite the section heading above).
	# Presumably requires wandb to be installed and authenticated on the training host — TODO confirm.
	report_to: wandb
	run_name: fr-pensez # optional