Robot2050
/

Meta-chunker-1.5B

Model card Files Files and versions Community

Meta-chunker-1.5B / train_args.json

Robot2050's picture

Upload 14 files

92fdb6e verified 29 days ago

history blame contribute delete

854 Bytes

	{
	"output_dir": "output",
	"model_name_or_path": "models/Qwen2.5-1.5B-Instruct",
	"deepspeed": "./train_args/ds_z3_config.json",
	"train_file": "train_GPT/conversations_20000_MASK.jsonl",
	"template_name": "qwen",
	"train_mode": "full",
	"num_train_epochs": 3,
	"per_device_train_batch_size": 4,
	"gradient_accumulation_steps": 16,
	"learning_rate": 1e-05,
	"max_seq_length": 4096,
	"logging_steps": 200,
	"save_steps": 200,
	"save_total_limit": 1,
	"lr_scheduler_type": "cosine",
	"warmup_steps": 50,
	"gradient_checkpointing": false,
	"disable_tqdm": false,
	"optim": "adamw_hf",
	"seed": 42,
	"fp16": true,
	"report_to": "tensorboard",
	"dataloader_num_workers": 0,
	"save_strategy": "steps",
	"weight_decay": 0,
	"max_grad_norm": 1.0,
	"remove_unused_columns": false
	}