Nessii013 committed (verified)
Commit 494f1e5 · 1 parent: f3d9037

Create oumi_train.yaml

Files changed (1):
  1. oumi/oumi_train.yaml (+71, -0)
oumi/oumi_train.yaml ADDED
@@ -0,0 +1,71 @@
+# LoRA config for CALM 405B.
+
+model:
+  model_name: "meta-llama/Llama-3.1-8B-Instruct"
+  model_max_length: 4096
+  torch_dtype_str: "bfloat16"
+  attn_implementation: "sdpa"
+  load_pretrained_weights: True
+  trust_remote_code: True
+  tokenizer_pad_token: "<|finetune_right_pad_id|>"
+  enable_liger_kernel: True
+
+data:
+  train:
+    datasets:
+      - dataset_name: "text_sft_jsonl"
+        dataset_path: "/path/to/training/dataset.jsonl"
+        shuffle: True
+        seed: 42
+    collator_name: "text_completions_only_with_padding"
+    target_col: "messages"
+    use_async_dataset: True
+    seed: 42
+  validation:
+    datasets:
+      - dataset_name: "text_sft_jsonl"
+        dataset_path: "/path/to/validation/dataset.jsonl"
+    collator_name: "text_completions_only_with_padding"
+    target_col: "messages"
+    use_async_dataset: True
+    seed: 42
+
+training:
+  trainer_type: "TRL_SFT"
+  use_peft: True
+  save_steps: 300
+  num_train_epochs: 1
+  per_device_train_batch_size: 16
+  gradient_accumulation_steps: 1
+  eval_strategy: "steps"
+  eval_steps: 300
+  per_device_eval_batch_size: 4
+
+  enable_gradient_checkpointing: True
+  gradient_checkpointing_kwargs:
+    use_reentrant: False
+  ddp_find_unused_parameters: False
+  optimizer: "adamw_torch_fused"
+  learning_rate: 4.0e-05
+  warmup_steps: 24
+  weight_decay: 0.01
+  max_grad_norm: 10
+  compile: False
+
+  dataloader_num_workers: "auto"
+  dataloader_prefetch_factor: 32
+
+  logging_steps: 10
+  log_model_summary: False
+  empty_device_cache_steps: 1
+  include_performance_metrics: True
+  output_dir: "output/llama405b.qlora"
+  enable_wandb: True
+
+fsdp:
+  enable_fsdp: True
+  forward_prefetch: True
+  use_orig_params: True
+  cpu_offload: True
+  auto_wrap_policy: "TRANSFORMER_BASED_WRAP"
+  transformer_layer_cls: "LlamaDecoderLayer"
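
Note: the file sets use_peft: True, but this commit does not add a peft: section, so LoRA hyperparameters fall back to Oumi's defaults. For reference only, a minimal sketch of what such a section could look like, assuming Oumi's usual PEFT parameter names (lora_r, lora_alpha, lora_dropout, lora_target_modules); the values below are illustrative and are not part of the committed file:

peft:
  # Illustrative values only; not part of this commit.
  lora_r: 16
  lora_alpha: 32
  lora_dropout: 0.0
  lora_target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"

Once the dataset_path placeholders point at real JSONL files, a config like this is typically launched with the Oumi CLI, e.g. "oumi train -c oumi/oumi_train.yaml"; the fsdp: section takes effect on multi-GPU launches.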