Fizzarolli committed (verified)
Commit 3f5a614 · Parent: 05dd3a1

Create axolotl_config.yaml

Files changed (1)
  1. axolotl_config.yaml +128 -0
axolotl_config.yaml ADDED
@@ -0,0 +1,128 @@
+ # Weights and Biases logging config
+ wandb_project: nemo-instruct-tune
+ wandb_entity:
+ wandb_watch:
+ wandb_name: v1
+ wandb_log_model:
+
+ # Model architecture config
+ base_model: mistralai/Mistral-Nemo-Base-2407
+ model_type: AutoModelForCausalLM
+ tokenizer_type: AutoTokenizer
+ chat_template: alpaca
+
+ # Hugging Face saving config
+ hub_model_id: Fizzarolli/nemo-instruct-tune-v1
+ hub_strategy: all_checkpoints
+ push_dataset_to_hub:
+ hf_use_auth_token:
+
+ # Model checkpointing config
+ output_dir: ./lora-out
+ resume_from_checkpoint:
+ save_steps:
+ saves_per_epoch: 10
+ save_safetensors: true
+ save_total_limit: 2
+
+ # Mixed precision training config
+ bf16: true
+ fp16: false
+ tf32: false
+
+ # Model loading config
+ load_in_8bit: false
+ load_in_4bit: true
+ strict: false
+
+ # Sequence config
+ sequence_len: 16384
+ s2_attention: false
+ sample_packing: true
+ eval_sample_packing: true
+ pad_to_sequence_len: true
+ train_on_inputs: false
+ group_by_length: false
+
+ # QLoRA adapter config
+ adapter: qlora
+ lora_model_dir:
+ lora_r: 64
+ lora_alpha: 64
+ lora_dropout: 0.125
+ lora_fan_in_fan_out:
+ lora_target_linear:
+ save_embedding_layers:
+ peft_layers_to_transform:
+ peft_use_dora: true
+ peft_use_rslora:
+ peft_layer_replication:
+ lora_target_modules:
+   - gate_proj
+   - down_proj
+   - up_proj
+   - q_proj
+   - v_proj
+   - k_proj
+   - o_proj
+ lora_modules_to_save:
+
+ # Unfrozen parameters for FFT
+ unfrozen_parameters:
+
+ # Dataset config
+ datasets:
+   - path: BeaverAI/Nemo-Inst-Tune-ds
+     type: chat_template
+ val_set_size: 0.05
+ evaluation_strategy:
+ eval_steps:
+ evals_per_epoch: 20
+ test_datasets:
+ dataset_prepared_path: ./prepared-datasets
+ shuffle_merged_datasets: true
+
+ # Training hyperparameters
+ num_epochs: 1
+ gradient_accumulation_steps: 2
+ micro_batch_size: 1
+ eval_batch_size: 1
+ warmup_steps: 25
+ optimizer: paged_adamw_8bit
+ lr_scheduler: cosine
+ learning_rate: 0.000007
+ loraplus_lr_ratio: 8
+ loraplus_lr_embedding:
+ cosine_min_lr_ratio: 0.1
+ weight_decay: 0.1
+ max_grad_norm: 1
+ logging_steps: 1
+
+ # Model optimization
+ gradient_checkpointing: unsloth
+ xformers_attention: false
+ flash_attention: true
+ sdp_attention: false
+ unsloth_cross_entropy_loss: false
+ unsloth_lora_mlp: false
+ unsloth_lora_qkv: false
+ unsloth_lora_o: false
+
+ # Loss monitoring config
+ early_stopping_patience: false
+ loss_watchdog_threshold: 100.0
+ loss_watchdog_patience: 3
+
+ # Debug config
+ debug: true
+ seed: 42
+
+ # DeepSpeed and FSDP config
+ deepspeed: deepspeed_configs/zero2.json
+ fsdp:
+ fsdp_config:
+
+ # Token config
+ special_tokens:
+   pad_token: "<pad>"
+ tokens:
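
For reference, the QLoRA adapter and 4-bit loading sections of this config roughly correspond to the PEFT/bitsandbytes setup sketched below. This is a minimal sketch, not how axolotl builds the model internally; it assumes recent peft (>= 0.9, for use_dora) and bitsandbytes releases, and it only mirrors the load_in_4bit, bf16, lora_*, and peft_use_dora keys above (LoRA+ ratios, unsloth patches, and sample packing are not reproduced here).

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# 4-bit base-model loading, mirroring load_in_4bit: true and bf16: true
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)
model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-Nemo-Base-2407",
    quantization_config=bnb_config,
    torch_dtype=torch.bfloat16,
)
model = prepare_model_for_kbit_training(model)  # standard QLoRA prep step

# DoRA-enabled LoRA adapter, mirroring the lora_* / peft_use_dora keys above
lora_config = LoraConfig(
    r=64,
    lora_alpha=64,
    lora_dropout=0.125,
    use_dora=True,  # peft_use_dora: true
    target_modules=[
        "gate_proj", "down_proj", "up_proj",
        "q_proj", "v_proj", "k_proj", "o_proj",
    ],
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

In axolotl itself this file is consumed directly (e.g. accelerate launch -m axolotl.cli.train axolotl_config.yaml), so the snippet is only a reference for what the adapter section expands to.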