update stablelm config
configs/stability_3b.yml  CHANGED  +30 -7
@@ -1,5 +1,6 @@
 base_model: stabilityai/stablelm-base-alpha-3b
-
+base_model_config: stabilityai/stablelm-base-alpha-3b
+load_in_8bit: false
 datasets:
   - path: vicgalle/alpaca-gpt4
     type: alpaca
@@ -8,6 +9,7 @@ val_set_size: 0.04
 adapter:
 lora_model_dir:
 sequence_len: 4096
+max_packed_sequence_len: 4096
 lora_r: 8
 lora_alpha: 16
 lora_dropout: 0.05
@@ -15,19 +17,40 @@ lora_target_modules:
   - q_proj
   - v_proj
 lora_fan_in_fan_out: false
-wandb_project: stable-
+wandb_project: stable-alpaca-3b
 wandb_watch:
 wandb_run_id:
 wandb_log_model: checkpoint
-output_dir: ./stable-
-batch_size:
-micro_batch_size:
+output_dir: ./stable-alpaca-3b
+batch_size: 2
+micro_batch_size: 1
 num_epochs: 1
-
+optimizer: adamw_bnb_8bit
+torchdistx_path:
+lr_scheduler: cosine
+learning_rate: 0.0000002
 train_on_inputs: false
 group_by_length: false
 bf16: true
 tf32: true
-early_stopping_patience:
+early_stopping_patience:
 resume_from_checkpoint:
 local_rank:
+logging_steps: 1
+xformers_attention: true
+flash_attention:
+gptq_groupsize:
+gptq_model_v1:
+warmup_steps: 100
+eval_steps: 50
+save_steps: 200
+debug:
+deepspeed:
+weight_decay: 0.01
+fsdp:
+fsdp_config:
+#special_tokens:
+#  pad_token: "[PAD]"
+#  bos_token: "<s>"
+#  eos_token: "</s>"
+#  unk_token: "<unk>"