new llama-2 default settings (#370)

* new default settings
* fix whitespace
* rm max packed sequence length

---------

Co-authored-by: Mads Henrichsen <[email protected]>

- examples/llama-2/lora.yml +3 -4
- examples/llama-2/qlora.yml +4 -4
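In short, both example configs drop the fixed max_packed_sequence_len cap in favor of sample packing, and enable flash attention by default. A minimal sketch of the affected keys as they now stand (the comments are mine, and the coupling of packing to flash attention is my reading of the paired defaults, not something the commit states):

sequence_len: 4096
sample_packing: true    # pack several short examples into each 4096-token sequence
flash_attention: true   # assumption: the sample-packing path is meant to run with flash attention on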
examples/llama-2/lora.yml CHANGED

@@ -15,7 +15,7 @@ val_set_size: 0.01
 output_dir: ./lora-out
 
 sequence_len: 4096
-max_packed_sequence_len: 4096
+sample_packing: true
 
 adapter: lora
 lora_model_dir:
@@ -49,8 +49,8 @@ early_stopping_patience:
 resume_from_checkpoint:
 local_rank:
 logging_steps: 1
-xformers_attention:
-flash_attention:
+xformers_attention:
+flash_attention: true
 
 warmup_steps: 10
 eval_steps: 20
@@ -64,4 +64,3 @@ special_tokens:
   bos_token: "<s>"
   eos_token: "</s>"
   unk_token: "<unk>"
-  pad_token: "<pad>"
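Both files also drop pad_token: "<pad>" from special_tokens, presumably because the stock Llama-2 tokenizer defines no pad token. After this commit the block reads the same way in both examples:

special_tokens:
  bos_token: "<s>"
  eos_token: "</s>"
  unk_token: "<unk>"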
examples/llama-2/qlora.yml CHANGED

@@ -18,7 +18,8 @@ adapter: qlora
 lora_model_dir:
 
 sequence_len: 4096
-max_packed_sequence_len: 4096
+sample_packing: true
+
 lora_r: 32
 lora_alpha: 16
 lora_dropout: 0.05
@@ -50,8 +51,8 @@ early_stopping_patience:
 resume_from_checkpoint:
 local_rank:
 logging_steps: 1
-xformers_attention:
-flash_attention:
+xformers_attention:
+flash_attention: true
 
 warmup_steps: 10
 eval_steps: 20
@@ -65,4 +66,3 @@ special_tokens:
   bos_token: "<s>"
   eos_token: "</s>"
   unk_token: "<unk>"
-  pad_token: "<pad>"
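For anyone on hardware without flash attention support, here is a sketch of how one might opt back out of the new defaults (assuming sample_packing relies on flash attention, which the paired flags suggest but the commit does not confirm):

sample_packing: false     # assumption: packing should be disabled along with flash attention
flash_attention: false
xformers_attention: true  # optional: fall back to the xformers attention backend instead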