liuylhf committed
Commit 8a37e5d · verified · 1 Parent(s): d6780ab

Model save

README.md CHANGED
@@ -2,11 +2,10 @@
 license: apache-2.0
 library_name: peft
 tags:
-- axolotl
 - generated_from_trainer
 base_model: mistralai/Mixtral-8x7B-Instruct-v0.1
 model-index:
-- name: mixtral-lora
+- name: mixtral-lora-less-modules
   results: []
 ---

@@ -41,7 +40,7 @@ datasets:

 dataset_prepared_path: last_run_prepared
 val_set_size: 0.01
-output_dir: ../../text-generation-webui/loras/mixtral-instruct-raw-data-v3-inst
+output_dir: ./mixtral-qlora-1-epochs-r64

 adapter: qlora
 lora_model_dir:
@@ -50,21 +49,18 @@ sequence_len: 4096
 sample_packing: true
 pad_to_sequence_len: true

-lora_r: 16
+lora_r: 64
 lora_alpha: 16
 lora_dropout: 0.05
-lora_target_linear: true
 lora_fan_in_fan_out:
-hub_model_id: liuylhf/mixtral-lora
+hub_model_id: liuylhf/mixtral-lora-less-modules
+hub_strategy: end

-# lora_target_modules:
-# - gate_proj
-# - down_proj
-# - up_proj
-# - q_proj
-# - v_proj
-# - k_proj
-# - o_proj
+lora_target_modules:
+- q_proj
+- v_proj
+- k_proj
+- o_proj

 wandb_project: function-call
 wandb_name: mixtral-instruct-raw-data-v3
@@ -72,7 +68,7 @@ wandb_log_model: end

 gradient_accumulation_steps: 4
 micro_batch_size: 2
-num_epochs: 0.5
+num_epochs: 1
 optimizer: paged_adamw_8bit
 lr_scheduler: cosine
 learning_rate: 0.001
@@ -113,11 +109,9 @@ fsdp_config:

 </details><br>

-# mixtral-lora
+# mixtral-lora-less-modules

-This model is a fine-tuned version of [mistralai/Mixtral-8x7B-Instruct-v0.1](https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1) on the None dataset.
-It achieves the following results on the evaluation set:
-- Loss: 0.1923
+This model is a fine-tuned version of [mistralai/Mixtral-8x7B-Instruct-v0.1](https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1) on an unknown dataset.

 ## Model description

@@ -148,23 +142,7 @@ The following hyperparameters were used during training:
 - optimizer: Adam with betas=(0.9,0.95) and epsilon=1e-05
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_steps: 10
-- num_epochs: 0.5
-
-### Training results
-
-| Training Loss | Epoch | Step | Validation Loss |
-|:-------------:|:-----:|:----:|:---------------:|
-| 3.2966        | 0.0   | 1    | 3.2222          |
-| 0.3736        | 0.05  | 16   | 0.3541          |
-| 0.1777        | 0.1   | 32   | 0.2357          |
-| 0.2366        | 0.16  | 48   | 0.2154          |
-| 0.1917        | 0.21  | 64   | 0.2056          |
-| 0.2213        | 0.26  | 80   | 0.2003          |
-| 0.149         | 0.31  | 96   | 0.1972          |
-| 0.1739        | 0.37  | 112  | 0.1950          |
-| 0.1668        | 0.42  | 128  | 0.1928          |
-| 0.0997        | 0.47  | 144  | 0.1923          |
-
+- num_epochs: 1

 ### Framework versions

adapter_config.json CHANGED
@@ -15,14 +15,14 @@
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
-  "r": 16,
+  "r": 64,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "k_proj",
-    "v_proj",
     "o_proj",
-    "q_proj"
+    "q_proj",
+    "v_proj",
+    "k_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_rslora": false
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7d1e3ee7a461a4f17d797b4909eef93fa2ae7c5281de3f753c9b7c529565add3
-size 54560368
+oid sha256:bd223b7576019fbc18fdb6df07a26b9b662da56d34b65c55bfe245668d413a3f
+size 218138576
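The adapter weights file grows roughly 4x (54,560,368 to 218,138,576 bytes), which is what the rank change alone predicts: LoRA parameter count scales linearly with `r`, and `r` went from 16 to 64 while the target modules stayed the same. A back-of-the-envelope check, assuming Mixtral-8x7B's published attention shapes (hidden size 4096, 32 layers, 8 KV heads of head dim 128) and 4-byte adapter weights; these shapes are not recorded in this commit:

```python
# Rough LoRA parameter count for the q/k/v/o projections of Mixtral-8x7B.
hidden, kv_dim, layers = 4096, 1024, 32  # kv_dim = 8 KV heads * head_dim 128

def lora_params(r: int) -> int:
    per_layer = (
        2 * r * hidden             # q_proj: A (r x 4096) + B (4096 x r)
        + r * hidden + r * kv_dim  # k_proj: A (r x 4096) + B (1024 x r)
        + r * hidden + r * kv_dim  # v_proj: same shapes as k_proj
        + 2 * r * hidden           # o_proj: A (r x 4096) + B (4096 x r)
    )
    return per_layer * layers

# At 4 bytes per parameter these land within a few tens of KB (the safetensors
# header) of the two file sizes recorded in the diff above.
print(lora_params(16) * 4)  # 54,525,952  ~ 54,560,368
print(lora_params(64) * 4)  # 218,103,808 ~ 218,138,576
```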
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:49a4507c0a7fa5ace37d5264a12a523497d49f9a6ac3c7578d225d933fea3617
-size 5752
+oid sha256:007877440649249ac071b2c5c00afa3c113d2399d566bb3f2dab4235d750f45e
+size 5624
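Since the new training config sets `hub_model_id: liuylhf/mixtral-lora-less-modules` with `hub_strategy: end`, the adapter committed here can be applied on top of the base model for inference. A minimal sketch; the 4-bit quantization settings below mirror a typical QLoRA setup and are an assumption, not something recorded in this commit:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

base_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
adapter_id = "liuylhf/mixtral-lora-less-modules"  # hub_model_id from the config diff

# Assumed 4-bit loading config (QLoRA-style inference); not part of this commit.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

tokenizer = AutoTokenizer.from_pretrained(base_id)
base_model = AutoModelForCausalLM.from_pretrained(
    base_id,
    quantization_config=bnb_config,
    device_map="auto",
)
model = PeftModel.from_pretrained(base_model, adapter_id)  # attaches the LoRA adapter
```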