Model save

Changed files:
- README.md +14 -36
- adapter_config.json +4 -4
- adapter_model.safetensors +2 -2
- training_args.bin +2 -2
README.md
CHANGED
@@ -2,11 +2,10 @@
 license: apache-2.0
 library_name: peft
 tags:
-- axolotl
 - generated_from_trainer
 base_model: mistralai/Mixtral-8x7B-Instruct-v0.1
 model-index:
-- name: mixtral-lora
+- name: mixtral-lora-less-modules
 results: []
 ---
 
@@ -41,7 +40,7 @@ datasets:
 
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.01
-output_dir:
+output_dir: ./mixtral-qlora-1-epochs-r64
 
 adapter: qlora
 lora_model_dir:
@@ -50,21 +49,18 @@ sequence_len: 4096
 sample_packing: true
 pad_to_sequence_len: true
 
-lora_r:
+lora_r: 64
 lora_alpha: 16
 lora_dropout: 0.05
-lora_target_linear: true
 lora_fan_in_fan_out:
-hub_model_id: liuylhf/mixtral-lora
+hub_model_id: liuylhf/mixtral-lora-less-modules
+hub_strategy: end
 
-
-
-
-
-
-# - v_proj
-# - k_proj
-# - o_proj
+lora_target_modules:
+- q_proj
+- v_proj
+- k_proj
+- o_proj
 
 wandb_project: function-call
 wandb_name: mixtral-instruct-raw-data-v3
@@ -72,7 +68,7 @@ wandb_log_model: end
 
 gradient_accumulation_steps: 4
 micro_batch_size: 2
-num_epochs:
+num_epochs: 1
 optimizer: paged_adamw_8bit
 lr_scheduler: cosine
 learning_rate: 0.001
@@ -113,11 +109,9 @@ fsdp_config:
 
 </details><br>
 
-# mixtral-lora
+# mixtral-lora-less-modules
 
-This model is a fine-tuned version of [mistralai/Mixtral-8x7B-Instruct-v0.1](https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1) on
-It achieves the following results on the evaluation set:
-- Loss: 0.1923
+This model is a fine-tuned version of [mistralai/Mixtral-8x7B-Instruct-v0.1](https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1) on an unknown dataset.
 
 ## Model description
 
@@ -148,23 +142,7 @@ The following hyperparameters were used during training:
 - optimizer: Adam with betas=(0.9,0.95) and epsilon=1e-05
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_steps: 10
-- num_epochs:
+- num_epochs: 1
-
-### Training results
-
-| Training Loss | Epoch | Step | Validation Loss |
-|:-------------:|:-----:|:----:|:---------------:|
-| 3.2966        | 0.0   | 1    | 3.2222          |
-| 0.3736        | 0.05  | 16   | 0.3541          |
-| 0.1777        | 0.1   | 32   | 0.2357          |
-| 0.2366        | 0.16  | 48   | 0.2154          |
-| 0.1917        | 0.21  | 64   | 0.2056          |
-| 0.2213        | 0.26  | 80   | 0.2003          |
-| 0.149         | 0.31  | 96   | 0.1972          |
-| 0.1739        | 0.37  | 112  | 0.1950          |
-| 0.1668        | 0.42  | 128  | 0.1928          |
-| 0.0997        | 0.47  | 144  | 0.1923          |
-
 
 ### Framework versions
 
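For reference, a minimal sketch of how this adapter could be loaded on top of the base model with transformers and peft. The 4-bit quantization arguments mirror a typical QLoRA setup and are an assumption; only the base model and adapter repo names come from the config above.

```python
# Sketch only: load the base model in 4-bit and attach the pushed LoRA adapter.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

base_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
adapter_id = "liuylhf/mixtral-lora-less-modules"

# 4-bit NF4 config chosen to mirror the qlora adapter setting (assumed, not recorded in this commit).
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

tokenizer = AutoTokenizer.from_pretrained(base_id)
model = AutoModelForCausalLM.from_pretrained(
    base_id, quantization_config=bnb_config, device_map="auto"
)
model = PeftModel.from_pretrained(model, adapter_id)  # applies the r=64 attention-only adapter
```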
adapter_config.json
CHANGED
@@ -15,14 +15,14 @@
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
-  "r":
+  "r": 64,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "k_proj",
-    "v_proj",
     "o_proj",
-    "q_proj"
+    "q_proj",
+    "v_proj",
+    "k_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_rslora": false
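The adapter settings above can also be written as an equivalent peft LoraConfig. This is a sketch for reproducing the setup outside axolotl; every value is taken from the diffs in this commit.

```python
# Equivalent PEFT configuration for this adapter (values from the diff above).
from peft import LoraConfig

lora_config = LoraConfig(
    r=64,
    lora_alpha=16,
    lora_dropout=0.05,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    task_type="CAUSAL_LM",
)
```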
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:bd223b7576019fbc18fdb6df07a26b9b662da56d34b65c55bfe245668d413a3f
+size 218138576
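The new adapter size is consistent with r=64 LoRA on the four attention projections. A rough check, assuming the published Mixtral-8x7B shapes (hidden size 4096, 32 layers, grouped-query attention with 1024-wide k/v projections) and fp32 adapter weights:

```python
# Back-of-the-envelope adapter size (assumes Mixtral-8x7B attention shapes).
hidden, kv_dim, layers, r = 4096, 1024, 32, 64

per_layer = (
    2 * r * (hidden + hidden)    # q_proj and o_proj: 4096 -> 4096
    + 2 * r * (hidden + kv_dim)  # k_proj and v_proj: 4096 -> 1024 (GQA)
)
params = per_layer * layers   # ~54.5M trainable parameters
print(params * 4)             # ~218,103,808 bytes in fp32, close to the 218,138,576-byte file
```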
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:007877440649249ac071b2c5c00afa3c113d2399d566bb3f2dab4235d750f45e
+size 5624
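Both binary entries are Git LFS pointers, so the repository itself stores only a hash and a size. A generic sketch (not specific to this repo) for checking a downloaded object against its pointer:

```python
# Verify a downloaded LFS object against the oid/size from its pointer file.
import hashlib
import os

def verify_lfs_object(path: str, expected_oid: str, expected_size: int) -> bool:
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_oid and os.path.getsize(path) == expected_size

# Example using the values from this commit's training_args.bin pointer:
ok = verify_lfs_object(
    "training_args.bin",
    "007877440649249ac071b2c5c00afa3c113d2399d566bb3f2dab4235d750f45e",
    5624,
)
```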