NimaZahedinameghi committed · verified
Commit ba3a992 · 1 Parent(s): e2520fa

Update README.md

Files changed (1):
  1. README.md +69 -15
README.md CHANGED
@@ -49,22 +49,76 @@ The model was fine-tuned on a custom dataset (`incident_descriptions.json`) cont

  The model was fine-tuned using the Axolotl framework with the following configuration:

- ```json
  {
-   "_name_or_path": "mistralai/Mistral-7B-v0.1",
-   "architectures": ["MistralForCausalLM"],
-   "attention_dropout": 0.0,
-   "hidden_size": 4096,
-   "num_attention_heads": 32,
-   "num_hidden_layers": 32,
-   "num_key_value_heads": 8,
-   "quantization_config": {
-     "load_in_8bit": true,
-     "quant_method": "bitsandbytes"
-   },
-   "torch_dtype": "bfloat16",
-   "transformers_version": "4.42.4",
-   "use_cache": false
  }
  ```

  The model was fine-tuned using the Axolotl framework with the following configuration:

+ ```yaml
  {
+ base_model: mistralai/Mistral-7B-v0.1
+ model_type: MistralForCausalLM
+ tokenizer_type: LlamaTokenizer
+
+ load_in_8bit: true
+ load_in_4bit: false
+ strict: false
+
+ adapter: lora
+ lora_model_dir:
+
+ sequence_len: 8192
+ sample_packing: False
+ pad_to_sequence_len: true
+
+ lora_r: 32
+ lora_alpha: 16
+ lora_dropout: 0.05
+ lora_target_linear: true
+ lora_fan_in_fan_out:
+ lora_target_modules:
+   - gate_proj
+   - down_proj
+   - up_proj
+   - q_proj
+   - v_proj
+   - k_proj
+   - o_proj
+
+ gradient_accumulation_steps: 4
+ micro_batch_size: 2
+ num_epochs: 2
+ optimizer: adamw_bnb_8bit
+ lr_scheduler: cosine
+ learning_rate: 0.0002
+
+ train_on_inputs: false
+ group_by_length: false
+ bf16: auto
+ fp16:
+ tf32: false
+
+ gradient_checkpointing: true
+ early_stopping_patience:
+ resume_from_checkpoint:
+ local_rank:
+ logging_steps: 1
+ xformers_attention:
+ flash_attention: true
+
+ loss_watchdog_threshold: 5.0
+ loss_watchdog_patience: 3
+
+ warmup_steps: 10
+ evals_per_epoch: 4
+ eval_table_size:
+ eval_max_new_tokens: 128
+ saves_per_epoch: 1
+ debug:
+ deepspeed:
+ weight_decay: 0.0
+ fsdp:
+ fsdp_config:
+ special_tokens:
+   bos_token: "<s>"
+   eos_token: "</s>"
+   unk_token: "<unk>"
+ save_safetensors: true
  }
  ```
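For reference, the sketch below maps the LoRA and 8-bit quantization values from the new YAML onto a plain PEFT + Transformers setup. It is an illustration of the hyperparameters only, not the script that produced this model; Axolotl drives the actual run from the YAML config (typically launched with something like `accelerate launch -m axolotl.cli.train config.yml`).

```python
# Minimal sketch: the LoRA / quantization hyperparameters from the YAML above,
# expressed with PEFT + Transformers. Illustrative only; the real training run
# was performed by Axolotl from the YAML config.
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model

model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-v0.1",                                 # base_model
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),   # load_in_8bit: true
    torch_dtype=torch.bfloat16,                                  # bf16: auto
    device_map="auto",
)

lora_config = LoraConfig(
    r=32,                      # lora_r
    lora_alpha=16,             # lora_alpha
    lora_dropout=0.05,         # lora_dropout
    target_modules=["gate_proj", "down_proj", "up_proj",
                    "q_proj", "v_proj", "k_proj", "o_proj"],
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()  # only the LoRA adapter weights are trainable
```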