NimaZahedinameghi committed · verified
Commit ba3a992 · 1 Parent(s): e2520fa

Update README.md

Files changed (1):
  1. README.md +69 -15
README.md CHANGED
@@ -49,22 +49,76 @@ The model was fine-tuned on a custom dataset (`incident_descriptions.json`) cont

  The model was fine-tuned using the Axolotl framework with the following configuration:

- ```json
  {
-   "_name_or_path": "mistralai/Mistral-7B-v0.1",
-   "architectures": ["MistralForCausalLM"],
-   "attention_dropout": 0.0,
-   "hidden_size": 4096,
-   "num_attention_heads": 32,
-   "num_hidden_layers": 32,
-   "num_key_value_heads": 8,
-   "quantization_config": {
-     "load_in_8bit": true,
-     "quant_method": "bitsandbytes"
-   },
-   "torch_dtype": "bfloat16",
-   "transformers_version": "4.42.4",
-   "use_cache": false
  }
  ```

  The model was fine-tuned using the Axolotl framework with the following configuration:

+ ```yaml
  {
+ base_model: mistralai/Mistral-7B-v0.1
+ model_type: MistralForCausalLM
+ tokenizer_type: LlamaTokenizer
+
+ load_in_8bit: true
+ load_in_4bit: false
+ strict: false
+
+ adapter: lora
+ lora_model_dir:
+
+ sequence_len: 8192
+ sample_packing: False
+ pad_to_sequence_len: true
+
+ lora_r: 32
+ lora_alpha: 16
+ lora_dropout: 0.05
+ lora_target_linear: true
+ lora_fan_in_fan_out:
+ lora_target_modules:
+   - gate_proj
+   - down_proj
+   - up_proj
+   - q_proj
+   - v_proj
+   - k_proj
+   - o_proj
+
+ gradient_accumulation_steps: 4
+ micro_batch_size: 2
+ num_epochs: 2
+ optimizer: adamw_bnb_8bit
+ lr_scheduler: cosine
+ learning_rate: 0.0002
+
+ train_on_inputs: false
+ group_by_length: false
+ bf16: auto
+ fp16:
+ tf32: false
+
+ gradient_checkpointing: true
+ early_stopping_patience:
+ resume_from_checkpoint:
+ local_rank:
+ logging_steps: 1
+ xformers_attention:
+ flash_attention: true
+
+ loss_watchdog_threshold: 5.0
+ loss_watchdog_patience: 3
+
+ warmup_steps: 10
+ evals_per_epoch: 4
+ eval_table_size:
+ eval_max_new_tokens: 128
+ saves_per_epoch: 1
+ debug:
+ deepspeed:
+ weight_decay: 0.0
+ fsdp:
+ fsdp_config:
+ special_tokens:
+   bos_token: "<s>"
+   eos_token: "</s>"
+   unk_token: "<unk>"
+ save_safetensors: true
  }
  ```
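For reference, the sketch below maps the LoRA and 8-bit quantization values from the new YAML onto a plain PEFT + Transformers setup. It is an illustration of the hyperparameters only, not the script that produced this model; Axolotl drives the actual run from the YAML config (typically launched with something like `accelerate launch -m axolotl.cli.train config.yml`).

```python
# Minimal sketch: the LoRA / quantization hyperparameters from the YAML above,
# expressed with PEFT + Transformers. Illustrative only; the real training run
# was performed by Axolotl from the YAML config.
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model

model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-v0.1",                                 # base_model
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),   # load_in_8bit: true
    torch_dtype=torch.bfloat16,                                  # bf16: auto
    device_map="auto",
)

lora_config = LoraConfig(
    r=32,                      # lora_r
    lora_alpha=16,             # lora_alpha
    lora_dropout=0.05,         # lora_dropout
    target_modules=["gate_proj", "down_proj", "up_proj",
                    "q_proj", "v_proj", "k_proj", "o_proj"],
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()  # only the LoRA adapter weights are trainable
```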