## Training Parameters

- r = 256
- target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj",
                    "lm_head", "embed_tokens"]
- lora_alpha = 32
- lora_dropout = 0
- bias = "none"
- use_gradient_checkpointing = "unsloth"
- random_state = 3407
- use_rslora = True
- use_dora = False
- loftq_config = None
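The `use_gradient_checkpointing = "unsloth"` value suggests these are arguments to Unsloth's `FastLanguageModel.get_peft_model`. A minimal sketch under that assumption; the base model name, sequence length, and 4-bit loading are placeholders, not taken from this README:

```python
from unsloth import FastLanguageModel

# Placeholders: swap in the actual base model and context length.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="your-base-model",
    max_seq_length=4096,
    load_in_4bit=True,  # assumption; not stated in this README
)

# LoRA settings as listed above. lm_head and embed_tokens are trained too,
# which is why a separate embedding learning rate appears below.
model = FastLanguageModel.get_peft_model(
    model,
    r=256,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj",
                    "lm_head", "embed_tokens"],
    lora_alpha=32,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    use_rslora=True,
    use_dora=False,
    loftq_config=None,
)
```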
- per_device_train_batch_size = 1
- gradient_accumulation_steps = 16
- warmup_ratio = 0.1
- num_train_epochs = 3
- learning_rate = 5e-5
- embedding_learning_rate = 5e-6
- max_steps = 0
- group_by_length = False
- bf16 = True
- weight_decay = 0.01
- max_grad_norm = 8.0
- lr_scheduler_type = "cosine"
- optim = "paged_adamw_8bit"
- seed = 3407
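The `embedding_learning_rate` option points at Unsloth's `UnslothTrainer` / `UnslothTrainingArguments`, which let the embedding layers train at a lower rate than the LoRA weights. A sketch under that assumption; `dataset`, the text field, and `output_dir` are placeholders:

```python
from unsloth import UnslothTrainer, UnslothTrainingArguments

trainer = UnslothTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,       # placeholder: your training data
    dataset_text_field="text",   # placeholder field name
    max_seq_length=4096,         # placeholder
    args=UnslothTrainingArguments(
        per_device_train_batch_size=1,
        gradient_accumulation_steps=16,
        warmup_ratio=0.1,
        num_train_epochs=3,
        learning_rate=5e-5,
        embedding_learning_rate=5e-6,  # lower rate for embed_tokens/lm_head
        max_steps=0,                   # disabled; epochs drive run length
        group_by_length=False,
        bf16=True,
        weight_decay=0.01,
        max_grad_norm=8.0,
        lr_scheduler_type="cosine",
        optim="paged_adamw_8bit",
        seed=3407,
        output_dir="outputs",          # placeholder
    ),
)
trainer.train()
```

With a per-device batch of 1 and 16 accumulation steps, the effective batch size is 16.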
## Recommended Hyperparameters

Neutralise all samplers except min_p, which should be set to 0.1, and make sure temperature is applied last in the sampler order.
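To illustrate why "temperature last" matters, here is a backend-agnostic sketch (not any particular inference engine's API): min_p keeps only tokens within 10% of the top token's probability, computed before temperature rescales the distribution.

```python
import numpy as np

def sample(logits: np.ndarray, min_p: float = 0.1, temperature: float = 1.0) -> int:
    # Probabilities at temperature 1 -- min_p filters on these, which is why
    # temperature must come last; applying it first would change which
    # tokens survive the filter.
    probs = np.exp(logits - logits.max())
    probs /= probs.sum()
    keep = probs >= min_p * probs.max()  # min_p = 0.1: keep >= 10% of the top
    filtered = np.where(keep, logits, -np.inf)

    # Temperature applied last, to the surviving tokens only.
    scaled = filtered / temperature
    p = np.exp(scaled - scaled[keep].max())
    p /= p.sum()
    return int(np.random.default_rng().choice(len(p), p=p))
```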
## Limitations