tomar753 committed
Commit e4b3df6 · verified · 1 Parent(s): 427439d

Update README.md

Files changed (1): README.md +31 -5
README.md CHANGED
@@ -59,11 +59,37 @@ While I'm a strong supporter of the fully open-source community, I have to respect

  ## Training Parameters

- [TO BE FILLED]
-
- ## Recommended Settings
-
- [TO BE FILLED]
+ - r = 256
+ - target_modules = ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj", "lm_head", "embed_tokens"]
+ - lora_alpha = 32
+ - lora_dropout = 0
+ - bias = "none"
+ - use_gradient_checkpointing = "unsloth"
+ - random_state = 3407
+ - use_rslora = True
+ - use_dora = False
+ - loftq_config = None
+
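The `use_gradient_checkpointing = "unsloth"` value indicates Unsloth tooling, so as a rough sketch (not the author's actual script), these adapter settings would be passed to `FastLanguageModel.get_peft_model` along these lines; the base model name, sequence length, and 4-bit flag are placeholders the README does not specify:

```python
from unsloth import FastLanguageModel

# Placeholders: this README does not state the base model or context length.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="path/to/base-model",
    max_seq_length=8192,
    load_in_4bit=False,
)

# Adapter settings exactly as listed above. Targeting lm_head and
# embed_tokens means the embeddings train too, which is why a separate
# embedding_learning_rate appears in the trainer settings below.
model = FastLanguageModel.get_peft_model(
    model,
    r=256,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj",
                    "lm_head", "embed_tokens"],
    lora_alpha=32,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    use_rslora=True,
    use_dora=False,
    loftq_config=None,
)
```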
+ - per_device_train_batch_size = 1
+ - gradient_accumulation_steps = 16
+ - warmup_ratio = 0.1
+ - num_train_epochs = 3
+ - learning_rate = 5e-5
+ - embedding_learning_rate = 5e-6
+ - max_steps = 0
+ - group_by_length = False
+ - bf16 = True
+ - weight_decay = 0.01
+ - max_grad_norm = 8.0
+ - lr_scheduler_type = "cosine"
+ - optim = "paged_adamw_8bit"
+ - seed = 3407
+
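Continuing the sketch above: `embedding_learning_rate` is an Unsloth extension, so these values line up with `UnslothTrainingArguments` rather than plain `TrainingArguments`; the dataset, text field, and output directory below are placeholders:

```python
from unsloth import UnslothTrainer, UnslothTrainingArguments

trainer = UnslothTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,        # placeholder: the training data is not named here
    dataset_text_field="text",    # assumption
    max_seq_length=8192,          # assumption, matching the sketch above
    args=UnslothTrainingArguments(
        per_device_train_batch_size=1,
        gradient_accumulation_steps=16,  # effective batch size of 16
        warmup_ratio=0.1,
        num_train_epochs=3,
        learning_rate=5e-5,
        embedding_learning_rate=5e-6,    # 10x lower rate for embed_tokens / lm_head
        max_steps=0,                     # no step cap; num_train_epochs governs
        group_by_length=False,
        bf16=True,
        weight_decay=0.01,
        max_grad_norm=8.0,
        lr_scheduler_type="cosine",
        optim="paged_adamw_8bit",
        seed=3407,
        output_dir="outputs",            # placeholder
    ),
)
trainer.train()
```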
+ ## Recommended Hyperparameters
+
+ Neutralise all samplers, set min_p to 0.1, and make sure temperature is applied last.
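One way to apply this, sketched with llama-cpp-python (this README does not say which inference stack to use); llama.cpp's default sampler chain already runs temperature after min_p, so only the neutral values need to be set explicitly:

```python
from llama_cpp import Llama

llm = Llama(model_path="model.gguf")  # placeholder path

out = llm(
    "Your prompt here",
    temperature=1.0,     # applied last in llama.cpp's default sampler order
    top_k=0,             # 0 disables top-k
    top_p=1.0,           # 1.0 disables nucleus sampling
    typical_p=1.0,       # 1.0 disables typical sampling
    repeat_penalty=1.0,  # 1.0 disables the repetition penalty
    min_p=0.1,           # the one active sampler, as recommended above
    max_tokens=256,
)
print(out["choices"][0]["text"])
```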

  ## Limitations