Update 4bit notes
README.md
@@ -136,7 +136,7 @@ See sample configs in [configs](configs) folder or [examples](examples) for quick start.
 
 - loading
 ```yaml
-
+load_in_4bit: true
 load_in_8bit: true
 bf16: true # require >=ampere
 fp16: true
@@ -175,13 +175,15 @@ tokenizer_type: AutoTokenizer
 # Trust remote code for untrusted source
 trust_remote_code:
 
-# whether you are training a 4-bit quantized model
+# whether you are training a 4-bit GPTQ quantized model
 load_4bit: true
 gptq_groupsize: 128 # group size
 gptq_model_v1: false # v1 or v2
 
 # this will attempt to quantize the model down to 8 bits and use adam 8 bit optimizer
 load_in_8bit: true
+# use bitsandbytes 4 bit
+load_in_4bit:
 
 # Use CUDA bf16
 bf16: true # bool or 'full' for `bf16_full_eval`. require >=ampere
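Read together, the two hunks separate three quantization paths: `load_4bit` plus the `gptq_*` options for a checkpoint already quantized with GPTQ, `load_in_8bit` for bitsandbytes 8-bit (which also switches to the adam 8-bit optimizer), and the new `load_in_4bit` for bitsandbytes 4-bit. Below is a minimal sketch of how these flags might sit together in one config, assuming only the key names shown in the diff; the `base_model` key and its value are illustrative placeholders, and treating the three paths as pick-one is an assumption, not something this diff states.

```yaml
base_model: huggyllama/llama-7b # hypothetical model, not from this diff

# pick one quantization path (assumed mutually exclusive):

# 1) bitsandbytes 4 bit (the flag this commit documents)
load_in_4bit: true

# 2) bitsandbytes 8 bit, with the adam 8 bit optimizer
# load_in_8bit: true

# 3) a 4-bit GPTQ quantized model
# load_4bit: true
# gptq_groupsize: 128 # group size
# gptq_model_v1: false # v1 or v2

bf16: true # require >=ampere
```

Note the asymmetric naming the diff leaves in place: `load_4bit` means a GPTQ-quantized checkpoint, while `load_in_4bit` and `load_in_8bit` mean bitsandbytes quantization when the model is loaded.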