Commit e64d453 · Parent: 4477ac1

upload autogptq format

Committed by n1ck-guo
Signed-off-by: n1ck-guo <[email protected]>
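
This commit converts the checkpoint from AutoRound's native serialization to the AutoGPTQ format: GPTQ-style keys are added to the quantization config, AutoRound-only keys are dropped, and the config file is renamed to the filename GPTQ loaders look for. A minimal sketch of how such an export is typically produced with intel/auto-round follows; the base model id and output directory are placeholders, while the hyperparameters are taken from the config diffs below.

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
from auto_round import AutoRound

base_model = "allenai/Molmo-7B-D-0924"  # hypothetical base model id
model = AutoModelForCausalLM.from_pretrained(base_model, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)

# Hyperparameters mirror the quantization_config recorded in this repo.
autoround = AutoRound(
    model,
    tokenizer,
    bits=4,
    sym=True,
    batch_size=8,
    seqlen=2048,
    nsamples=512,
    lr=1e-3,
    minmax_lr=1e-3,
    dataset="NeelNanda/pile-10k",
    to_quant_block_names="model.transformer.blocks",
)
autoround.quantize()

# Exporting with format="auto_gptq" (rather than the default auto_round
# format) writes quantize_config.json and GPTQ-style config keys.
autoround.save_quantized("molmo-int4-gptq", format="auto_gptq")
```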

README.md DELETED
@@ -1,3 +0,0 @@
----
-license: apache-2.0
----
config.json CHANGED
@@ -25,11 +25,12 @@
   "quantization_config": {
     "amp": true,
     "autoround_version": "0.4.4",
-    "backend": "auto_round:gptq:exllamav2",
     "batch_size": 8,
     "bits": 4,
+    "block_name_to_quantize": "model.transformer.blocks",
+    "damp_percent": 0.01,
     "data_type": "int",
-    "dataset": "NeelNanda/pile-10k",
+    "desc_act": false,
     "enable_minmax_tuning": true,
     "enable_norm_bias_tuning": false,
     "enable_quanted_input": true,
@@ -40,11 +41,11 @@
     "lr": 0.001,
     "minmax_lr": 0.001,
     "nsamples": 512,
-    "quant_method": "intel/auto-round",
+    "quant_method": "gptq",
     "scale_dtype": "torch.float16",
     "seqlen": 2048,
     "sym": true,
-    "to_quant_block_names": "model.transformer.blocks"
+    "true_sequential": false
   },
   "rope_theta": 1000000.0,
   "tie_word_embeddings": false,
quantization_config.json → quantize_config.json RENAMED
@@ -15,10 +15,11 @@
   "amp": true,
   "nsamples": 512,
   "low_gpu_mem_usage": false,
-  "to_quant_block_names": "model.transformer.blocks",
   "enable_norm_bias_tuning": false,
-  "dataset": "NeelNanda/pile-10k",
   "autoround_version": "0.4.4",
-  "quant_method": "intel/auto-round",
-  "backend": "auto_round:gptq:exllamav2"
+  "block_name_to_quantize": "model.transformer.blocks",
+  "quant_method": "gptq",
+  "desc_act": false,
+  "true_sequential": false,
+  "damp_percent": 0.01
   }
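
With the file renamed to quantize_config.json, the filename AutoGPTQ-style loaders expect, and "quant_method" set to "gptq", the checkpoint can be loaded through the standard transformers path. A minimal loading sketch; the repo id here is a placeholder for the actual OPEA model id:

```python
import torch
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "OPEA/molmo-int4-gptq",   # placeholder repo id
    device_map="auto",
    torch_dtype=torch.float16,
    trust_remote_code=True,   # molmo ships custom modeling code
)
```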