cpatonn committed on
Commit
e369a9f
·
verified ·
1 Parent(s): 4b530b7

Upload folder using huggingface_hub

Browse files
config.json CHANGED
@@ -2,6 +2,7 @@
2
  "architectures": [
3
  "Glm4vMoeForConditionalGeneration"
4
  ],
 
5
  "image_end_token_id": 151340,
6
  "image_start_token_id": 151339,
7
  "image_token_id": 151363,
@@ -9,6 +10,7 @@
9
  "quantization_config": {
10
  "config_groups": {
11
  "group_0": {
 
12
  "input_activations": null,
13
  "output_activations": null,
14
  "targets": [
@@ -20,7 +22,7 @@
20
  "dynamic": false,
21
  "group_size": 32,
22
  "num_bits": 4,
23
- "observer": "minmax",
24
  "observer_kwargs": {},
25
  "strategy": "group",
26
  "symmetric": true,
@@ -155,6 +157,148 @@
155
  "model.visual.merger.gate_proj",
156
  "model.visual.merger.up_proj",
157
  "model.visual.merger.down_proj",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
  "lm_head"
159
  ],
160
  "kv_cache_scheme": null,
@@ -162,11 +306,12 @@
162
  "quantization_status": "compressed",
163
  "sparsity_config": {},
164
  "transform_config": {},
165
- "version": "0.10.3.dev33+g33c52de"
166
  },
167
  "text_config": {
168
  "attention_bias": true,
169
  "attention_dropout": 0.0,
 
170
  "eos_token_id": [
171
  151329,
172
  151336,
@@ -206,13 +351,11 @@
206
  "rope_theta": 10000.0,
207
  "routed_scaling_factor": 1.0,
208
  "topk_group": 1,
209
- "torch_dtype": "bfloat16",
210
  "use_cache": true,
211
  "use_qk_norm": false,
212
  "vocab_size": 151552
213
  },
214
  "tie_word_embeddings": false,
215
- "torch_dtype": "bfloat16",
216
  "transformers_version": "4.56.0.dev0",
217
  "video_end_token_id": 151342,
218
  "video_start_token_id": 151341,
@@ -233,7 +376,6 @@
233
  "patch_size": 14,
234
  "rms_norm_eps": 1e-05,
235
  "spatial_merge_size": 2,
236
- "temporal_patch_size": 2,
237
- "torch_dtype": "bfloat16"
238
  }
239
  }
 
2
  "architectures": [
3
  "Glm4vMoeForConditionalGeneration"
4
  ],
5
+ "dtype": "bfloat16",
6
  "image_end_token_id": 151340,
7
  "image_start_token_id": 151339,
8
  "image_token_id": 151363,
 
10
  "quantization_config": {
11
  "config_groups": {
12
  "group_0": {
13
+ "format": "pack-quantized",
14
  "input_activations": null,
15
  "output_activations": null,
16
  "targets": [
 
22
  "dynamic": false,
23
  "group_size": 32,
24
  "num_bits": 4,
25
+ "observer": "mse",
26
  "observer_kwargs": {},
27
  "strategy": "group",
28
  "symmetric": true,
 
157
  "model.visual.merger.gate_proj",
158
  "model.visual.merger.up_proj",
159
  "model.visual.merger.down_proj",
160
+ "model.language_model.layers.0.self_attn.q_proj",
161
+ "model.language_model.layers.0.self_attn.k_proj",
162
+ "model.language_model.layers.0.self_attn.v_proj",
163
+ "model.language_model.layers.0.self_attn.o_proj",
164
+ "model.language_model.layers.0.mlp.gate_proj",
165
+ "model.language_model.layers.0.mlp.up_proj",
166
+ "model.language_model.layers.0.mlp.down_proj",
167
+ "model.language_model.layers.1.mlp.shared_experts.gate_proj",
168
+ "model.language_model.layers.1.mlp.shared_experts.up_proj",
169
+ "model.language_model.layers.1.mlp.shared_experts.down_proj",
170
+ "model.language_model.layers.2.mlp.shared_experts.gate_proj",
171
+ "model.language_model.layers.2.mlp.shared_experts.up_proj",
172
+ "model.language_model.layers.2.mlp.shared_experts.down_proj",
173
+ "model.language_model.layers.3.mlp.shared_experts.gate_proj",
174
+ "model.language_model.layers.3.mlp.shared_experts.up_proj",
175
+ "model.language_model.layers.3.mlp.shared_experts.down_proj",
176
+ "model.language_model.layers.4.mlp.shared_experts.gate_proj",
177
+ "model.language_model.layers.4.mlp.shared_experts.up_proj",
178
+ "model.language_model.layers.4.mlp.shared_experts.down_proj",
179
+ "model.language_model.layers.5.mlp.shared_experts.gate_proj",
180
+ "model.language_model.layers.5.mlp.shared_experts.up_proj",
181
+ "model.language_model.layers.5.mlp.shared_experts.down_proj",
182
+ "model.language_model.layers.6.mlp.shared_experts.gate_proj",
183
+ "model.language_model.layers.6.mlp.shared_experts.up_proj",
184
+ "model.language_model.layers.6.mlp.shared_experts.down_proj",
185
+ "model.language_model.layers.7.mlp.shared_experts.gate_proj",
186
+ "model.language_model.layers.7.mlp.shared_experts.up_proj",
187
+ "model.language_model.layers.7.mlp.shared_experts.down_proj",
188
+ "model.language_model.layers.8.mlp.shared_experts.gate_proj",
189
+ "model.language_model.layers.8.mlp.shared_experts.up_proj",
190
+ "model.language_model.layers.8.mlp.shared_experts.down_proj",
191
+ "model.language_model.layers.9.mlp.shared_experts.gate_proj",
192
+ "model.language_model.layers.9.mlp.shared_experts.up_proj",
193
+ "model.language_model.layers.9.mlp.shared_experts.down_proj",
194
+ "model.language_model.layers.10.mlp.shared_experts.gate_proj",
195
+ "model.language_model.layers.10.mlp.shared_experts.up_proj",
196
+ "model.language_model.layers.10.mlp.shared_experts.down_proj",
197
+ "model.language_model.layers.11.mlp.shared_experts.gate_proj",
198
+ "model.language_model.layers.11.mlp.shared_experts.up_proj",
199
+ "model.language_model.layers.11.mlp.shared_experts.down_proj",
200
+ "model.language_model.layers.12.mlp.shared_experts.gate_proj",
201
+ "model.language_model.layers.12.mlp.shared_experts.up_proj",
202
+ "model.language_model.layers.12.mlp.shared_experts.down_proj",
203
+ "model.language_model.layers.13.mlp.shared_experts.gate_proj",
204
+ "model.language_model.layers.13.mlp.shared_experts.up_proj",
205
+ "model.language_model.layers.13.mlp.shared_experts.down_proj",
206
+ "model.language_model.layers.14.mlp.shared_experts.gate_proj",
207
+ "model.language_model.layers.14.mlp.shared_experts.up_proj",
208
+ "model.language_model.layers.14.mlp.shared_experts.down_proj",
209
+ "model.language_model.layers.15.mlp.shared_experts.gate_proj",
210
+ "model.language_model.layers.15.mlp.shared_experts.up_proj",
211
+ "model.language_model.layers.15.mlp.shared_experts.down_proj",
212
+ "model.language_model.layers.16.mlp.shared_experts.gate_proj",
213
+ "model.language_model.layers.16.mlp.shared_experts.up_proj",
214
+ "model.language_model.layers.16.mlp.shared_experts.down_proj",
215
+ "model.language_model.layers.17.mlp.shared_experts.gate_proj",
216
+ "model.language_model.layers.17.mlp.shared_experts.up_proj",
217
+ "model.language_model.layers.17.mlp.shared_experts.down_proj",
218
+ "model.language_model.layers.18.mlp.shared_experts.gate_proj",
219
+ "model.language_model.layers.18.mlp.shared_experts.up_proj",
220
+ "model.language_model.layers.18.mlp.shared_experts.down_proj",
221
+ "model.language_model.layers.19.mlp.shared_experts.gate_proj",
222
+ "model.language_model.layers.19.mlp.shared_experts.up_proj",
223
+ "model.language_model.layers.19.mlp.shared_experts.down_proj",
224
+ "model.language_model.layers.20.mlp.shared_experts.gate_proj",
225
+ "model.language_model.layers.20.mlp.shared_experts.up_proj",
226
+ "model.language_model.layers.20.mlp.shared_experts.down_proj",
227
+ "model.language_model.layers.21.mlp.shared_experts.gate_proj",
228
+ "model.language_model.layers.21.mlp.shared_experts.up_proj",
229
+ "model.language_model.layers.21.mlp.shared_experts.down_proj",
230
+ "model.language_model.layers.22.mlp.shared_experts.gate_proj",
231
+ "model.language_model.layers.22.mlp.shared_experts.up_proj",
232
+ "model.language_model.layers.22.mlp.shared_experts.down_proj",
233
+ "model.language_model.layers.23.mlp.shared_experts.gate_proj",
234
+ "model.language_model.layers.23.mlp.shared_experts.up_proj",
235
+ "model.language_model.layers.23.mlp.shared_experts.down_proj",
236
+ "model.language_model.layers.24.mlp.shared_experts.gate_proj",
237
+ "model.language_model.layers.24.mlp.shared_experts.up_proj",
238
+ "model.language_model.layers.24.mlp.shared_experts.down_proj",
239
+ "model.language_model.layers.25.mlp.shared_experts.gate_proj",
240
+ "model.language_model.layers.25.mlp.shared_experts.up_proj",
241
+ "model.language_model.layers.25.mlp.shared_experts.down_proj",
242
+ "model.language_model.layers.26.mlp.shared_experts.gate_proj",
243
+ "model.language_model.layers.26.mlp.shared_experts.up_proj",
244
+ "model.language_model.layers.26.mlp.shared_experts.down_proj",
245
+ "model.language_model.layers.27.mlp.shared_experts.gate_proj",
246
+ "model.language_model.layers.27.mlp.shared_experts.up_proj",
247
+ "model.language_model.layers.27.mlp.shared_experts.down_proj",
248
+ "model.language_model.layers.28.mlp.shared_experts.gate_proj",
249
+ "model.language_model.layers.28.mlp.shared_experts.up_proj",
250
+ "model.language_model.layers.28.mlp.shared_experts.down_proj",
251
+ "model.language_model.layers.29.mlp.shared_experts.gate_proj",
252
+ "model.language_model.layers.29.mlp.shared_experts.up_proj",
253
+ "model.language_model.layers.29.mlp.shared_experts.down_proj",
254
+ "model.language_model.layers.30.mlp.shared_experts.gate_proj",
255
+ "model.language_model.layers.30.mlp.shared_experts.up_proj",
256
+ "model.language_model.layers.30.mlp.shared_experts.down_proj",
257
+ "model.language_model.layers.31.mlp.shared_experts.gate_proj",
258
+ "model.language_model.layers.31.mlp.shared_experts.up_proj",
259
+ "model.language_model.layers.31.mlp.shared_experts.down_proj",
260
+ "model.language_model.layers.32.mlp.shared_experts.gate_proj",
261
+ "model.language_model.layers.32.mlp.shared_experts.up_proj",
262
+ "model.language_model.layers.32.mlp.shared_experts.down_proj",
263
+ "model.language_model.layers.33.mlp.shared_experts.gate_proj",
264
+ "model.language_model.layers.33.mlp.shared_experts.up_proj",
265
+ "model.language_model.layers.33.mlp.shared_experts.down_proj",
266
+ "model.language_model.layers.34.mlp.shared_experts.gate_proj",
267
+ "model.language_model.layers.34.mlp.shared_experts.up_proj",
268
+ "model.language_model.layers.34.mlp.shared_experts.down_proj",
269
+ "model.language_model.layers.35.mlp.shared_experts.gate_proj",
270
+ "model.language_model.layers.35.mlp.shared_experts.up_proj",
271
+ "model.language_model.layers.35.mlp.shared_experts.down_proj",
272
+ "model.language_model.layers.36.mlp.shared_experts.gate_proj",
273
+ "model.language_model.layers.36.mlp.shared_experts.up_proj",
274
+ "model.language_model.layers.36.mlp.shared_experts.down_proj",
275
+ "model.language_model.layers.37.mlp.shared_experts.gate_proj",
276
+ "model.language_model.layers.37.mlp.shared_experts.up_proj",
277
+ "model.language_model.layers.37.mlp.shared_experts.down_proj",
278
+ "model.language_model.layers.38.mlp.shared_experts.gate_proj",
279
+ "model.language_model.layers.38.mlp.shared_experts.up_proj",
280
+ "model.language_model.layers.38.mlp.shared_experts.down_proj",
281
+ "model.language_model.layers.39.mlp.shared_experts.gate_proj",
282
+ "model.language_model.layers.39.mlp.shared_experts.up_proj",
283
+ "model.language_model.layers.39.mlp.shared_experts.down_proj",
284
+ "model.language_model.layers.40.mlp.shared_experts.gate_proj",
285
+ "model.language_model.layers.40.mlp.shared_experts.up_proj",
286
+ "model.language_model.layers.40.mlp.shared_experts.down_proj",
287
+ "model.language_model.layers.41.mlp.shared_experts.gate_proj",
288
+ "model.language_model.layers.41.mlp.shared_experts.up_proj",
289
+ "model.language_model.layers.41.mlp.shared_experts.down_proj",
290
+ "model.language_model.layers.42.mlp.shared_experts.gate_proj",
291
+ "model.language_model.layers.42.mlp.shared_experts.up_proj",
292
+ "model.language_model.layers.42.mlp.shared_experts.down_proj",
293
+ "model.language_model.layers.43.mlp.shared_experts.gate_proj",
294
+ "model.language_model.layers.43.mlp.shared_experts.up_proj",
295
+ "model.language_model.layers.43.mlp.shared_experts.down_proj",
296
+ "model.language_model.layers.44.mlp.shared_experts.gate_proj",
297
+ "model.language_model.layers.44.mlp.shared_experts.up_proj",
298
+ "model.language_model.layers.44.mlp.shared_experts.down_proj",
299
+ "model.language_model.layers.45.mlp.shared_experts.gate_proj",
300
+ "model.language_model.layers.45.mlp.shared_experts.up_proj",
301
+ "model.language_model.layers.45.mlp.shared_experts.down_proj",
302
  "lm_head"
303
  ],
304
  "kv_cache_scheme": null,
 
306
  "quantization_status": "compressed",
307
  "sparsity_config": {},
308
  "transform_config": {},
309
+ "version": "0.11.1.a20250828"
310
  },
311
  "text_config": {
312
  "attention_bias": true,
313
  "attention_dropout": 0.0,
314
+ "dtype": "bfloat16",
315
  "eos_token_id": [
316
  151329,
317
  151336,
 
351
  "rope_theta": 10000.0,
352
  "routed_scaling_factor": 1.0,
353
  "topk_group": 1,
 
354
  "use_cache": true,
355
  "use_qk_norm": false,
356
  "vocab_size": 151552
357
  },
358
  "tie_word_embeddings": false,
 
359
  "transformers_version": "4.56.0.dev0",
360
  "video_end_token_id": 151342,
361
  "video_start_token_id": 151341,
 
376
  "patch_size": 14,
377
  "rms_norm_eps": 1e-05,
378
  "spatial_merge_size": 2,
379
+ "temporal_patch_size": 2
 
380
  }
381
  }
model-00001-of-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4357d60ff461c2acfe7b7cb562a6bc12b8ddfec1ee792ae9636603ca3601458b
3
+ size 4999540600
model-00002-of-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f51fd8ead335874afab8f236aef173420bc98c4438816d3678e9e4d6fec12ba
3
+ size 4998839240
model-00003-of-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0376c9975cdc2232128d3c434ddf29906ade0be93039871a8ef594a982b8d78
3
+ size 4998547048
model-00004-of-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c834b0796060a271197eb813ed0ca783957d325e85d83453750005ba6b06d71b
3
+ size 4998551072
model-00005-of-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7cbb2e318c178bf4f56ad776c272986733d24a67a5a68109613a6f8001a2f2bf
3
+ size 4998551624
model-00006-of-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2cdcf289b9ef58a701030c27dbfcca697e8399ce2cfc44b9ce28a24d53bba3b6
3
+ size 4998843432
model-00007-of-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b70784b17a8b038c69b1884222fc49066822bdb2292a1879d0724e50d0da3eea
3
+ size 4998551536
model-00008-of-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3efb29f4544290a6629b244aa2f2ba87ec5bf00459aa6b7bbebe0232b3551a8
3
+ size 4998551536
model-00009-of-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9bc59075c55df01634507cbb424c4a3de6da79b63a0bd09c8b2faa236aa461b
3
+ size 4998843728
model-00010-of-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce4f03f6780f2add935254ebba3a76abc37a39ad5227d42e577f95cd2e221351
3
+ size 4998551336
model-00011-of-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2c67db321f01d649ec7498440b9c64b21f49b2b8308cae42d8a4a0d8d2da7ea
3
+ size 4998551536
model-00012-of-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:935d6fcb0c230a5574ddf200c4828fdb3f765e8fa6604552e911e19f99f07d7f
3
+ size 4998551552
model-00013-of-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d7139e7933acf7b77864ce4d4d26c212230a3ad52fa2a1b73d70255fbbbfb8e
3
+ size 3892761408
model-00014-of-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a8d73275d82ef4305b377529c2426ad55ca13f52d208a15947efde7d8684f0e
3
+ size 1241514112
model.safetensors.index.json CHANGED
The diff for this file is too large to render. See raw diff
 
recipe.yaml CHANGED
@@ -13,12 +13,15 @@ default_stage:
13
  block_structure: null
14
  dynamic: false
15
  actorder: null
16
- observer: minmax
17
  observer_kwargs: {}
18
  input_activations: null
19
  output_activations: null
 
20
  targets: [Linear]
21
- ignore: [lm_head, 're:.*visual.*']
 
 
22
  mappings:
23
  - smooth_layer: re:.*input_layernorm$
24
  balance_layers: ['re:.*q_proj$', 're:.*k_proj$', 're:.*v_proj$']
@@ -28,4 +31,5 @@ default_stage:
28
  balance_layers: ['re:.*gate_proj$', 're:.*up_proj$']
29
  - smooth_layer: re:.*up_proj$
30
  balance_layers: ['re:.*down_proj$']
 
31
  duo_scaling: true
 
13
  block_structure: null
14
  dynamic: false
15
  actorder: null
16
+ observer: mse
17
  observer_kwargs: {}
18
  input_activations: null
19
  output_activations: null
20
+ format: null
21
  targets: [Linear]
22
+ ignore: [lm_head, 're:.*embed_tokens', 're:.*input_layernorm', 're:.*post_attention_layernorm',
23
+ model.language_model.norm, 're:.*shared_experts.*', 're:model.language_model.layers.0.*',
24
+ 're:.*mlp.gate', 're:model.visual.*']
25
  mappings:
26
  - smooth_layer: re:.*input_layernorm$
27
  balance_layers: ['re:.*q_proj$', 're:.*k_proj$', 're:.*v_proj$']
 
31
  balance_layers: ['re:.*gate_proj$', 're:.*up_proj$']
32
  - smooth_layer: re:.*up_proj$
33
  balance_layers: ['re:.*down_proj$']
34
+ offload_device: !!python/object/apply:torch.device [cpu]
35
  duo_scaling: true
video_preprocessor_config.json CHANGED
@@ -31,6 +31,7 @@
31
  "processor_class": "Glm4vProcessor",
32
  "resample": 3,
33
  "rescale_factor": 0.00392156862745098,
 
34
  "size": {
35
  "longest_edge": 47040000,
36
  "shortest_edge": 12544
 
31
  "processor_class": "Glm4vProcessor",
32
  "resample": 3,
33
  "rescale_factor": 0.00392156862745098,
34
+ "return_metadata": false,
35
  "size": {
36
  "longest_edge": 47040000,
37
  "shortest_edge": 12544