nexa-collaboration
/

gptqmodel-10000-g32-40tulu3if-40ultrachat-20tulu3gsm-Llama3.2-1B-instruct-4bit

PerryCheng614 committed on 26 days ago

Commit

7819792

verified ·

1 Parent(s): 3528b2e

Upload folder using huggingface_hub

Browse files

Files changed (4) hide show

config.json +62 -0
model.safetensors +3 -0
quant_log.csv +113 -0
quantize_config.json +21 -0

config.json ADDED Viewed

	@@ -0,0 +1,62 @@

+{
+  "_attn_implementation_autoset": true,
+  "_name_or_path": "/home/azureuser/.cache/huggingface/hub/models--meta-llama--Llama-3.2-1B-Instruct/snapshots/9213176726f574b556790deb65791e0c5aa438b6",
+  "architectures": [
+    "LlamaForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 128000,
+  "eos_token_id": [
+    128001,
+    128008,
+    128009
+  ],
+  "head_dim": 64,
+  "hidden_act": "silu",
+  "hidden_size": 2048,
+  "initializer_range": 0.02,
+  "intermediate_size": 8192,
+  "max_position_embeddings": 131072,
+  "mlp_bias": false,
+  "model_type": "llama",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 16,
+  "num_key_value_heads": 8,
+  "pretraining_tp": 1,
+  "quantization_config": {
+    "bits": 4,
+    "checkpoint_format": "gptq",
+    "desc_act": true,
+    "dynamic": null,
+    "group_size": 32,
+    "lm_head": false,
+    "meta": {
+      "damp_auto_increment": 0.0025,
+      "damp_percent": 0.01,
+      "mse": 0.0,
+      "quantizer": [
+        "gptqmodel:1.5.1-dev"
+      ],
+      "static_groups": false,
+      "true_sequential": true,
+      "uri": "https://github.com/modelcloud/gptqmodel"
+    },
+    "quant_method": "gptq",
+    "sym": true
+  },
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": {
+    "factor": 32.0,
+    "high_freq_factor": 4.0,
+    "low_freq_factor": 1.0,
+    "original_max_position_embeddings": 8192,
+    "rope_type": "llama3"
+  },
+  "rope_theta": 500000.0,
+  "tie_word_embeddings": true,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.47.1",
+  "use_cache": true,
+  "vocab_size": 128256
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:85c070d6fc7afa65c11681d789532f1ea79328ad47b0efd3c880af854999cdab
+size 1614733472

quant_log.csv ADDED Viewed

	@@ -0,0 +1,113 @@

+layer,module,loss,damp,time
+0,self_attn.k_proj,0.03242,0.01000,0.906
+0,self_attn.v_proj,0.00080,0.01000,0.680
+0,self_attn.q_proj,0.06659,0.01000,0.697
+0,self_attn.o_proj,0.00007,0.01000,0.746
+0,mlp.up_proj,0.05609,0.01000,0.751
+0,mlp.gate_proj,0.07055,0.01000,0.817
+0,mlp.down_proj,0.00038,0.01000,3.103
+1,self_attn.k_proj,0.05143,0.01000,0.746
+1,self_attn.v_proj,0.00292,0.01000,0.728
+1,self_attn.q_proj,0.09487,0.01000,0.708
+1,self_attn.o_proj,0.00013,0.01000,0.733
+1,mlp.up_proj,0.07948,0.01000,0.788
+1,mlp.gate_proj,0.10903,0.01000,0.859
+1,mlp.down_proj,0.28001,0.01000,2.977
+2,self_attn.k_proj,0.09702,0.01000,0.750
+2,self_attn.v_proj,0.00672,0.01000,0.677
+2,self_attn.q_proj,0.18856,0.01000,0.696
+2,self_attn.o_proj,0.00022,0.01000,0.725
+2,mlp.up_proj,0.09984,0.01000,0.781
+2,mlp.gate_proj,0.15930,0.01000,0.803
+2,mlp.down_proj,0.00119,0.01000,2.998
+3,self_attn.k_proj,0.06446,0.01000,0.734
+3,self_attn.v_proj,0.00860,0.01000,0.685
+3,self_attn.q_proj,0.14038,0.01000,0.688
+3,self_attn.o_proj,0.00043,0.01000,0.744
+3,mlp.up_proj,0.12107,0.01000,0.794
+3,mlp.gate_proj,0.24166,0.01000,0.799
+3,mlp.down_proj,0.00183,0.01000,3.019
+4,self_attn.k_proj,0.06840,0.01000,0.755
+4,self_attn.v_proj,0.00803,0.01000,0.702
+4,self_attn.q_proj,0.14097,0.01000,0.707
+4,self_attn.o_proj,0.00079,0.01000,0.731
+4,mlp.up_proj,0.12585,0.01000,0.765
+4,mlp.gate_proj,0.27268,0.01000,0.796
+4,mlp.down_proj,0.00232,0.01000,2.969
+5,self_attn.k_proj,0.10500,0.01000,0.766
+5,self_attn.v_proj,0.00724,0.01000,0.684
+5,self_attn.q_proj,0.18579,0.01000,0.700
+5,self_attn.o_proj,0.00092,0.01000,0.754
+5,mlp.up_proj,0.13968,0.01000,0.765
+5,mlp.gate_proj,0.25845,0.01000,0.796
+5,mlp.down_proj,0.00277,0.01000,3.046
+6,self_attn.k_proj,0.08859,0.01000,0.727
+6,self_attn.v_proj,0.00953,0.01000,0.689
+6,self_attn.q_proj,0.14065,0.01000,0.699
+6,self_attn.o_proj,0.00129,0.01000,0.735
+6,mlp.up_proj,0.14354,0.01000,0.791
+6,mlp.gate_proj,0.25938,0.01000,0.785
+6,mlp.down_proj,0.00291,0.01000,3.035
+7,self_attn.k_proj,0.08833,0.01000,0.741
+7,self_attn.v_proj,0.01088,0.01000,0.672
+7,self_attn.q_proj,0.16550,0.01000,0.681
+7,self_attn.o_proj,0.00129,0.01000,0.746
+7,mlp.up_proj,0.15128,0.01000,0.779
+7,mlp.gate_proj,0.24538,0.01000,0.803
+7,mlp.down_proj,0.00320,0.01000,3.057
+8,self_attn.k_proj,0.10553,0.01000,0.749
+8,self_attn.v_proj,0.01056,0.01000,0.678
+8,self_attn.q_proj,0.17305,0.01000,0.717
+8,self_attn.o_proj,0.00191,0.01000,0.732
+8,mlp.up_proj,0.17159,0.01000,0.759
+8,mlp.gate_proj,0.27065,0.01000,0.804
+8,mlp.down_proj,0.00440,0.01000,3.075
+9,self_attn.k_proj,0.08821,0.01000,0.751
+9,self_attn.v_proj,0.01267,0.01000,0.671
+9,self_attn.q_proj,0.21381,0.01000,0.702
+9,self_attn.o_proj,0.00240,0.01000,0.738
+9,mlp.up_proj,0.18510,0.01000,0.789
+9,mlp.gate_proj,0.30127,0.01000,0.789
+9,mlp.down_proj,0.00538,0.01000,3.119
+10,self_attn.k_proj,0.11026,0.01000,0.753
+10,self_attn.v_proj,0.01362,0.01000,0.693
+10,self_attn.q_proj,0.22377,0.01000,0.718
+10,self_attn.o_proj,0.00176,0.01000,0.748
+10,mlp.up_proj,0.22299,0.01000,0.774
+10,mlp.gate_proj,0.35132,0.01000,0.820
+10,mlp.down_proj,0.00690,0.01000,3.018
+11,self_attn.k_proj,0.13077,0.01000,0.753
+11,self_attn.v_proj,0.01364,0.01000,0.718
+11,self_attn.q_proj,0.21940,0.01000,0.718
+11,self_attn.o_proj,0.00149,0.01000,0.758
+11,mlp.up_proj,0.25063,0.01000,0.773
+11,mlp.gate_proj,0.38616,0.01000,0.799
+11,mlp.down_proj,0.00782,0.01000,2.983
+12,self_attn.k_proj,0.12650,0.01000,0.742
+12,self_attn.v_proj,0.01431,0.01000,0.666
+12,self_attn.q_proj,0.21842,0.01000,0.733
+12,self_attn.o_proj,0.00136,0.01000,0.730
+12,mlp.up_proj,0.27024,0.01000,0.783
+12,mlp.gate_proj,0.39223,0.01000,0.795
+12,mlp.down_proj,0.00925,0.01000,2.858
+13,self_attn.k_proj,0.12914,0.01000,0.687
+13,self_attn.v_proj,0.02417,0.01000,0.633
+13,self_attn.q_proj,0.25690,0.01000,0.634
+13,self_attn.o_proj,0.00204,0.01000,0.717
+13,mlp.up_proj,0.31966,0.01000,0.770
+13,mlp.gate_proj,0.42314,0.01000,0.836
+13,mlp.down_proj,0.01332,0.01000,2.869
+14,self_attn.k_proj,0.13651,0.01000,0.691
+14,self_attn.v_proj,0.05146,0.01000,0.625
+14,self_attn.q_proj,0.26610,0.01000,0.670
+14,self_attn.o_proj,0.00420,0.01000,0.708
+14,mlp.up_proj,0.36466,0.01000,0.754
+14,mlp.gate_proj,0.52044,0.01000,0.870
+14,mlp.down_proj,0.01716,0.01000,2.821
+15,self_attn.k_proj,0.12916,0.01000,0.698
+15,self_attn.v_proj,0.05582,0.01000,0.641
+15,self_attn.q_proj,0.24866,0.01000,0.646
+15,self_attn.o_proj,0.01490,0.01000,0.705
+15,mlp.up_proj,0.43859,0.01000,0.785
+15,mlp.gate_proj,0.58076,0.01000,0.905
+15,mlp.down_proj,0.03817,0.01000,2.872

quantize_config.json ADDED Viewed

	@@ -0,0 +1,21 @@

+{
+  "bits": 4,
+  "dynamic": null,
+  "group_size": 32,
+  "desc_act": true,
+  "sym": true,
+  "lm_head": false,
+  "quant_method": "gptq",
+  "checkpoint_format": "gptq",
+  "meta": {
+    "quantizer": [
+      "gptqmodel:1.5.1-dev"
+    ],
+    "uri": "https://github.com/modelcloud/gptqmodel",
+    "damp_percent": 0.01,
+    "damp_auto_increment": 0.0025,
+    "static_groups": false,
+    "true_sequential": true,
+    "mse": 0.0
+  }
+}