bf16 to fp16
- Untitled.ipynb +175 -0
- config.json +2 -2
- model.safetensors +2 -2
- model_fp16.safetensors +3 -0
- quant_log.csv +168 -168
- untitled.py +15 -0
Untitled.ipynb
ADDED
@@ -0,0 +1,175 @@
Cell 1 (execution_count 1) converts the BF16 weights to FP16:

import torch
from safetensors import safe_open
from safetensors.torch import save_file

# Load the safetensors file
model_path = "model.safetensors"
with safe_open(model_path, framework="pt", device="cpu") as f:
    state_dict = {key: f.get_tensor(key) for key in f.keys()}

# Convert BF16 to FP16
fp16_state_dict = {key: value.to(torch.float16) for key, value in state_dict.items()}

# Save as a new safetensors file
output_path = "model_fp16.safetensors"
save_file(fp16_state_dict, output_path)
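A quick sanity check one could run right after this cell, to confirm that every tensor in the new file really came out as FP16 (a minimal sketch, reusing the output path above):

import torch
from safetensors import safe_open

# Every tensor in the converted file should now be float16
with safe_open("model_fp16.safetensors", framework="pt", device="cpu") as f:
    for key in f.keys():
        t = f.get_tensor(key)
        assert t.dtype == torch.float16, f"{key} still has dtype {t.dtype}"
print("all tensors are float16")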
Cell 2 (execution_count 4) reloads the converted checkpoint for a smoke test; it raised an error:

from transformers import AutoModelForCausalLM, AutoTokenizer

# Model path
model_path = "./"

# Load the model with an explicit torch_dtype
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.float16,  # torch.float16 to match the converted weights
    device_map="auto"           # optional: place layers automatically
)

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Test the model
input_text = "Hello, how are you?"
inputs = tokenizer(input_text, return_tensors="pt")
outputs = model.generate(**inputs)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Output (abridged traceback): from_pretrained builds a GPTQ quantizer for this checkpoint and calls GptqHfQuantizer.validate_environment, which queries importlib.metadata.version("auto-gptq") and fails with:

PackageNotFoundError: No package metadata was found for auto-gptq
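The failure is about quantization metadata rather than the converted weights: per the traceback, transformers found a GPTQ quantization_config in config.json (this is a GPTQ-quantized repo, as quant_log.csv below also suggests) and refuses to load until auto-gptq is importable. A minimal way to inspect that field, assuming config.json sits in the working directory:

import json

# A dict here (typically with quant_method "gptq") is what triggers
# the auto-gptq requirement in transformers' from_pretrained
with open("config.json") as f:
    config = json.load(f)
print(config.get("quantization_config"))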
Cell 3 (execution_count 5) tries to install the missing dependency; the install failed:

!pip install auto-gptq

Output (abridged): pip, pulling from the Aliyun PyPI mirror, downloads auto_gptq-0.7.1.tar.gz and auto_gptq-0.7.0.tar.gz but discards both for inconsistent versions (their metadata reports 0.7.1+cu1241 and 0.7.0+cu1241 instead of the plain version numbers). It then falls back to auto_gptq-0.6.0.tar.gz, whose setup.py aborts while generating QiGen kernels:

python: can't open file '/tmp/pip-install-9tzda5pv/auto-gptq_60a9cac9f2ef4bb89bb1ce3f0d0126b0/./autogptq_extension/qigen/generate.py': [Errno 2] No such file or directory
...
Exception: Generating QiGen kernels failed with the error shown above.

pip ends with "error: metadata-generation-failed" and notes that this is an issue with the package, not pip.
Cell 4 (never executed): empty.

Notebook metadata: kernel "Python 3 (ipykernel)", Python 3.12.3, nbformat 4 (minor 5).
config.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "_attn_implementation_autoset": true,
-  "_name_or_path": "
+  "_name_or_path": "Qwen2.5-0.5B-Instruct-FP16",
   "architectures": [
     "Qwen2ForCausalLM"
   ],
@@ -43,7 +43,7 @@
   "rope_theta": 1000000.0,
   "sliding_window": null,
   "tie_word_embeddings": true,
-  "torch_dtype": "
+  "torch_dtype": "float16",
   "transformers_version": "4.47.1",
   "use_cache": true,
   "use_sliding_window": false,
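For context: torch_dtype in config.json is the dtype transformers falls back to when from_pretrained is called with torch_dtype="auto", so it should agree with the converted weights. A minimal check, assuming the repo files are in the current directory:

from transformers import AutoConfig

# After this commit the declared dtype is float16
config = AutoConfig.from_pretrained("./")
print(config.torch_dtype)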
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:50b54dd93397a6b301fa3c735b5d2763fbcacc9bf63e7cb76e9159a7b378c3ee
+size 641496696
model_fp16.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:03dc9e241100a8782ad4c8d5c4751bbaf8c02b8a0360f02f84dcb5973925bb79
+size 595928496
quant_log.csv
CHANGED
@@ -1,169 +1,169 @@
 layer,module,loss,damp,time
-[168 rows of the previous quantization log; their numeric values are truncated in the page extraction]
+0,self_attn.k_proj,0.00136,0.01000,0.325
+0,self_attn.v_proj,0.00003,0.01000,0.129
+0,self_attn.q_proj,0.01018,0.01000,0.137
+0,self_attn.o_proj,0.00001,0.01000,0.134
+0,mlp.up_proj,0.09437,0.01000,0.139
+0,mlp.gate_proj,0.18865,0.01000,0.139
+0,mlp.down_proj,0.00143,0.01000,0.735
+1,self_attn.k_proj,0.00999,0.01000,0.135
+1,self_attn.v_proj,0.00103,0.01000,0.135
+1,self_attn.q_proj,0.03432,0.01000,0.135
+1,self_attn.o_proj,0.00249,0.01000,0.132
+1,mlp.up_proj,0.10145,0.01000,0.145
+1,mlp.gate_proj,0.19494,0.01000,0.138
+1,mlp.down_proj,0.00111,0.01000,0.741
+2,self_attn.k_proj,0.01739,0.01000,0.135
+2,self_attn.v_proj,0.00208,0.01000,0.130
+2,self_attn.q_proj,0.05895,0.01000,0.135
+2,self_attn.o_proj,0.00084,0.01000,0.132
+2,mlp.up_proj,0.15928,0.01000,0.139
+2,mlp.gate_proj,0.29904,0.01000,0.137
+2,mlp.down_proj,3.25813,0.01000,0.724
+3,self_attn.k_proj,0.02043,0.01000,0.141
+3,self_attn.v_proj,0.00490,0.01000,0.138
+3,self_attn.q_proj,0.08232,0.01000,0.139
+3,self_attn.o_proj,0.00070,0.01000,0.141
+3,mlp.up_proj,1.52620,0.01000,0.140
+3,mlp.gate_proj,13.87913,0.01000,0.153
+3,mlp.down_proj,3.20606,0.01000,0.734
+4,self_attn.k_proj,0.01640,0.01000,0.135
+4,self_attn.v_proj,0.00691,0.01000,0.131
+4,self_attn.q_proj,0.07569,0.01000,0.132
+4,self_attn.o_proj,0.00309,0.01000,0.134
+4,mlp.up_proj,0.24986,0.01000,0.136
+4,mlp.gate_proj,0.42210,0.01000,0.138
+4,mlp.down_proj,0.00362,0.01000,0.737
+5,self_attn.k_proj,0.01574,0.01000,0.137
+5,self_attn.v_proj,0.00762,0.01000,0.133
+5,self_attn.q_proj,0.07645,0.01000,0.136
+5,self_attn.o_proj,0.00106,0.01000,0.138
+5,mlp.up_proj,0.30390,0.01000,0.143
+5,mlp.gate_proj,0.71931,0.01000,0.147
+5,mlp.down_proj,0.00614,0.01000,0.733
+6,self_attn.k_proj,0.02019,0.01000,0.139
+6,self_attn.v_proj,0.00567,0.01000,0.138
+6,self_attn.q_proj,0.07846,0.01000,0.140
+6,self_attn.o_proj,0.00102,0.01000,0.140
+6,mlp.up_proj,0.25922,0.01000,0.147
+6,mlp.gate_proj,0.42813,0.01000,0.144
+6,mlp.down_proj,0.00318,0.01000,0.753
+7,self_attn.k_proj,0.02236,0.01000,0.144
+7,self_attn.v_proj,0.00721,0.01000,0.146
+7,self_attn.q_proj,0.08888,0.01000,0.135
+7,self_attn.o_proj,0.00106,0.01000,0.140
+7,mlp.up_proj,0.27111,0.01000,0.147
+7,mlp.gate_proj,0.37811,0.01000,0.143
+7,mlp.down_proj,0.00381,0.01000,0.762
+8,self_attn.k_proj,0.01918,0.01000,0.137
+8,self_attn.v_proj,0.00650,0.01000,0.138
+8,self_attn.q_proj,0.08556,0.01000,0.140
+8,self_attn.o_proj,0.00224,0.01000,0.140
+8,mlp.up_proj,0.23711,0.01000,0.140
+8,mlp.gate_proj,0.33877,0.01000,0.141
+8,mlp.down_proj,0.00247,0.01000,0.745
+9,self_attn.k_proj,0.03478,0.01000,0.141
+9,self_attn.v_proj,0.01390,0.01000,0.145
+9,self_attn.q_proj,0.16694,0.01000,0.139
+9,self_attn.o_proj,0.00133,0.01000,0.140
+9,mlp.up_proj,0.25157,0.01000,0.142
+9,mlp.gate_proj,0.31524,0.01000,0.148
+9,mlp.down_proj,0.00306,0.01000,0.780
+10,self_attn.k_proj,0.02065,0.01000,0.140
+10,self_attn.v_proj,0.01247,0.01000,0.139
+10,self_attn.q_proj,0.09851,0.01000,0.141
+10,self_attn.o_proj,0.00224,0.01000,0.141
+10,mlp.up_proj,0.20222,0.01000,0.145
+10,mlp.gate_proj,0.26411,0.01000,0.144
+10,mlp.down_proj,0.00195,0.01000,0.758
+11,self_attn.k_proj,0.03751,0.01000,0.138
+11,self_attn.v_proj,0.01373,0.01000,0.137
+11,self_attn.q_proj,0.16694,0.01000,0.138
+11,self_attn.o_proj,0.00163,0.01000,0.136
+11,mlp.up_proj,0.23553,0.01000,0.142
+11,mlp.gate_proj,0.25575,0.01000,0.141
+11,mlp.down_proj,0.00258,0.01000,0.766
+12,self_attn.k_proj,0.01699,0.01000,0.141
+12,self_attn.v_proj,0.00987,0.01000,0.143
+12,self_attn.q_proj,0.09044,0.01000,0.149
+12,self_attn.o_proj,0.00252,0.01000,0.145
+12,mlp.up_proj,0.20484,0.01000,0.146
+12,mlp.gate_proj,0.22553,0.01000,0.146
+12,mlp.down_proj,0.00257,0.01000,0.767
+13,self_attn.k_proj,0.02551,0.01000,0.135
+13,self_attn.v_proj,0.01122,0.01000,0.136
+13,self_attn.q_proj,0.13072,0.01000,0.146
+13,self_attn.o_proj,0.00144,0.01000,0.134
+13,mlp.up_proj,0.23847,0.01000,0.147
+13,mlp.gate_proj,0.24350,0.01000,0.151
+13,mlp.down_proj,0.00315,0.01000,0.778
+14,self_attn.k_proj,0.02141,0.01000,0.139
+14,self_attn.v_proj,0.01697,0.01000,0.135
+14,self_attn.q_proj,0.12928,0.01000,0.140
+14,self_attn.o_proj,0.00471,0.01000,0.136
+14,mlp.up_proj,0.22936,0.01000,0.139
+14,mlp.gate_proj,0.26044,0.01000,0.147
+14,mlp.down_proj,0.00375,0.01000,0.742
+15,self_attn.k_proj,0.02037,0.01000,0.137
+15,self_attn.v_proj,0.01484,0.01000,0.136
+15,self_attn.q_proj,0.10820,0.01000,0.140
+15,self_attn.o_proj,0.00345,0.01000,0.138
+15,mlp.up_proj,0.30909,0.01000,0.143
+15,mlp.gate_proj,0.32287,0.01000,0.149
+15,mlp.down_proj,0.00640,0.01000,0.748
+16,self_attn.k_proj,0.02956,0.01000,0.139
+16,self_attn.v_proj,0.02730,0.01000,0.136
+16,self_attn.q_proj,0.21341,0.01000,0.137
+16,self_attn.o_proj,0.00164,0.01000,0.142
+16,mlp.up_proj,0.36086,0.01000,0.146
+16,mlp.gate_proj,0.47303,0.01000,0.153
+16,mlp.down_proj,0.01073,0.01000,0.753
+17,self_attn.k_proj,0.02010,0.01000,0.144
+17,self_attn.v_proj,0.02237,0.01000,0.138
+17,self_attn.q_proj,0.13404,0.01000,0.138
+17,self_attn.o_proj,0.00350,0.01000,0.138
+17,mlp.up_proj,0.42924,0.01000,0.140
+17,mlp.gate_proj,0.66441,0.01000,0.142
+17,mlp.down_proj,0.00841,0.01000,0.736
+18,self_attn.k_proj,0.02365,0.01000,0.142
+18,self_attn.v_proj,0.01843,0.01000,0.133
+18,self_attn.q_proj,0.11482,0.01000,0.138
+18,self_attn.o_proj,0.00114,0.01000,0.136
+18,mlp.up_proj,0.41826,0.01000,0.142
+18,mlp.gate_proj,0.52589,0.01000,0.144
+18,mlp.down_proj,0.00837,0.01000,0.747
+19,self_attn.k_proj,0.01885,0.01000,0.144
+19,self_attn.v_proj,0.01739,0.01000,0.145
+19,self_attn.q_proj,0.11650,0.01000,0.139
+19,self_attn.o_proj,0.00337,0.01000,0.143
+19,mlp.up_proj,0.61293,0.01000,0.146
+19,mlp.gate_proj,0.73786,0.01000,0.147
+19,mlp.down_proj,0.01766,0.01000,0.743
+20,self_attn.k_proj,0.02636,0.01000,0.137
+20,self_attn.v_proj,0.05430,0.01000,0.137
+20,self_attn.q_proj,0.16743,0.01000,0.145
+20,self_attn.o_proj,0.01072,0.01000,0.140
+20,mlp.up_proj,0.71327,0.01000,0.136
+20,mlp.gate_proj,0.78980,0.01000,0.147
+20,mlp.down_proj,0.02940,0.01000,0.730
+21,self_attn.k_proj,0.03263,0.01000,0.137
+21,self_attn.v_proj,0.10656,0.01000,0.136
+21,self_attn.q_proj,0.22666,0.01000,0.139
+21,self_attn.o_proj,0.02572,0.01000,0.138
+21,mlp.up_proj,1.93951,0.01000,0.140
+21,mlp.gate_proj,4.54990,0.01000,0.143
+21,mlp.down_proj,17.82846,0.01000,0.752
+22,self_attn.k_proj,0.02069,0.01000,0.143
+22,self_attn.v_proj,0.06015,0.01000,0.136
+22,self_attn.q_proj,0.15573,0.01000,0.143
+22,self_attn.o_proj,0.01185,0.01000,0.138
+22,mlp.up_proj,0.59293,0.01000,0.144
+22,mlp.gate_proj,0.57112,0.01000,0.149
+22,mlp.down_proj,0.05477,0.01000,0.768
+23,self_attn.k_proj,0.02633,0.01000,0.137
+23,self_attn.v_proj,0.05556,0.01000,0.141
+23,self_attn.q_proj,0.19868,0.01000,0.136
+23,self_attn.o_proj,0.03036,0.01000,0.139
+23,mlp.up_proj,0.86650,0.01000,0.141
+23,mlp.gate_proj,0.89305,0.01000,0.142
+23,mlp.down_proj,0.48127,0.01000,0.736
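The rewritten log makes the outliers easy to spot: layer 3's mlp.gate_proj (loss 13.87913) and layer 21's mlp.down_proj (loss 17.82846) dominate the quantization error, while every other row stays below about 5. A short sketch for summarizing the file, assuming pandas is available:

import pandas as pd

log = pd.read_csv("quant_log.csv")
# Highest quantization loss per module type, with the layer it occurs in
worst = log.loc[log.groupby("module")["loss"].idxmax(), ["layer", "module", "loss"]]
print(worst.sort_values("loss", ascending=False))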
untitled.py
ADDED
@@ -0,0 +1,15 @@
+import torch
+from safetensors import safe_open
+from safetensors.torch import save_file
+
+# Load the safetensors file
+model_path = "model.safetensors"
+with safe_open(model_path, framework="pt", device="cpu") as f:
+    state_dict = {key: f.get_tensor(key) for key in f.keys()}
+
+# Convert BF16 to FP16
+fp16_state_dict = {key: value.to(torch.float16) for key, value in state_dict.items()}
+
+# Save as a new safetensors file
+output_path = "model_fp16.safetensors"
+save_file(fp16_state_dict, output_path)
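One optional refinement to this script, not part of the commit: safetensors' save_file accepts a metadata dict, and checkpoint files written by transformers carry format: pt there, which some loading paths check for. A hedged sketch of the final save with that tag added:

from safetensors.torch import save_file

# Same save as above, but tagged the way transformers tags
# PyTorch safetensors checkpoints
save_file(fp16_state_dict, "model_fp16.safetensors", metadata={"format": "pt"})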