3v324v23 committed
Commit 45be9e6 · 1 Parent(s): 8872684

bf16 to fp16

Files changed (6)
  1. Untitled.ipynb +175 -0
  2. config.json +2 -2
  3. model.safetensors +2 -2
  4. model_fp16.safetensors +3 -0
  5. quant_log.csv +168 -168
  6. untitled.py +15 -0
Untitled.ipynb ADDED
@@ -0,0 +1,175 @@
+ {
+  "cells": [
+   {
+    "cell_type": "code",
+    "execution_count": 1,
+    "id": "6363d4dd-5c18-4652-b682-bf54a468aae5",
+    "metadata": {},
+    "outputs": [],
+    "source": [
+     "import torch\n",
+     "from safetensors import safe_open\n",
+     "from safetensors.torch import save_file\n",
+     "\n",
+     "# Load the safetensors file\n",
+     "model_path = \"model.safetensors\"\n",
+     "with safe_open(model_path, framework=\"pt\", device=\"cpu\") as f:\n",
+     "    state_dict = {key: f.get_tensor(key) for key in f.keys()}\n",
+     "\n",
+     "# Convert BF16 to FP16\n",
+     "fp16_state_dict = {key: value.to(torch.float16) for key, value in state_dict.items()}\n",
+     "\n",
+     "# Save as a new safetensors file\n",
+     "output_path = \"model_fp16.safetensors\"\n",
+     "save_file(fp16_state_dict, output_path)"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": 4,
+    "id": "9aa60eaa-883a-465e-958a-3280e0439f66",
+    "metadata": {},
+    "outputs": [
+     {
+      "ename": "PackageNotFoundError",
+      "evalue": "No package metadata was found for auto-gptq",
+      "output_type": "error",
+      "traceback": [
+       "---------------------------------------------------------------------------",
+       "StopIteration                             Traceback (most recent call last)",
+       "File ~/miniconda3/lib/python3.12/importlib/metadata/__init__.py:397, in Distribution.from_name(cls, name)\n    396 try:\n--> 397     return next(cls.discover(name=name))\n    398 except StopIteration:",
+       "StopIteration: ",
+       "\nDuring handling of the above exception, another exception occurred:\n",
+       "PackageNotFoundError                      Traceback (most recent call last)",
+       "Cell In[4], line 7\n      4 model_path = \"./\"\n      6 # Load the model with an explicit torch_dtype\n----> 7 model = AutoModelForCausalLM.from_pretrained(\n      8     model_path,\n      9     torch_dtype=torch.float16,  # match the converted weights\n     10     device_map=\"auto\"  # optional: automatic device placement\n     11 )\n     13 # Load the tokenizer\n     14 tokenizer = AutoTokenizer.from_pretrained(model_path)",
+       "File ~/miniconda3/lib/python3.12/site-packages/transformers/models/auto/auto_factory.py:564, in _BaseAutoModelClass.from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)\n    562 elif type(config) in cls._model_mapping.keys():\n    563     model_class = _get_model_class(config, cls._model_mapping)\n--> 564     return model_class.from_pretrained(\n    565         pretrained_model_name_or_path, *model_args, config=config, **hub_kwargs, **kwargs\n    566     )",
+       "File ~/miniconda3/lib/python3.12/site-packages/transformers/modeling_utils.py:3669, in PreTrainedModel.from_pretrained(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, weights_only, *model_args, **kwargs)\n   3668 if hf_quantizer is not None:\n-> 3669     hf_quantizer.validate_environment(\n   3670         torch_dtype=torch_dtype,\n   3671         from_tf=from_tf,\n   3672         from_flax=from_flax,\n   3673         device_map=device_map,\n   3674         weights_only=weights_only,\n   3675     )",
+       "File ~/miniconda3/lib/python3.12/site-packages/transformers/quantizers/quantizer_gptq.py:52, in GptqHfQuantizer.validate_environment(self, *args, **kwargs)\n---> 52 gptq_supports_cpu = version.parse(importlib.metadata.version(\"auto-gptq\")) > version.parse(\"0.4.2\")\n     53 if not gptq_supports_cpu and not torch.cuda.is_available():\n     54     raise RuntimeError(\"GPU is required to quantize or run quantize model.\")",
+       "File ~/miniconda3/lib/python3.12/importlib/metadata/__init__.py:889, in version(distribution_name)\n--> 889 return distribution(distribution_name).version",
+       "File ~/miniconda3/lib/python3.12/importlib/metadata/__init__.py:862, in distribution(distribution_name)\n--> 862 return Distribution.from_name(distribution_name)",
+       "File ~/miniconda3/lib/python3.12/importlib/metadata/__init__.py:399, in Distribution.from_name(cls, name)\n--> 399 raise PackageNotFoundError(name)",
+       "PackageNotFoundError: No package metadata was found for auto-gptq"
+      ]
+     }
+    ],
+    "source": [
+     "\n",
+     "from transformers import AutoModelForCausalLM, AutoTokenizer\n",
+     "\n",
+     "# Model path\n",
+     "model_path = \"./\"\n",
+     "\n",
+     "# Load the model with an explicit torch_dtype\n",
+     "model = AutoModelForCausalLM.from_pretrained(\n",
+     "    model_path,\n",
+     "    torch_dtype=torch.float16,  # match the converted weights\n",
+     "    device_map=\"auto\"  # optional: automatic device placement\n",
+     ")\n",
+     "\n",
+     "# Load the tokenizer\n",
+     "tokenizer = AutoTokenizer.from_pretrained(model_path)\n",
+     "\n",
+     "# Smoke-test the model\n",
+     "input_text = \"Hello, how are you?\"\n",
+     "inputs = tokenizer(input_text, return_tensors=\"pt\")\n",
+     "outputs = model.generate(**inputs)\n",
+     "print(tokenizer.decode(outputs[0], skip_special_tokens=True))"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": 5,
+    "id": "9d15b55d-f5bc-40ad-80e5-81d421bd0592",
+    "metadata": {},
+    "outputs": [
+     {
+      "name": "stdout",
+      "output_type": "stream",
+      "text": [
+       "Looking in indexes: http://mirrors.aliyun.com/pypi/simple\n",
+       "Collecting auto-gptq\n",
+       "  Downloading http://mirrors.aliyun.com/pypi/packages/90/e5/b22697903982284fe284568fb2663a2196694a8eee637f5cf4ccfe435a38/auto_gptq-0.7.1.tar.gz (126 kB)\n",
+       "     126.1/126.1 kB 1.4 MB/s eta 0:00:00\n",
+       "  Preparing metadata (setup.py) ... done\n",
+       "Discarding http://mirrors.aliyun.com/pypi/packages/90/e5/b22697903982284fe284568fb2663a2196694a8eee637f5cf4ccfe435a38/auto_gptq-0.7.1.tar.gz#sha256=5c61ad380e9b4c603757c254765e9083a90a820cd0aff1b5d2c6f7fd96c85e80 (from http://mirrors.aliyun.com/pypi/simple/auto-gptq/) (requires-python:>=3.8.0): Requested auto-gptq from http://mirrors.aliyun.com/pypi/packages/90/e5/b22697903982284fe284568fb2663a2196694a8eee637f5cf4ccfe435a38/auto_gptq-0.7.1.tar.gz#sha256=5c61ad380e9b4c603757c254765e9083a90a820cd0aff1b5d2c6f7fd96c85e80 has inconsistent version: expected '0.7.1', but metadata has '0.7.1+cu1241'\n",
+       "  Downloading http://mirrors.aliyun.com/pypi/packages/34/71/c3e73cf17681f6ff4754ef8f4cb8b67af3def230fc8711eac1250bbd78d5/auto_gptq-0.7.0.tar.gz (124 kB)\n",
+       "     124.6/124.6 kB 2.4 MB/s eta 0:00:00\n",
+       "  Preparing metadata (setup.py) ... done\n",
+       "Discarding http://mirrors.aliyun.com/pypi/packages/34/71/c3e73cf17681f6ff4754ef8f4cb8b67af3def230fc8711eac1250bbd78d5/auto_gptq-0.7.0.tar.gz#sha256=50a5396fae2db5a19446b3198ef0e86ee520846b881db47bdbf4eb9260eac723 (from http://mirrors.aliyun.com/pypi/simple/auto-gptq/) (requires-python:>=3.8.0): Requested auto-gptq from http://mirrors.aliyun.com/pypi/packages/34/71/c3e73cf17681f6ff4754ef8f4cb8b67af3def230fc8711eac1250bbd78d5/auto_gptq-0.7.0.tar.gz#sha256=50a5396fae2db5a19446b3198ef0e86ee520846b881db47bdbf4eb9260eac723 has inconsistent version: expected '0.7.0', but metadata has '0.7.0+cu1241'\n",
+       "  Downloading http://mirrors.aliyun.com/pypi/packages/49/af/02b66e55dfd9aeb0ece923843043724ed7432ec0c649ea0f3b9fa1dd90c6/auto_gptq-0.6.0.tar.gz (120 kB)\n",
+       "     121.0/121.0 kB 1.9 MB/s eta 0:00:00\n",
+       "  Preparing metadata (setup.py) ... error\n",
+       "  error: subprocess-exited-with-error\n",
+       "  \n",
+       "  × python setup.py egg_info did not run successfully.\n",
+       "  │ exit code: 1\n",
+       "  ╰─> [20 lines of output]\n",
+       "      python: can't open file '/tmp/pip-install-9tzda5pv/auto-gptq_60a9cac9f2ef4bb89bb1ce3f0d0126b0/./autogptq_extension/qigen/generate.py': [Errno 2] No such file or directory\n",
+       "      Traceback (most recent call last):\n",
+       "        File \"/tmp/pip-install-9tzda5pv/auto-gptq_60a9cac9f2ef4bb89bb1ce3f0d0126b0/setup.py\", line 109, in <module>\n",
+       "          subprocess.check_output([\"python\", \"./autogptq_extension/qigen/generate.py\", \"--module\", \"--search\", \"--p\", str(p)])\n",
+       "        File \"/root/miniconda3/lib/python3.12/subprocess.py\", line 466, in check_output\n",
+       "          return run(*popenargs, stdout=PIPE, timeout=timeout, check=True,\n",
+       "        File \"/root/miniconda3/lib/python3.12/subprocess.py\", line 571, in run\n",
+       "          raise CalledProcessError(retcode, process.args,\n",
+       "      subprocess.CalledProcessError: Command '['python', './autogptq_extension/qigen/generate.py', '--module', '--search', '--p', '48']' returned non-zero exit status 2.\n",
+       "      \n",
+       "      During handling of the above exception, another exception occurred:\n",
+       "      \n",
+       "      Traceback (most recent call last):\n",
+       "        File \"<string>\", line 2, in <module>\n",
+       "        File \"<pip-setuptools-caller>\", line 34, in <module>\n",
+       "        File \"/tmp/pip-install-9tzda5pv/auto-gptq_60a9cac9f2ef4bb89bb1ce3f0d0126b0/setup.py\", line 111, in <module>\n",
+       "          raise Exception(f\"Generating QiGen kernels failed with the error shown above.\")\n",
+       "      Exception: Generating QiGen kernels failed with the error shown above.\n",
+       "      Generating qigen kernels...\n",
+       "      [end of output]\n",
+       "  \n",
+       "  note: This error originates from a subprocess, and is likely not a problem with pip.\n",
+       "error: metadata-generation-failed\n",
+       "\n",
+       "× Encountered error while generating package metadata.\n",
+       "╰─> See above for output.\n",
+       "\n",
+       "note: This is an issue with the package mentioned above, not pip.\n",
+       "hint: See above for details.\n"
+      ]
+     }
+    ],
+    "source": [
+     "!pip install auto-gptq"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": null,
+    "id": "b8c14f99-2926-433f-8249-58cc123ce73d",
+    "metadata": {},
+    "outputs": [],
+    "source": []
+   }
+  ],
+  "metadata": {
+   "kernelspec": {
+    "display_name": "Python 3 (ipykernel)",
+    "language": "python",
+    "name": "python3"
+   },
+   "language_info": {
+    "codemirror_mode": {
+     "name": "ipython",
+     "version": 3
+    },
+    "file_extension": ".py",
+    "mimetype": "text/x-python",
+    "name": "python",
+    "nbconvert_exporter": "python",
+    "pygments_lexer": "ipython3",
+    "version": "3.12.3"
+   }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 5
+ }
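A caution on the conversion cell in Untitled.ipynb: it casts every tensor in the file to `torch.float16` wholesale. The `GptqHfQuantizer` traceback and the presence of quant_log.csv suggest this repository also carries GPTQ-quantized weights, and packed integer tensors (such as GPTQ `qweight`/`qzeros`) would be silently corrupted by a blanket cast. A dtype-guarded variant might look like the following sketch (not part of the commit; the tensor names above are illustrative):

```python
import torch
from safetensors import safe_open
from safetensors.torch import save_file

fp16_state_dict = {}
with safe_open("model.safetensors", framework="pt", device="cpu") as f:
    for key in f.keys():
        t = f.get_tensor(key)
        # Cast only floating-point tensors; leave integer tensors
        # (e.g. packed GPTQ weights) untouched.
        fp16_state_dict[key] = t.to(torch.float16) if t.is_floating_point() else t
save_file(fp16_state_dict, "model_fp16.safetensors")
```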
config.json CHANGED
@@ -1,6 +1,6 @@
  {
    "_attn_implementation_autoset": true,
-   "_name_or_path": "/root/.cache/huggingface/hub/models--Qwen--Qwen2.5-0.5B-Instruct/snapshots/7ae557604adf67be50417f59c2c2f167def9a775",
+   "_name_or_path": "Qwen2.5-0.5B-Instruct-FP16",
    "architectures": [
      "Qwen2ForCausalLM"
    ],
@@ -43,7 +43,7 @@
    "rope_theta": 1000000.0,
    "sliding_window": null,
    "tie_word_embeddings": true,
-   "torch_dtype": "bfloat16",
+   "torch_dtype": "float16",
    "transformers_version": "4.47.1",
    "use_cache": true,
    "use_sliding_window": false,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:8572721112376195fbcc8595a6a204b3bfad41f90101ba5d4e36d205c718e79f
- size 641496752
+ oid sha256:50b54dd93397a6b301fa3c735b5d2763fbcacc9bf63e7cb76e9159a7b378c3ee
+ size 641496696

model_fp16.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:03dc9e241100a8782ad4c8d5c4751bbaf8c02b8a0360f02f84dcb5973925bb79
+ size 595928496
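These Git LFS pointers record only a hash and a byte size, so the dtype change cannot be read off the diff itself. The safetensors header can be inspected without loading any tensors; a minimal sketch (`read_safetensors_header` is a hypothetical helper, not part of the repo):

```python
import json
import struct

def read_safetensors_header(path):
    # A .safetensors file begins with an 8-byte little-endian header length,
    # followed by that many bytes of JSON describing each tensor.
    with open(path, "rb") as f:
        (header_len,) = struct.unpack("<Q", f.read(8))
        return json.loads(f.read(header_len))

header = read_safetensors_header("model_fp16.safetensors")
dtypes = {v["dtype"] for k, v in header.items() if k != "__metadata__"}
print(dtypes)  # e.g. {'F16'} if the conversion touched every tensor
```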
quant_log.csv CHANGED
@@ -1,169 +1,169 @@
  layer,module,loss,damp,time
- 0,self_attn.k_proj,0.00138,0.01000,0.372
- 0,self_attn.v_proj,0.00003,0.01000,0.143
- 0,self_attn.q_proj,0.01032,0.01000,0.146
- 0,self_attn.o_proj,0.00001,0.01000,0.141
- 0,mlp.up_proj,0.09639,0.01000,0.147
- 0,mlp.gate_proj,0.19389,0.01000,0.144
- 0,mlp.down_proj,0.00145,0.01000,0.760
- 1,self_attn.k_proj,0.00983,0.01000,0.141
- 1,self_attn.v_proj,0.00101,0.01000,0.140
- 1,self_attn.q_proj,0.03416,0.01000,0.145
- 1,self_attn.o_proj,0.00206,0.01000,0.142
- 1,mlp.up_proj,0.10123,0.01000,0.152
- 1,mlp.gate_proj,0.19488,0.01000,0.147
- 1,mlp.down_proj,0.00114,0.01000,0.763
- 2,self_attn.k_proj,0.01729,0.01000,0.143
- 2,self_attn.v_proj,0.00211,0.01000,0.141
- 2,self_attn.q_proj,0.05901,0.01000,0.143
- 2,self_attn.o_proj,0.00084,0.01000,0.144
- 2,mlp.up_proj,0.16170,0.01000,0.144
- 2,mlp.gate_proj,0.30168,0.01000,0.144
- 2,mlp.down_proj,3.40212,0.01000,0.777
- 3,self_attn.k_proj,0.02164,0.01000,0.146
- 3,self_attn.v_proj,0.00550,0.01000,0.144
- 3,self_attn.q_proj,0.08517,0.01000,0.140
- 3,self_attn.o_proj,0.00069,0.01000,0.145
- 3,mlp.up_proj,1.55705,0.01000,0.149
- 3,mlp.gate_proj,14.22241,0.01000,0.153
- 3,mlp.down_proj,10.47623,0.01000,0.771
- 4,self_attn.k_proj,0.01608,0.01000,0.141
- 4,self_attn.v_proj,0.00733,0.01000,0.140
- 4,self_attn.q_proj,0.07782,0.01000,0.141
- 4,self_attn.o_proj,0.00357,0.01000,0.142
- 4,mlp.up_proj,0.26308,0.01000,0.146
- 4,mlp.gate_proj,0.44777,0.01000,0.146
- 4,mlp.down_proj,0.00393,0.01000,0.773
- 5,self_attn.k_proj,0.01548,0.01000,0.141
- 5,self_attn.v_proj,0.00733,0.01000,0.143
- 5,self_attn.q_proj,0.07514,0.01000,0.142
- 5,self_attn.o_proj,0.00126,0.01000,0.142
- 5,mlp.up_proj,0.32005,0.01000,0.147
- 5,mlp.gate_proj,0.75867,0.01000,0.143
- 5,mlp.down_proj,0.00588,0.01000,0.782
- 6,self_attn.k_proj,0.02100,0.01000,0.142
- 6,self_attn.v_proj,0.00581,0.01000,0.142
- 6,self_attn.q_proj,0.08125,0.01000,0.143
- 6,self_attn.o_proj,0.00086,0.01000,0.142
- 6,mlp.up_proj,0.25930,0.01000,0.146
- 6,mlp.gate_proj,0.43217,0.01000,0.147
- 6,mlp.down_proj,0.00290,0.01000,0.776
- 7,self_attn.k_proj,0.02048,0.01000,0.140
- 7,self_attn.v_proj,0.00687,0.01000,0.139
- 7,self_attn.q_proj,0.08387,0.01000,0.140
- 7,self_attn.o_proj,0.00102,0.01000,0.143
- 7,mlp.up_proj,0.27253,0.01000,0.144
- 7,mlp.gate_proj,0.38177,0.01000,0.150
- 7,mlp.down_proj,0.00328,0.01000,0.786
- 8,self_attn.k_proj,0.01920,0.01000,0.144
- 8,self_attn.v_proj,0.00666,0.01000,0.139
- 8,self_attn.q_proj,0.08629,0.01000,0.142
- 8,self_attn.o_proj,0.00223,0.01000,0.145
- 8,mlp.up_proj,0.24209,0.01000,0.145
- 8,mlp.gate_proj,0.34371,0.01000,0.147
- 8,mlp.down_proj,0.00234,0.01000,0.770
- 9,self_attn.k_proj,0.03560,0.01000,0.146
- 9,self_attn.v_proj,0.01327,0.01000,0.146
- 9,self_attn.q_proj,0.15881,0.01000,0.142
- 9,self_attn.o_proj,0.00107,0.01000,0.141
- 9,mlp.up_proj,0.24913,0.01000,0.145
- 9,mlp.gate_proj,0.31356,0.01000,0.153
- 9,mlp.down_proj,0.00273,0.01000,0.770
- 10,self_attn.k_proj,0.01898,0.01000,0.140
- 10,self_attn.v_proj,0.01226,0.01000,0.142
- 10,self_attn.q_proj,0.09338,0.01000,0.146
- 10,self_attn.o_proj,0.00256,0.01000,0.139
- 10,mlp.up_proj,0.19837,0.01000,0.146
- 10,mlp.gate_proj,0.25931,0.01000,0.152
- 10,mlp.down_proj,0.00200,0.01000,0.781
- 11,self_attn.k_proj,0.03752,0.01000,0.142
- 11,self_attn.v_proj,0.01387,0.01000,0.139
- 11,self_attn.q_proj,0.17363,0.01000,0.144
- 11,self_attn.o_proj,0.00194,0.01000,0.142
- 11,mlp.up_proj,0.24216,0.01000,0.147
- 11,mlp.gate_proj,0.26251,0.01000,0.147
- 11,mlp.down_proj,0.00267,0.01000,0.776
- 12,self_attn.k_proj,0.01698,0.01000,0.142
- 12,self_attn.v_proj,0.00979,0.01000,0.142
- 12,self_attn.q_proj,0.08870,0.01000,0.143
- 12,self_attn.o_proj,0.00231,0.01000,0.141
- 12,mlp.up_proj,0.20748,0.01000,0.146
- 12,mlp.gate_proj,0.22905,0.01000,0.148
- 12,mlp.down_proj,0.00239,0.01000,0.772
- 13,self_attn.k_proj,0.02764,0.01000,0.143
- 13,self_attn.v_proj,0.01191,0.01000,0.145
- 13,self_attn.q_proj,0.14133,0.01000,0.146
- 13,self_attn.o_proj,0.00121,0.01000,0.144
- 13,mlp.up_proj,0.24341,0.01000,0.149
- 13,mlp.gate_proj,0.24755,0.01000,0.147
- 13,mlp.down_proj,0.00313,0.01000,0.776
- 14,self_attn.k_proj,0.02157,0.01000,0.144
- 14,self_attn.v_proj,0.01694,0.01000,0.143
- 14,self_attn.q_proj,0.12651,0.01000,0.145
- 14,self_attn.o_proj,0.00391,0.01000,0.143
- 14,mlp.up_proj,0.23182,0.01000,0.145
- 14,mlp.gate_proj,0.26455,0.01000,0.157
- 14,mlp.down_proj,0.00381,0.01000,0.783
- 15,self_attn.k_proj,0.01991,0.01000,0.145
- 15,self_attn.v_proj,0.01372,0.01000,0.144
- 15,self_attn.q_proj,0.10473,0.01000,0.146
- 15,self_attn.o_proj,0.00366,0.01000,0.145
- 15,mlp.up_proj,0.29403,0.01000,0.149
- 15,mlp.gate_proj,0.30643,0.01000,0.145
- 15,mlp.down_proj,0.00572,0.01000,0.765
- 16,self_attn.k_proj,0.03065,0.01000,0.144
- 16,self_attn.v_proj,0.02840,0.01000,0.140
- 16,self_attn.q_proj,0.21804,0.01000,0.146
- 16,self_attn.o_proj,0.00136,0.01000,0.143
- 16,mlp.up_proj,0.34904,0.01000,0.146
- 16,mlp.gate_proj,0.45293,0.01000,0.147
- 16,mlp.down_proj,0.01072,0.01000,0.766
- 17,self_attn.k_proj,0.02077,0.01000,0.145
- 17,self_attn.v_proj,0.02172,0.01000,0.145
- 17,self_attn.q_proj,0.13563,0.01000,0.144
- 17,self_attn.o_proj,0.00379,0.01000,0.142
- 17,mlp.up_proj,0.40654,0.01000,0.150
- 17,mlp.gate_proj,0.63069,0.01000,0.153
- 17,mlp.down_proj,0.00786,0.01000,0.771
- 18,self_attn.k_proj,0.02198,0.01000,0.149
- 18,self_attn.v_proj,0.01774,0.01000,0.141
- 18,self_attn.q_proj,0.10674,0.01000,0.145
- 18,self_attn.o_proj,0.00129,0.01000,0.143
- 18,mlp.up_proj,0.38269,0.01000,0.148
- 18,mlp.gate_proj,0.47625,0.01000,0.147
- 18,mlp.down_proj,0.00765,0.01000,0.777
- 19,self_attn.k_proj,0.01776,0.01000,0.144
- 19,self_attn.v_proj,0.01706,0.01000,0.142
- 19,self_attn.q_proj,0.11138,0.01000,0.143
- 19,self_attn.o_proj,0.00345,0.01000,0.143
- 19,mlp.up_proj,0.57152,0.01000,0.146
- 19,mlp.gate_proj,0.68538,0.01000,0.148
- 19,mlp.down_proj,0.01574,0.01000,0.783
- 20,self_attn.k_proj,0.02558,0.01000,0.145
- 20,self_attn.v_proj,0.05435,0.01000,0.146
- 20,self_attn.q_proj,0.16357,0.01000,0.140
- 20,self_attn.o_proj,0.01051,0.01000,0.144
- 20,mlp.up_proj,0.65037,0.01000,0.145
- 20,mlp.gate_proj,0.71838,0.01000,0.147
- 20,mlp.down_proj,0.02858,0.01000,0.766
- 21,self_attn.k_proj,0.03181,0.01000,0.144
- 21,self_attn.v_proj,0.10603,0.01000,0.144
- 21,self_attn.q_proj,0.23383,0.01000,0.144
- 21,self_attn.o_proj,0.02307,0.01000,0.144
- 21,mlp.up_proj,1.75373,0.01000,0.158
- 21,mlp.gate_proj,3.94375,0.01000,0.146
- 21,mlp.down_proj,17.75035,0.01000,0.779
- 22,self_attn.k_proj,0.02056,0.01000,0.141
- 22,self_attn.v_proj,0.06087,0.01000,0.140
- 22,self_attn.q_proj,0.15773,0.01000,0.148
- 22,self_attn.o_proj,0.01726,0.01000,0.140
- 22,mlp.up_proj,0.53556,0.01000,0.144
- 22,mlp.gate_proj,0.51474,0.01000,0.144
- 22,mlp.down_proj,0.04907,0.01000,0.769
- 23,self_attn.k_proj,0.02615,0.01000,0.176
- 23,self_attn.v_proj,0.05774,0.01000,0.209
- 23,self_attn.q_proj,0.20516,0.01000,0.209
- 23,self_attn.o_proj,0.03206,0.01000,0.210
- 23,mlp.up_proj,0.83435,0.01000,0.214
- 23,mlp.gate_proj,0.87210,0.01000,0.224
- 23,mlp.down_proj,0.48671,0.01000,0.784
+ 0,self_attn.k_proj,0.00136,0.01000,0.325
+ 0,self_attn.v_proj,0.00003,0.01000,0.129
+ 0,self_attn.q_proj,0.01018,0.01000,0.137
+ 0,self_attn.o_proj,0.00001,0.01000,0.134
+ 0,mlp.up_proj,0.09437,0.01000,0.139
+ 0,mlp.gate_proj,0.18865,0.01000,0.139
+ 0,mlp.down_proj,0.00143,0.01000,0.735
+ 1,self_attn.k_proj,0.00999,0.01000,0.135
+ 1,self_attn.v_proj,0.00103,0.01000,0.135
+ 1,self_attn.q_proj,0.03432,0.01000,0.135
+ 1,self_attn.o_proj,0.00249,0.01000,0.132
+ 1,mlp.up_proj,0.10145,0.01000,0.145
+ 1,mlp.gate_proj,0.19494,0.01000,0.138
+ 1,mlp.down_proj,0.00111,0.01000,0.741
+ 2,self_attn.k_proj,0.01739,0.01000,0.135
+ 2,self_attn.v_proj,0.00208,0.01000,0.130
+ 2,self_attn.q_proj,0.05895,0.01000,0.135
+ 2,self_attn.o_proj,0.00084,0.01000,0.132
+ 2,mlp.up_proj,0.15928,0.01000,0.139
+ 2,mlp.gate_proj,0.29904,0.01000,0.137
+ 2,mlp.down_proj,3.25813,0.01000,0.724
+ 3,self_attn.k_proj,0.02043,0.01000,0.141
+ 3,self_attn.v_proj,0.00490,0.01000,0.138
+ 3,self_attn.q_proj,0.08232,0.01000,0.139
+ 3,self_attn.o_proj,0.00070,0.01000,0.141
+ 3,mlp.up_proj,1.52620,0.01000,0.140
+ 3,mlp.gate_proj,13.87913,0.01000,0.153
+ 3,mlp.down_proj,3.20606,0.01000,0.734
+ 4,self_attn.k_proj,0.01640,0.01000,0.135
+ 4,self_attn.v_proj,0.00691,0.01000,0.131
+ 4,self_attn.q_proj,0.07569,0.01000,0.132
+ 4,self_attn.o_proj,0.00309,0.01000,0.134
+ 4,mlp.up_proj,0.24986,0.01000,0.136
+ 4,mlp.gate_proj,0.42210,0.01000,0.138
+ 4,mlp.down_proj,0.00362,0.01000,0.737
+ 5,self_attn.k_proj,0.01574,0.01000,0.137
+ 5,self_attn.v_proj,0.00762,0.01000,0.133
+ 5,self_attn.q_proj,0.07645,0.01000,0.136
+ 5,self_attn.o_proj,0.00106,0.01000,0.138
+ 5,mlp.up_proj,0.30390,0.01000,0.143
+ 5,mlp.gate_proj,0.71931,0.01000,0.147
+ 5,mlp.down_proj,0.00614,0.01000,0.733
+ 6,self_attn.k_proj,0.02019,0.01000,0.139
+ 6,self_attn.v_proj,0.00567,0.01000,0.138
+ 6,self_attn.q_proj,0.07846,0.01000,0.140
+ 6,self_attn.o_proj,0.00102,0.01000,0.140
+ 6,mlp.up_proj,0.25922,0.01000,0.147
+ 6,mlp.gate_proj,0.42813,0.01000,0.144
+ 6,mlp.down_proj,0.00318,0.01000,0.753
+ 7,self_attn.k_proj,0.02236,0.01000,0.144
+ 7,self_attn.v_proj,0.00721,0.01000,0.146
+ 7,self_attn.q_proj,0.08888,0.01000,0.135
+ 7,self_attn.o_proj,0.00106,0.01000,0.140
+ 7,mlp.up_proj,0.27111,0.01000,0.147
+ 7,mlp.gate_proj,0.37811,0.01000,0.143
+ 7,mlp.down_proj,0.00381,0.01000,0.762
+ 8,self_attn.k_proj,0.01918,0.01000,0.137
+ 8,self_attn.v_proj,0.00650,0.01000,0.138
+ 8,self_attn.q_proj,0.08556,0.01000,0.140
+ 8,self_attn.o_proj,0.00224,0.01000,0.140
+ 8,mlp.up_proj,0.23711,0.01000,0.140
+ 8,mlp.gate_proj,0.33877,0.01000,0.141
+ 8,mlp.down_proj,0.00247,0.01000,0.745
+ 9,self_attn.k_proj,0.03478,0.01000,0.141
+ 9,self_attn.v_proj,0.01390,0.01000,0.145
+ 9,self_attn.q_proj,0.16694,0.01000,0.139
+ 9,self_attn.o_proj,0.00133,0.01000,0.140
+ 9,mlp.up_proj,0.25157,0.01000,0.142
+ 9,mlp.gate_proj,0.31524,0.01000,0.148
+ 9,mlp.down_proj,0.00306,0.01000,0.780
+ 10,self_attn.k_proj,0.02065,0.01000,0.140
+ 10,self_attn.v_proj,0.01247,0.01000,0.139
+ 10,self_attn.q_proj,0.09851,0.01000,0.141
+ 10,self_attn.o_proj,0.00224,0.01000,0.141
+ 10,mlp.up_proj,0.20222,0.01000,0.145
+ 10,mlp.gate_proj,0.26411,0.01000,0.144
+ 10,mlp.down_proj,0.00195,0.01000,0.758
+ 11,self_attn.k_proj,0.03751,0.01000,0.138
+ 11,self_attn.v_proj,0.01373,0.01000,0.137
+ 11,self_attn.q_proj,0.16694,0.01000,0.138
+ 11,self_attn.o_proj,0.00163,0.01000,0.136
+ 11,mlp.up_proj,0.23553,0.01000,0.142
+ 11,mlp.gate_proj,0.25575,0.01000,0.141
+ 11,mlp.down_proj,0.00258,0.01000,0.766
+ 12,self_attn.k_proj,0.01699,0.01000,0.141
+ 12,self_attn.v_proj,0.00987,0.01000,0.143
+ 12,self_attn.q_proj,0.09044,0.01000,0.149
+ 12,self_attn.o_proj,0.00252,0.01000,0.145
+ 12,mlp.up_proj,0.20484,0.01000,0.146
+ 12,mlp.gate_proj,0.22553,0.01000,0.146
+ 12,mlp.down_proj,0.00257,0.01000,0.767
+ 13,self_attn.k_proj,0.02551,0.01000,0.135
+ 13,self_attn.v_proj,0.01122,0.01000,0.136
+ 13,self_attn.q_proj,0.13072,0.01000,0.146
+ 13,self_attn.o_proj,0.00144,0.01000,0.134
+ 13,mlp.up_proj,0.23847,0.01000,0.147
+ 13,mlp.gate_proj,0.24350,0.01000,0.151
+ 13,mlp.down_proj,0.00315,0.01000,0.778
+ 14,self_attn.k_proj,0.02141,0.01000,0.139
+ 14,self_attn.v_proj,0.01697,0.01000,0.135
+ 14,self_attn.q_proj,0.12928,0.01000,0.140
+ 14,self_attn.o_proj,0.00471,0.01000,0.136
+ 14,mlp.up_proj,0.22936,0.01000,0.139
+ 14,mlp.gate_proj,0.26044,0.01000,0.147
+ 14,mlp.down_proj,0.00375,0.01000,0.742
+ 15,self_attn.k_proj,0.02037,0.01000,0.137
+ 15,self_attn.v_proj,0.01484,0.01000,0.136
+ 15,self_attn.q_proj,0.10820,0.01000,0.140
+ 15,self_attn.o_proj,0.00345,0.01000,0.138
+ 15,mlp.up_proj,0.30909,0.01000,0.143
+ 15,mlp.gate_proj,0.32287,0.01000,0.149
+ 15,mlp.down_proj,0.00640,0.01000,0.748
+ 16,self_attn.k_proj,0.02956,0.01000,0.139
+ 16,self_attn.v_proj,0.02730,0.01000,0.136
+ 16,self_attn.q_proj,0.21341,0.01000,0.137
+ 16,self_attn.o_proj,0.00164,0.01000,0.142
+ 16,mlp.up_proj,0.36086,0.01000,0.146
+ 16,mlp.gate_proj,0.47303,0.01000,0.153
+ 16,mlp.down_proj,0.01073,0.01000,0.753
+ 17,self_attn.k_proj,0.02010,0.01000,0.144
+ 17,self_attn.v_proj,0.02237,0.01000,0.138
+ 17,self_attn.q_proj,0.13404,0.01000,0.138
+ 17,self_attn.o_proj,0.00350,0.01000,0.138
+ 17,mlp.up_proj,0.42924,0.01000,0.140
+ 17,mlp.gate_proj,0.66441,0.01000,0.142
+ 17,mlp.down_proj,0.00841,0.01000,0.736
+ 18,self_attn.k_proj,0.02365,0.01000,0.142
+ 18,self_attn.v_proj,0.01843,0.01000,0.133
+ 18,self_attn.q_proj,0.11482,0.01000,0.138
+ 18,self_attn.o_proj,0.00114,0.01000,0.136
+ 18,mlp.up_proj,0.41826,0.01000,0.142
+ 18,mlp.gate_proj,0.52589,0.01000,0.144
+ 18,mlp.down_proj,0.00837,0.01000,0.747
+ 19,self_attn.k_proj,0.01885,0.01000,0.144
+ 19,self_attn.v_proj,0.01739,0.01000,0.145
+ 19,self_attn.q_proj,0.11650,0.01000,0.139
+ 19,self_attn.o_proj,0.00337,0.01000,0.143
+ 19,mlp.up_proj,0.61293,0.01000,0.146
+ 19,mlp.gate_proj,0.73786,0.01000,0.147
+ 19,mlp.down_proj,0.01766,0.01000,0.743
+ 20,self_attn.k_proj,0.02636,0.01000,0.137
+ 20,self_attn.v_proj,0.05430,0.01000,0.137
+ 20,self_attn.q_proj,0.16743,0.01000,0.145
+ 20,self_attn.o_proj,0.01072,0.01000,0.140
+ 20,mlp.up_proj,0.71327,0.01000,0.136
+ 20,mlp.gate_proj,0.78980,0.01000,0.147
+ 20,mlp.down_proj,0.02940,0.01000,0.730
+ 21,self_attn.k_proj,0.03263,0.01000,0.137
+ 21,self_attn.v_proj,0.10656,0.01000,0.136
+ 21,self_attn.q_proj,0.22666,0.01000,0.139
+ 21,self_attn.o_proj,0.02572,0.01000,0.138
+ 21,mlp.up_proj,1.93951,0.01000,0.140
+ 21,mlp.gate_proj,4.54990,0.01000,0.143
+ 21,mlp.down_proj,17.82846,0.01000,0.752
+ 22,self_attn.k_proj,0.02069,0.01000,0.143
+ 22,self_attn.v_proj,0.06015,0.01000,0.136
+ 22,self_attn.q_proj,0.15573,0.01000,0.143
+ 22,self_attn.o_proj,0.01185,0.01000,0.138
+ 22,mlp.up_proj,0.59293,0.01000,0.144
+ 22,mlp.gate_proj,0.57112,0.01000,0.149
+ 22,mlp.down_proj,0.05477,0.01000,0.768
+ 23,self_attn.k_proj,0.02633,0.01000,0.137
+ 23,self_attn.v_proj,0.05556,0.01000,0.141
+ 23,self_attn.q_proj,0.19868,0.01000,0.136
+ 23,self_attn.o_proj,0.03036,0.01000,0.139
+ 23,mlp.up_proj,0.86650,0.01000,0.141
+ 23,mlp.gate_proj,0.89305,0.01000,0.142
+ 23,mlp.down_proj,0.48127,0.01000,0.736
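The quantization log was regenerated wholesale, and eyeballing 168 paired rows is error-prone. A short pandas sketch for comparing the two logs (it assumes the pre-commit log was saved aside as `quant_log_old.csv`, a hypothetical filename):

```python
import pandas as pd

old = pd.read_csv("quant_log_old.csv")  # hypothetical copy of the old log
new = pd.read_csv("quant_log.csv")
merged = old.merge(new, on=["layer", "module"], suffixes=("_old", "_new"))
merged["loss_delta"] = merged["loss_new"] - merged["loss_old"]

# Biggest improvements (e.g. layer 3 mlp.down_proj: 10.476 -> 3.206)
print(merged.nsmallest(5, "loss_delta")[["layer", "module", "loss_old", "loss_new"]])
# Biggest regressions
print(merged.nlargest(5, "loss_delta")[["layer", "module", "loss_old", "loss_new"]])
```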
untitled.py ADDED
@@ -0,0 +1,15 @@
+ import torch
+ from safetensors import safe_open
+ from safetensors.torch import save_file
+
+ # Load the safetensors file
+ model_path = "model.safetensors"
+ with safe_open(model_path, framework="pt", device="cpu") as f:
+     state_dict = {key: f.get_tensor(key) for key in f.keys()}
+
+ # Convert BF16 to FP16
+ fp16_state_dict = {key: value.to(torch.float16) for key, value in state_dict.items()}
+
+ # Save as a new safetensors file
+ output_path = "model_fp16.safetensors"
+ save_file(fp16_state_dict, output_path)
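One detail worth noting about the script above: `save_file` writes a fresh header and does not carry over the source file's metadata block unless it is passed back explicitly. A metadata-preserving variant, as a sketch using the safetensors `metadata()` accessor:

```python
import torch
from safetensors import safe_open
from safetensors.torch import save_file

with safe_open("model.safetensors", framework="pt", device="cpu") as f:
    metadata = f.metadata()  # e.g. {"format": "pt"}; may be None
    fp16_state_dict = {k: f.get_tensor(k).to(torch.float16) for k in f.keys()}

# Pass the original metadata through so tools that look for "format": "pt"
# still accept the converted file.
save_file(fp16_state_dict, "model_fp16.safetensors", metadata=metadata or {"format": "pt"})
```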