Anthonyg5005 commited on
Commit
d303ab6
·
verified ·
1 Parent(s): f193868

Upload EXL2_Private_Quant_V2.ipynb

Browse files
Files changed (1) hide show
  1. EXL2_Private_Quant_V2.ipynb +191 -0
EXL2_Private_Quant_V2.ipynb ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "colab": {
6
+ "provenance": [],
7
+ "gpuType": "T4"
8
+ },
9
+ "kernelspec": {
10
+ "name": "python3",
11
+ "display_name": "Python 3"
12
+ },
13
+ "language_info": {
14
+ "name": "python"
15
+ },
16
+ "accelerator": "GPU"
17
+ },
18
+ "cells": [
19
+ {
20
+ "cell_type": "markdown",
21
+ "source": [
22
+ "#Quantizing huggingface models to exl2\n",
23
+ "This version of my exl2 quantize colab creates a single quantization to upload privately.\\\n",
24
+ "To calculate an estimate for VRAM size use: [NyxKrage/LLM-Model-VRAM-Calculator](https://huggingface.co/spaces/NyxKrage/LLM-Model-VRAM-Calculator)\\\n",
25
+ "Not all models and architectures are compatible with exl2."
26
+ ],
27
+ "metadata": {
28
+ "id": "Ku0ezvyD42ng"
29
+ }
30
+ },
31
+ {
32
+ "cell_type": "code",
33
+ "execution_count": null,
34
+ "metadata": {
35
+ "cellView": "form",
36
+ "id": "G7zSk2LWHtPU"
37
+ },
38
+ "outputs": [],
39
+ "source": [
40
+ "#@title Download and install environment\n",
41
+ "!git clone https://github.com/turboderp/exllamav2\n",
42
+ "%cd exllamav2\n",
43
+ "print(\"Installing pip dependencies\")\n",
44
+ "!pip install -q -r requirements.txt\n",
45
+ "!pip install -q huggingface_hub requests tqdm\n",
46
+ "#@markdown Uses [download-model.py](https://github.com/oobabooga/text-generation-webui/blob/main/download-model.py) by [oobabooga](https://github.com/oobabooga)\n",
47
+ "!wget https://raw.githubusercontent.com/oobabooga/text-generation-webui/main/download-model.py\n",
48
+ "model = \"none\"\n",
49
+ "dsd = 'false'"
50
+ ]
51
+ },
52
+ {
53
+ "cell_type": "code",
54
+ "source": [
55
+ "#@title Login to HF (Required to upload files)\n",
56
+ "#@markdown From my Colab/Kaggle login script on [Anthonyg5005/hf-scripts](https://huggingface.co/Anthonyg5005/hf-scripts/blob/main/HF%20Login%20Snippet%20Kaggle.py)\n",
57
+ "#import required functions\n",
58
+ "import os\n",
59
+ "from huggingface_hub import login, get_token, whoami\n",
60
+ "\n",
61
+ "#get token\n",
62
+ "if os.environ.get('KAGGLE_KERNEL_RUN_TYPE', None) is not None: #check if user in kaggle\n",
63
+ " from kaggle_secrets import UserSecretsClient\n",
64
+ " from kaggle_web_client import BackendError\n",
65
+ " try:\n",
66
+ " login(UserSecretsClient().get_secret(\"HF_TOKEN\")) #login if token secret found\n",
67
+ " except BackendError:\n",
68
+ " print('''\n",
69
+ " When using Kaggle, make sure to use the secret key HF_TOKEN with a 'WRITE' token.\n",
70
+ " This will prevent the need to login every time you run the script.\n",
71
+ " Set your secrets with the secrets add-on on the top of the screen.\n",
72
+ " ''')\n",
73
+ "if get_token() is not None:\n",
74
+ " #if the token is found then log in:\n",
75
+ " login(get_token())\n",
76
+ "else:\n",
77
+ " #if the token is not found then prompt user to provide it:\n",
78
+ " login(input(\"API token not detected. Enter your HuggingFace (WRITE) token: \"))\n",
79
+ "\n",
80
+ "#if the token is read only then prompt user to provide a write token (Only required if user needs a WRITE token, remove if READ is enough):\n",
81
+ "while True:\n",
82
+ " if whoami().get('auth', {}).get('accessToken', {}).get('role', None) != 'write':\n",
83
+ " if os.environ.get('HF_TOKEN', None) is not None: #if environ finds HF_TOKEN as read-only then display following text and exit:\n",
84
+ " print('''\n",
85
+ " You have the environment variable HF_TOKEN set.\n",
86
+ " You cannot log in.\n",
87
+ " Either set the environment variable to a 'WRITE' token or remove it.\n",
88
+ " ''')\n",
89
+ " input(\"Press enter to continue.\")\n",
90
+ " exit()\n",
91
+ " if os.environ.get('COLAB_BACKEND_VERSION', None) is not None:\n",
92
+ " print('''\n",
93
+ " Your Colab secret key is read-only\n",
94
+ " Please switch your key to 'write' or disable notebook access on the left.\n",
95
+ " For now, you are stuck in a loop\n",
96
+ " ''')\n",
97
+ " elif os.environ.get('KAGGLE_KERNEL_RUN_TYPE', None) is not None:\n",
98
+ " print('''\n",
99
+ " Your Kaggle secret key is read-only\n",
100
+ " Please switch your key to 'write' or unattach from notebook in add-ons at the top.\n",
101
+ " Having a read-only key attched will require login every time.\n",
102
+ " ''')\n",
103
+ " print(\"You do not have write access to this repository. Please use a valid token with (WRITE) access.\")\n",
104
+ " login(input(\"Enter your HuggingFace (WRITE) token: \"))\n",
105
+ " continue\n",
106
+ " break"
107
+ ],
108
+ "metadata": {
109
+ "cellView": "form",
110
+ "id": "8Hl3fQmRLybp"
111
+ },
112
+ "execution_count": null,
113
+ "outputs": []
114
+ },
115
+ {
116
+ "cell_type": "code",
117
+ "source": [
118
+ "#@title ##Choose HF model to download\n",
119
+ "#@markdown ###Repo should be formatted as user/repo\n",
120
+ "#@markdown Weights must be stored in safetensors\n",
121
+ "if model != \"none\":\n",
122
+ " !rm {model}-{BPW}bpw.zip\n",
123
+ " !rm -r {model}-exl2-{BPW}bpw\n",
124
+ "repo_url = \"mistralai/Mistral-7B-Instruct-v0.2\" # @param {type:\"string\"}\n",
125
+ "model = repo_url.replace(\"/\", \"_\")\n",
126
+ "!python download-model.py {repo_url}"
127
+ ],
128
+ "metadata": {
129
+ "cellView": "form",
130
+ "id": "NI1LUMD7H-Zx"
131
+ },
132
+ "execution_count": null,
133
+ "outputs": []
134
+ },
135
+ {
136
+ "cell_type": "code",
137
+ "source": [
138
+ "#@title Quantize the model\n",
139
+ "#@markdown ###Takes ~13 minutes to start quantizing first time, then quantization will last based on model size\n",
140
+ "#@markdown Target bits per weight:\n",
141
+ "BPW = \"4.125\" # @param {type:\"string\"}\n",
142
+ "!mkdir {model}-exl2-{BPW}bpw-WD\n",
143
+ "!mkdir {model}-exl2-{BPW}bpw\n",
144
+ "!cp models/{model}/config.json {model}-exl2-{BPW}bpw-WD\n",
145
+ "#@markdown Calibrate with dataset, may improve model output (optional):\n",
146
+ "Calibrate = True # @param {type:\"boolean\"}\n",
147
+ "#@markdown Calibration dataset, enable calibrate above (must be parquet file):\n",
148
+ "if Calibrate == True:\n",
149
+ " dataset_url = \"https://huggingface.co/datasets/wikitext/resolve/refs%2Fconvert%2Fparquet/wikitext-103-v1/test/0000.parquet?download=true\" # @param {type:\"string\"}\n",
150
+ " dataset_url = dataset_url.replace(\"?download=true\", \"\")\n",
151
+ " if dsd == 'false':\n",
152
+ " !wget {dataset_url}\n",
153
+ " dsd = 'true'\n",
154
+ " dataset = dataset_url.split(\"/\")[-1]\n",
155
+ "#@markdown To use a calibration dataset, enter the huggingface resolve url. Right click the download button and copy the link. Afterwards, paste the link into dataset_url.\n",
156
+ "#@markdown ![Example Image](https://huggingface.co/Anthonyg5005/hf-scripts/resolve/main/Screenshot%202024-03-17%20011855.png \"Copy from download button\")\n",
157
+ "if Calibrate == True:\n",
158
+ " quant = f\"convert.py -i models/{model} -o {model}-exl2-{BPW}bpw-WD -cf {model}-exl2-{BPW}bpw -c {dataset} -b {BPW}\"\n",
159
+ "else:\n",
160
+ " quant = f\"convert.py -i models/{model} -o {model}-exl2-{BPW}bpw-WD -cf {model}-exl2-{BPW}bpw -b {BPW}\"\n",
161
+ "!python {quant}"
162
+ ],
163
+ "metadata": {
164
+ "id": "8anbEbGyNmBI",
165
+ "cellView": "form"
166
+ },
167
+ "execution_count": null,
168
+ "outputs": []
169
+ },
170
+ {
171
+ "cell_type": "code",
172
+ "source": [
173
+ "#@title Upload to huggingface privately\n",
174
+ "#@markdown You may also set it to public but I'd recommend waiting for my next ipynb that will create multiple quants and place them all into individual branches.\n",
175
+ "!rm -r {model}-exl2-{BPW}bpw-WD\n",
176
+ "!rm -r models/{model}\n",
177
+ "print(\"Uploading to Huggingface. May take a while\")\n",
178
+ "from huggingface_hub import HfApi, whoami, create_repo\n",
179
+ "create_repo(f\"{whoami().get('name', None)}/{model}-exl2-{BPW}bpw\", private=True)\n",
180
+ "HfApi().upload_folder(folder_path=f\"{model}-exl2-{BPW}bpw\", repo_id=f\"{whoami().get('name', None)}/{model}-exl2-{BPW}bpw\", repo_type=\"model\", commit_message=\"Upload from Colab automation\")\n",
181
+ "print(f\"uploaded to {whoami().get('name', None)}/{model}-exl2-{BPW}bpw\")"
182
+ ],
183
+ "metadata": {
184
+ "cellView": "form",
185
+ "id": "XORLS2uPrbma"
186
+ },
187
+ "execution_count": null,
188
+ "outputs": []
189
+ }
190
+ ]
191
+ }