Anthonyg5005 committed on
Commit 519cef8 · verified · 1 Parent(s): 31ca9cd

Upload EXL2_Private_Quant_V1.ipynb

Files changed (1):
  1. EXL2_Private_Quant_V1.ipynb +154 -0

EXL2_Private_Quant_V1.ipynb ADDED
@@ -0,0 +1,154 @@
+ {
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "provenance": [],
+ "gpuType": "T4"
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "language_info": {
+ "name": "python"
+ },
+ "accelerator": "GPU"
+ },
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# Quantizing Hugging Face models to exl2\n",
+ "This version of my exl2 quantization Colab creates a single quantization to download privately.\\\n",
+ "To calculate a VRAM size estimate, use: [NyxKrage/LLM-Model-VRAM-Calculator](https://huggingface.co/spaces/NyxKrage/LLM-Model-VRAM-Calculator)\\\n",
+ "Not all models and architectures are compatible with exl2.\\\n",
+ "Uploading to a private HF repo will be supported in the future."
+ ],
+ "metadata": {
+ "id": "Ku0ezvyD42ng"
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "cellView": "form",
+ "id": "G7zSk2LWHtPU"
+ },
+ "outputs": [],
+ "source": [
+ "#@title Download and install environment\n",
+ "!git clone https://github.com/turboderp/exllamav2\n",
+ "%cd exllamav2\n",
+ "print(\"Installing pip dependencies\")\n",
+ "!pip install -q -r requirements.txt\n",
+ "!pip install -q huggingface_hub requests tqdm\n",
+ "!wget https://raw.githubusercontent.com/oobabooga/text-generation-webui/main/download-model.py\n",
+ "modeldw = \"none\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#@title Login to HF (Required only for gated models)\n",
+ "#@markdown From my Colab/Kaggle login script on [Anthonyg5005/hf-scripts](https://huggingface.co/Anthonyg5005/hf-scripts/blob/main/HF%20Login%20Snippet%20Kaggle.py)\n",
+ "#import required functions\n",
+ "import os\n",
+ "from huggingface_hub import login, get_token, whoami\n",
+ "\n",
+ "#get token\n",
+ "if os.environ.get('KAGGLE_KERNEL_RUN_TYPE', None) is not None: #check if user in kaggle\n",
+ "    from kaggle_secrets import UserSecretsClient\n",
+ "    from kaggle_web_client import BackendError\n",
+ "    try:\n",
+ "        login(UserSecretsClient().get_secret(\"HF_TOKEN\")) #login if token secret found\n",
+ "    except BackendError:\n",
+ "        print('''\n",
+ "        When using Kaggle, make sure to use the secret key HF_TOKEN.\n",
+ "        This will prevent the need to login every time you run the script.\n",
+ "        Set your secrets with the secrets add-on on the top of the screen.\n",
+ "        ''')\n",
+ "if get_token() is not None:\n",
+ "    #if the token is found then log in:\n",
+ "    login(get_token())\n",
+ "else:\n",
+ "    #if the token is not found then prompt user to provide it:\n",
+ "    login(input(\"API token not detected. Enter your HuggingFace (WRITE) token: \"))"
+ ],
+ "metadata": {
+ "cellView": "form",
+ "id": "8Hl3fQmRLybp"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#@title ##Choose HF model to download\n",
+ "#@markdown Weights must be stored in safetensors\n",
+ "if modeldw != \"none\":\n",
+ "    !rm {model}-{BPW}bpw.zip\n",
+ "    !rm -r {model}-exl2-{BPW}bpw\n",
+ "User = \"meta-llama\" # @param {type:\"string\"}\n",
+ "Repo = \"Llama-2-7b-chat-hf\" # @param {type:\"string\"}\n",
+ "modeldw = f\"{User}/{Repo}\"\n",
+ "model = f\"{User}_{Repo}\"\n",
+ "!python download-model.py {modeldw}"
+ ],
+ "metadata": {
+ "cellView": "form",
+ "id": "NI1LUMD7H-Zx"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#@title Quantize the model\n",
+ "#@markdown ###Takes ~13 minutes to start quantizing the first time; after that, quantization time depends on model size\n",
+ "#@markdown Target bits per weight:\n",
+ "BPW = \"4.125\" # @param {type:\"string\"}\n",
+ "!mkdir {model}-exl2-{BPW}bpw-WD\n",
+ "!mkdir {model}-exl2-{BPW}bpw\n",
+ "!cp models/{model}/config.json {model}-exl2-{BPW}bpw-WD\n",
+ "#@markdown Calibrate with dataset, may improve model output: (NOT WORKING YET)\n",
+ "Calibrate = False # @param {type:\"boolean\"}\n",
+ "#@markdown Calibration dataset, check above (must be parquet file):\n",
+ "dataset = \"wikitext\" # @param {type:\"string\"}\n",
+ "if Calibrate:\n",
+ "    quant = f\"convert.py -i models/{model} -o {model}-exl2-{BPW}bpw-WD -cf {model}-exl2-{BPW}bpw -c {dataset} -b {BPW}\"\n",
+ "else:\n",
+ "    quant = f\"convert.py -i models/{model} -o {model}-exl2-{BPW}bpw-WD -cf {model}-exl2-{BPW}bpw -b {BPW}\"\n",
+ "!python {quant}"
+ ],
+ "metadata": {
+ "id": "8anbEbGyNmBI",
+ "cellView": "form"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#@title Zip and download the model\n",
+ "!rm -r {model}-exl2-{BPW}bpw-WD\n",
+ "!rm -r models/{model}\n",
+ "print(\"Zipping. May take a few minutes\")\n",
+ "!zip -r {model}-{BPW}bpw.zip {model}-exl2-{BPW}bpw\n",
+ "from google.colab import files\n",
+ "files.download(f\"{model}-{BPW}bpw.zip\")\n",
+ "print(\"Colab download speeds are very slow, so the download will take a while\")"
+ ],
+ "metadata": {
+ "cellView": "form",
+ "id": "XORLS2uPrbma"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
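+ {
+ "cell_type": "markdown",
+ "source": [
+ "## Sketch: upload to a private HF repo\n",
+ "Not part of the original notebook: the cell below is a minimal sketch of the planned private-repo upload, using `create_repo` and `upload_folder` from `huggingface_hub`. The repo name is an assumption (adjust as needed), and it requires the WRITE token from the login cell. Run it after the quantize cell."
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#@title Sketch: upload quant to a private HF repo\n",
+ "#assumption: repo is named {model}-exl2-{BPW}bpw under your account; not part of the original notebook\n",
+ "from huggingface_hub import HfApi, whoami\n",
+ "api = HfApi()\n",
+ "repo_id = f\"{whoami()['name']}/{model}-exl2-{BPW}bpw\"\n",
+ "#create the private repo if it doesn't already exist\n",
+ "api.create_repo(repo_id, private=True, exist_ok=True)\n",
+ "#upload the quantized model folder produced by the quantize cell\n",
+ "api.upload_folder(folder_path=f\"{model}-exl2-{BPW}bpw\", repo_id=repo_id)\n",
+ "print(f\"Uploaded to https://huggingface.co/{repo_id}\")"
+ ],
+ "metadata": {
+ "cellView": "form"
+ },
+ "execution_count": null,
+ "outputs": []
+ }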
+ ]
+ }