Anthonyg5005 committed on
Commit 519cef8 · verified · 1 Parent(s): 31ca9cd

Upload EXL2_Private_Quant_V1.ipynb

Files changed (1):
  1. EXL2_Private_Quant_V1.ipynb +154 -0

EXL2_Private_Quant_V1.ipynb ADDED
@@ -0,0 +1,154 @@
+ {
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "provenance": [],
+ "gpuType": "T4"
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "language_info": {
+ "name": "python"
+ },
+ "accelerator": "GPU"
+ },
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# Quantizing Hugging Face models to exl2\n",
+ "This version of my exl2 quantization Colab creates a single quantization to download privately.\\\n",
+ "To calculate a VRAM size estimate, use: [NyxKrage/LLM-Model-VRAM-Calculator](https://huggingface.co/spaces/NyxKrage/LLM-Model-VRAM-Calculator)\\\n",
+ "Not all models and architectures are compatible with exl2.\\\n",
+ "Uploading to a private HF repo will be supported in the future."
+ ],
+ "metadata": {
+ "id": "Ku0ezvyD42ng"
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "cellView": "form",
+ "id": "G7zSk2LWHtPU"
+ },
+ "outputs": [],
+ "source": [
+ "#@title Download and install environment\n",
+ "!git clone https://github.com/turboderp/exllamav2\n",
+ "%cd exllamav2\n",
+ "print(\"Installing pip dependencies\")\n",
+ "!pip install -q -r requirements.txt\n",
+ "!pip install -q huggingface_hub requests tqdm\n",
+ "!wget https://raw.githubusercontent.com/oobabooga/text-generation-webui/main/download-model.py\n",
+ "modeldw = \"none\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#@title Login to HF (Required only for gated models)\n",
+ "#@markdown From my Colab/Kaggle login script on [Anthonyg5005/hf-scripts](https://huggingface.co/Anthonyg5005/hf-scripts/blob/main/HF%20Login%20Snippet%20Kaggle.py)\n",
+ "#import required functions\n",
+ "import os\n",
+ "from huggingface_hub import login, get_token, whoami\n",
+ "\n",
+ "#get token\n",
+ "if os.environ.get('KAGGLE_KERNEL_RUN_TYPE', None) is not None: #check if user in kaggle\n",
+ "    from kaggle_secrets import UserSecretsClient\n",
+ "    from kaggle_web_client import BackendError\n",
+ "    try:\n",
+ "        login(UserSecretsClient().get_secret(\"HF_TOKEN\")) #login if token secret found\n",
+ "    except BackendError:\n",
+ "        print('''\n",
+ "        When using Kaggle, make sure to use the secret key HF_TOKEN.\n",
+ "        This will prevent the need to login every time you run the script.\n",
+ "        Set your secrets with the secrets add-on on the top of the screen.\n",
+ "        ''')\n",
+ "if get_token() is not None:\n",
+ "    #if the token is found then log in:\n",
+ "    login(get_token())\n",
+ "else:\n",
+ "    #if the token is not found then prompt user to provide it:\n",
+ "    login(input(\"API token not detected. Enter your HuggingFace (WRITE) token: \"))"
+ ],
+ "metadata": {
+ "cellView": "form",
+ "id": "8Hl3fQmRLybp"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#@title ##Choose HF model to download\n",
+ "#@markdown Weights must be stored in safetensors\n",
+ "if modeldw != \"none\":\n",
+ "    !rm {model}-{BPW}bpw.zip\n",
+ "    !rm -r {model}-exl2-{BPW}bpw\n",
+ "User = \"meta-llama\" # @param {type:\"string\"}\n",
+ "Repo = \"Llama-2-7b-chat-hf\" # @param {type:\"string\"}\n",
+ "modeldw = f\"{User}/{Repo}\"\n",
+ "model = f\"{User}_{Repo}\"\n",
+ "!python download-model.py {modeldw}"
+ ],
+ "metadata": {
+ "cellView": "form",
+ "id": "NI1LUMD7H-Zx"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#@title Quantize the model\n",
+ "#@markdown ###Takes ~13 minutes to start quantizing the first time; after that, quantization time depends on model size\n",
+ "#@markdown Target bits per weight:\n",
+ "BPW = \"4.125\" # @param {type:\"string\"}\n",
+ "!mkdir {model}-exl2-{BPW}bpw-WD\n",
+ "!mkdir {model}-exl2-{BPW}bpw\n",
+ "!cp models/{model}/config.json {model}-exl2-{BPW}bpw-WD\n",
+ "#@markdown Calibrate with dataset, may improve model output: (NOT WORKING YET)\n",
+ "Calibrate = False # @param {type:\"boolean\"}\n",
+ "#@markdown Calibration dataset, check above (must be parquet file):\n",
+ "dataset = \"wikitext\" # @param {type:\"string\"}\n",
+ "if Calibrate:\n",
+ "    quant = f\"convert.py -i models/{model} -o {model}-exl2-{BPW}bpw-WD -cf {model}-exl2-{BPW}bpw -c {dataset} -b {BPW}\"\n",
+ "else:\n",
+ "    quant = f\"convert.py -i models/{model} -o {model}-exl2-{BPW}bpw-WD -cf {model}-exl2-{BPW}bpw -b {BPW}\"\n",
+ "!python {quant}"
+ ],
+ "metadata": {
+ "id": "8anbEbGyNmBI",
+ "cellView": "form"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#@title Zip and download the model\n",
+ "!rm -r {model}-exl2-{BPW}bpw-WD\n",
+ "!rm -r models/{model}\n",
+ "print(\"Zipping. May take a few minutes\")\n",
+ "!zip -r {model}-{BPW}bpw.zip {model}-exl2-{BPW}bpw\n",
+ "from google.colab import files\n",
+ "files.download(f\"{model}-{BPW}bpw.zip\")\n",
+ "print(\"Colab download speeds are very slow, so the download will take a while\")"
+ ],
+ "metadata": {
+ "cellView": "form",
+ "id": "XORLS2uPrbma"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
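+ {
+ "cell_type": "markdown",
+ "source": [
+ "## Sketch: upload to a private HF repo\n",
+ "Not part of the original notebook: the cell below is a minimal sketch of the planned private-repo upload, using `create_repo` and `upload_folder` from `huggingface_hub`. The repo name is an assumption (adjust as needed), and it requires the WRITE token from the login cell. Run it after the quantize cell."
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#@title Sketch: upload quant to a private HF repo\n",
+ "#assumption: repo is named {model}-exl2-{BPW}bpw under your account; not part of the original notebook\n",
+ "from huggingface_hub import HfApi, whoami\n",
+ "api = HfApi()\n",
+ "repo_id = f\"{whoami()['name']}/{model}-exl2-{BPW}bpw\"\n",
+ "#create the private repo if it doesn't already exist\n",
+ "api.create_repo(repo_id, private=True, exist_ok=True)\n",
+ "#upload the quantized model folder produced by the quantize cell\n",
+ "api.upload_folder(folder_path=f\"{model}-exl2-{BPW}bpw\", repo_id=repo_id)\n",
+ "print(f\"Uploaded to https://huggingface.co/{repo_id}\")"
+ ],
+ "metadata": {
+ "cellView": "form"
+ },
+ "execution_count": null,
+ "outputs": []
+ }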
+ ]
+ }