{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# How to Fine-Tune Llama 3 with SFTTrainer and Unsloth\n", "Hello everyone, today we are going to show how to fine-tune Llama 3 with SFTTrainer and Unsloth.\n", "First, we are going to perform a simple fine-tuning run using SFTTrainer.\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Step 1 - Installation of PyTorch\n", "The first step is to install PyTorch v2.2.1 with CUDA 12.1." ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: pip in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (23.3)\n", "Collecting pip\n", " Downloading pip-24.0-py3-none-any.whl.metadata (3.6 kB)\n", "Downloading pip-24.0-py3-none-any.whl (2.1 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.1/2.1 MB\u001b[0m \u001b[31m38.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", "\u001b[?25hInstalling collected packages: pip\n", " Attempting uninstall: pip\n", " Found existing installation: pip 23.3\n", " Uninstalling pip-23.3:\n", " Successfully uninstalled pip-23.3\n", "Successfully installed pip-24.0\n", "Looking in indexes: https://download.pytorch.org/whl/cu121\n", "Collecting torch==2.2.1\n", " Downloading https://download.pytorch.org/whl/cu121/torch-2.2.1%2Bcu121-cp310-cp310-linux_x86_64.whl (757.3 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m757.3/757.3 MB\u001b[0m \u001b[31m9.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hRequirement already satisfied: torchvision in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (0.15.2)\n", "Collecting torchaudio\n", " Downloading https://download.pytorch.org/whl/cu121/torchaudio-2.3.0%2Bcu121-cp310-cp310-linux_x86_64.whl (3.4 MB)\n", "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.4/3.4 MB\u001b[0m \u001b[31m86.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m\n", "\u001b[?25hCollecting xformers\n", " Downloading https://download.pytorch.org/whl/cu121/xformers-0.0.26.post1-cp310-cp310-manylinux2014_x86_64.whl (222.7 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m222.7/222.7 MB\u001b[0m \u001b[31m33.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hRequirement already satisfied: filelock in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from torch==2.2.1) (3.9.0)\n", "Collecting typing-extensions>=4.8.0 (from torch==2.2.1)\n", " Downloading https://download.pytorch.org/whl/typing_extensions-4.9.0-py3-none-any.whl (32 kB)\n", "Requirement already satisfied: sympy in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from torch==2.2.1) (1.12)\n", "Requirement already satisfied: networkx in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from torch==2.2.1) (2.8.4)\n", "Requirement already satisfied: jinja2 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from torch==2.2.1) (3.1.3)\n", "Requirement already satisfied: fsspec in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from torch==2.2.1) (2022.11.0)\n", "Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch==2.2.1)\n", " Downloading https://download.pytorch.org/whl/cu121/nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m23.7/23.7 MB\u001b[0m \u001b[31m97.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hCollecting nvidia-cuda-runtime-cu12==12.1.105 (from torch==2.2.1)\n", " Downloading https://download.pytorch.org/whl/cu121/nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 
kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m823.6/823.6 kB\u001b[0m \u001b[31m50.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting nvidia-cuda-cupti-cu12==12.1.105 (from torch==2.2.1)\n", " Downloading https://download.pytorch.org/whl/cu121/nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.1/14.1 MB\u001b[0m \u001b[31m120.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hCollecting nvidia-cudnn-cu12==8.9.2.26 (from torch==2.2.1)\n", " Downloading https://download.pytorch.org/whl/cu121/nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m731.7/731.7 MB\u001b[0m \u001b[31m12.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hCollecting nvidia-cublas-cu12==12.1.3.1 (from torch==2.2.1)\n", " Downloading https://download.pytorch.org/whl/cu121/nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m410.6/410.6 MB\u001b[0m \u001b[31m20.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hCollecting nvidia-cufft-cu12==11.0.2.54 (from torch==2.2.1)\n", " Downloading https://download.pytorch.org/whl/cu121/nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m121.6/121.6 MB\u001b[0m \u001b[31m50.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hCollecting nvidia-curand-cu12==10.3.2.106 (from torch==2.2.1)\n", " Downloading https://download.pytorch.org/whl/cu121/nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)\n", "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.5/56.5 MB\u001b[0m \u001b[31m76.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hCollecting nvidia-cusolver-cu12==11.4.5.107 (from torch==2.2.1)\n", " Downloading https://download.pytorch.org/whl/cu121/nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl (124.2 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m124.2/124.2 MB\u001b[0m \u001b[31m52.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hCollecting nvidia-cusparse-cu12==12.1.0.106 (from torch==2.2.1)\n", " Downloading https://download.pytorch.org/whl/cu121/nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl (196.0 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m196.0/196.0 MB\u001b[0m \u001b[31m36.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hCollecting nvidia-nccl-cu12==2.19.3 (from torch==2.2.1)\n", " Downloading https://download.pytorch.org/whl/cu121/nvidia_nccl_cu12-2.19.3-py3-none-manylinux1_x86_64.whl (166.0 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m166.0/166.0 MB\u001b[0m \u001b[31m40.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hCollecting nvidia-nvtx-cu12==12.1.105 (from torch==2.2.1)\n", " Downloading https://download.pytorch.org/whl/cu121/nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (99 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m99.1/99.1 kB\u001b[0m \u001b[31m14.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting triton==2.2.0 (from torch==2.2.1)\n", " Downloading https://download.pytorch.org/whl/triton-2.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (167.9 MB)\n", "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m167.9/167.9 MB\u001b[0m \u001b[31m19.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hCollecting nvidia-nvjitlink-cu12 (from nvidia-cusolver-cu12==11.4.5.107->torch==2.2.1)\n", " Downloading https://download.pytorch.org/whl/cu121/nvidia_nvjitlink_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (19.8 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m19.8/19.8 MB\u001b[0m \u001b[31m125.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hRequirement already satisfied: numpy in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from torchvision) (1.23.5)\n", "Requirement already satisfied: requests in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from torchvision) (2.31.0)\n", "Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from torchvision) (10.3.0)\n", "INFO: pip is looking at multiple versions of torchaudio to determine which version is compatible with other requirements. This could take a while.\n", "Collecting torchaudio\n", " Downloading https://download.pytorch.org/whl/cu121/torchaudio-2.2.2%2Bcu121-cp310-cp310-linux_x86_64.whl (3.4 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.4/3.4 MB\u001b[0m \u001b[31m129.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h Downloading https://download.pytorch.org/whl/cu121/torchaudio-2.2.1%2Bcu121-cp310-cp310-linux_x86_64.whl (3.4 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.4/3.4 MB\u001b[0m \u001b[31m87.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m\n", "\u001b[?25hINFO: pip is looking at multiple versions of xformers to determine which version is compatible with other requirements. 
This could take a while.\n", "Collecting xformers\n", " Downloading https://download.pytorch.org/whl/cu121/xformers-0.0.26-cp310-cp310-manylinux2014_x86_64.whl (222.6 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m222.6/222.6 MB\u001b[0m \u001b[31m11.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25h Downloading https://download.pytorch.org/whl/cu121/xformers-0.0.25.post1-cp310-cp310-manylinux2014_x86_64.whl (222.5 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m222.5/222.5 MB\u001b[0m \u001b[31m34.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25h Downloading https://download.pytorch.org/whl/cu121/xformers-0.0.25-cp310-cp310-manylinux2014_x86_64.whl (222.5 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m222.5/222.5 MB\u001b[0m \u001b[31m20.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hRequirement already satisfied: MarkupSafe>=2.0 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from jinja2->torch==2.2.1) (2.1.1)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from requests->torchvision) (2.0.4)\n", "Requirement already satisfied: idna<4,>=2.5 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from requests->torchvision) (3.7)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from requests->torchvision) (1.26.18)\n", "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from requests->torchvision) (2024.2.2)\n", "Requirement already satisfied: mpmath>=0.19 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from sympy->torch==2.2.1) (1.3.0)\n", "Installing 
collected packages: typing-extensions, triton, nvidia-nvtx-cu12, nvidia-nvjitlink-cu12, nvidia-nccl-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, nvidia-cusparse-cu12, nvidia-cudnn-cu12, nvidia-cusolver-cu12, torch, xformers, torchaudio\n", " Attempting uninstall: typing-extensions\n", " Found existing installation: typing_extensions 4.4.0\n", " Uninstalling typing_extensions-4.4.0:\n", " Successfully uninstalled typing_extensions-4.4.0\n", " Attempting uninstall: torch\n", " Found existing installation: torch 2.0.1\n", " Uninstalling torch-2.0.1:\n", " Successfully uninstalled torch-2.0.1\n", "Successfully installed nvidia-cublas-cu12-12.1.3.1 nvidia-cuda-cupti-cu12-12.1.105 nvidia-cuda-nvrtc-cu12-12.1.105 nvidia-cuda-runtime-cu12-12.1.105 nvidia-cudnn-cu12-8.9.2.26 nvidia-cufft-cu12-11.0.2.54 nvidia-curand-cu12-10.3.2.106 nvidia-cusolver-cu12-11.4.5.107 nvidia-cusparse-cu12-12.1.0.106 nvidia-nccl-cu12-2.19.3 nvidia-nvjitlink-cu12-12.1.105 nvidia-nvtx-cu12-12.1.105 torch-2.2.1+cu121 torchaudio-2.2.1+cu121 triton-2.2.0 typing-extensions-4.9.0 xformers-0.0.25\n" ] } ], "source": [ "!python -m pip install --upgrade pip\n", "!pip3 install torch==2.2.1 torchvision torchaudio xformers --index-url https://download.pytorch.org/whl/cu121" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Step 3 - Installation of Unsloth packages" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Collecting unsloth@ git+https://github.com/unslothai/unsloth.git (from unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)\n", " Cloning https://github.com/unslothai/unsloth.git to /tmp/wsuser/pip-install-8a93kdi0/unsloth_56c62c14bb3f4be29d884342054fdd22\n", " Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git 
/tmp/wsuser/pip-install-8a93kdi0/unsloth_56c62c14bb3f4be29d884342054fdd22\n", " Resolved https://github.com/unslothai/unsloth.git to commit 4211cc01409e3ced4f7abebaf68e244193b46e2c\n", " Installing build dependencies ... \u001b[?25ldone\n", "\u001b[?25h Getting requirements to build wheel ... \u001b[?25ldone\n", "\u001b[?25h Installing backend dependencies ... \u001b[?25ldone\n", "\u001b[?25h Preparing metadata (pyproject.toml) ... \u001b[?25ldone\n", "\u001b[?25hRequirement already satisfied: tyro in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (0.8.3)\n", "Requirement already satisfied: transformers>=4.38.2 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (4.40.2)\n", "Requirement already satisfied: datasets>=2.16.0 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (2.19.1)\n", "Requirement already satisfied: sentencepiece in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (0.1.97)\n", "Requirement already satisfied: tqdm in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (4.65.0)\n", "Requirement already satisfied: psutil in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (5.9.0)\n", "Requirement already satisfied: 
wheel>=0.42.0 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (0.43.0)\n", "Requirement already satisfied: numpy in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (1.23.5)\n", "Requirement already satisfied: protobuf<4.0.0 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (3.20.3)\n", "Requirement already satisfied: filelock in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (3.9.0)\n", "Requirement already satisfied: pyarrow>=12.0.0 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (16.0.0)\n", "Requirement already satisfied: pyarrow-hotfix in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (0.6)\n", "Requirement already satisfied: dill<0.3.9,>=0.3.0 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (0.3.8)\n", "Requirement already satisfied: pandas in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from datasets>=2.16.0->unsloth@ 
git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (1.5.3)\n", "Requirement already satisfied: requests>=2.19.0 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (2.31.0)\n", "Requirement already satisfied: xxhash in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (3.4.1)\n", "Requirement already satisfied: multiprocess in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (0.70.16)\n", "Requirement already satisfied: fsspec<=2024.3.1,>=2023.1.0 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from fsspec[http]<=2024.3.1,>=2023.1.0->datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (2024.3.1)\n", "Requirement already satisfied: aiohttp in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (3.9.3)\n", "Requirement already satisfied: huggingface-hub>=0.21.2 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (0.23.0)\n", "Requirement already satisfied: packaging in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from datasets>=2.16.0->unsloth@ 
git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (23.0)\n", "Requirement already satisfied: pyyaml>=5.1 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (6.0)\n", "Requirement already satisfied: regex!=2019.12.17 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from transformers>=4.38.2->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (2022.3.15)\n", "Requirement already satisfied: tokenizers<0.20,>=0.19 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from transformers>=4.38.2->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (0.19.1)\n", "Requirement already satisfied: safetensors>=0.4.1 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from transformers>=4.38.2->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (0.4.3)\n", "Requirement already satisfied: docstring-parser>=0.14.1 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from tyro->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (0.16)\n", "Requirement already satisfied: typing-extensions>=4.7.0 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from tyro->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (4.9.0)\n", "Requirement already satisfied: rich>=11.1.0 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from tyro->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ 
git+https://github.com/unslothai/unsloth.git) (13.7.1)\n", "Requirement already satisfied: shtab>=1.5.6 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from tyro->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (1.7.1)\n", "Requirement already satisfied: aiosignal>=1.1.2 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from aiohttp->datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (1.2.0)\n", "Requirement already satisfied: attrs>=17.3.0 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from aiohttp->datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (23.1.0)\n", "Requirement already satisfied: frozenlist>=1.1.1 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from aiohttp->datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (1.3.3)\n", "Requirement already satisfied: multidict<7.0,>=4.5 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from aiohttp->datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (6.0.2)\n", "Requirement already satisfied: yarl<2.0,>=1.0 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from aiohttp->datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (1.8.1)\n", "Requirement already satisfied: async-timeout<5.0,>=4.0 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from aiohttp->datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ 
git+https://github.com/unslothai/unsloth.git) (4.0.2)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from requests>=2.19.0->datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (2.0.4)\n", "Requirement already satisfied: idna<4,>=2.5 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from requests>=2.19.0->datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (3.7)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from requests>=2.19.0->datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (1.26.18)\n", "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from requests>=2.19.0->datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (2024.2.2)\n", "Requirement already satisfied: markdown-it-py>=2.2.0 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from rich>=11.1.0->tyro->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (3.0.0)\n", "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from rich>=11.1.0->tyro->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (2.15.1)\n", "Requirement already satisfied: python-dateutil>=2.8.1 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from pandas->datasets>=2.16.0->unsloth@ 
git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (2.8.2)\n", "Requirement already satisfied: pytz>=2020.1 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from pandas->datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (2022.7)\n", "Requirement already satisfied: mdurl~=0.1 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from markdown-it-py>=2.2.0->rich>=11.1.0->tyro->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (0.1.2)\n", "Requirement already satisfied: six>=1.5 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from python-dateutil>=2.8.1->pandas->datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (1.16.0)\n", "Requirement already satisfied: trl in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (0.8.6)\n", "Requirement already satisfied: peft in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (0.10.0)\n", "Requirement already satisfied: accelerate in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (0.30.0)\n", "Requirement already satisfied: bitsandbytes in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (0.43.1)\n", "Requirement already satisfied: datasets in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (2.19.1)\n", "Requirement already satisfied: filelock in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from datasets) (3.9.0)\n", "Requirement already satisfied: numpy>=1.17 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from datasets) (1.23.5)\n", "Requirement already satisfied: pyarrow>=12.0.0 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from datasets) 
(16.0.0)\n", "Requirement already satisfied: pyarrow-hotfix in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from datasets) (0.6)\n", "Requirement already satisfied: dill<0.3.9,>=0.3.0 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from datasets) (0.3.8)\n", "Requirement already satisfied: pandas in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from datasets) (1.5.3)\n", "Requirement already satisfied: requests>=2.19.0 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from datasets) (2.31.0)\n", "Requirement already satisfied: tqdm>=4.62.1 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from datasets) (4.65.0)\n", "Requirement already satisfied: xxhash in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from datasets) (3.4.1)\n", "Requirement already satisfied: multiprocess in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from datasets) (0.70.16)\n", "Requirement already satisfied: fsspec<=2024.3.1,>=2023.1.0 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from fsspec[http]<=2024.3.1,>=2023.1.0->datasets) (2024.3.1)\n", "Requirement already satisfied: aiohttp in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from datasets) (3.9.3)\n", "Requirement already satisfied: huggingface-hub>=0.21.2 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from datasets) (0.23.0)\n", "Requirement already satisfied: packaging in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from datasets) (23.0)\n", "Requirement already satisfied: pyyaml>=5.1 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from datasets) (6.0)\n", "Requirement already satisfied: aiosignal>=1.1.2 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from aiohttp->datasets) (1.2.0)\n", "Requirement already satisfied: attrs>=17.3.0 in 
/opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from aiohttp->datasets) (23.1.0)\n", "Requirement already satisfied: frozenlist>=1.1.1 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from aiohttp->datasets) (1.3.3)\n", "Requirement already satisfied: multidict<7.0,>=4.5 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from aiohttp->datasets) (6.0.2)\n", "Requirement already satisfied: yarl<2.0,>=1.0 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from aiohttp->datasets) (1.8.1)\n", "Requirement already satisfied: async-timeout<5.0,>=4.0 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from aiohttp->datasets) (4.0.2)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from huggingface-hub>=0.21.2->datasets) (4.9.0)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from requests>=2.19.0->datasets) (2.0.4)\n", "Requirement already satisfied: idna<4,>=2.5 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from requests>=2.19.0->datasets) (3.7)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from requests>=2.19.0->datasets) (1.26.18)\n", "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from requests>=2.19.0->datasets) (2024.2.2)\n", "Requirement already satisfied: python-dateutil>=2.8.1 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from pandas->datasets) (2.8.2)\n", "Requirement already satisfied: pytz>=2020.1 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from pandas->datasets) (2022.7)\n", "Requirement already satisfied: six>=1.5 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from 
python-dateutil>=2.8.1->pandas->datasets) (1.16.0)\n", "Requirement already satisfied: hyperopt in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (0.2.5)\n", "Requirement already satisfied: numpy in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from hyperopt) (1.23.5)\n", "Requirement already satisfied: scipy in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from hyperopt) (1.10.1)\n", "Requirement already satisfied: six in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from hyperopt) (1.16.0)\n", "Requirement already satisfied: networkx>=2.2 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from hyperopt) (2.8.4)\n", "Requirement already satisfied: future in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from hyperopt) (0.18.3)\n", "Requirement already satisfied: tqdm in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from hyperopt) (4.65.0)\n", "Requirement already satisfied: cloudpickle in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from hyperopt) (2.2.1)\n", "Requirement already satisfied: optuna in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (3.6.1)\n", "Requirement already satisfied: alembic>=1.5.0 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from optuna) (1.13.1)\n", "Requirement already satisfied: colorlog in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from optuna) (6.8.2)\n", "Requirement already satisfied: numpy in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from optuna) (1.23.5)\n", "Requirement already satisfied: packaging>=20.0 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from optuna) (23.0)\n", "Requirement already satisfied: sqlalchemy>=1.3.0 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from optuna) (1.4.39)\n", "Requirement already satisfied: tqdm in 
/opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from optuna) (4.65.0)\n", "Requirement already satisfied: PyYAML in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from optuna) (6.0)\n", "Requirement already satisfied: Mako in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from alembic>=1.5.0->optuna) (1.3.3)\n", "Requirement already satisfied: typing-extensions>=4 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from alembic>=1.5.0->optuna) (4.9.0)\n", "Requirement already satisfied: greenlet!=0.4.17 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from sqlalchemy>=1.3.0->optuna) (2.0.1)\n", "Requirement already satisfied: MarkupSafe>=0.9.2 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from Mako->alembic>=1.5.0->optuna) (2.1.1)\n" ] } ], "source": [ "import torch\n", "major_version, minor_version = torch.cuda.get_device_capability()\n", "# Must install separately since Colab has torch 2.2.1, which breaks packages\n", "!pip install \"unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git\"\n", "if major_version >= 8:\n", " # Use this for new GPUs like Ampere, Hopper GPUs (RTX 30xx, RTX 40xx, A100, H100, L40)\n", " !pip install --no-deps packaging ninja einops flash-attn xformers trl peft \\\n", " accelerate bitsandbytes\n", "else:\n", " # Use this for older GPUs (V100, Tesla T4, RTX 20xx)\n", " !pip install --no-deps trl peft accelerate bitsandbytes\n", "!pip install datasets\n", "!pip install hyperopt\n", "!pip install optuna \n", "pass" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Step 4 - Analysis of our infrastructure\n", "Before performing any training, it is important to know our system in order to take full advantage of it."
] }, { "cell_type": "code", "execution_count": 6, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "dH4JvbO9oiHE", "outputId": "399bc210-c095-4807-900f-6b4cf2fe133f" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Unable to find python bindings at /usr/local/dcgm/bindings/python3. No data will be captured.\n", "xFormers 0.0.25\n", "memory_efficient_attention.ckF: unavailable\n", "memory_efficient_attention.ckB: unavailable\n", "memory_efficient_attention.ck_decoderF: unavailable\n", "memory_efficient_attention.ck_splitKF: unavailable\n", "memory_efficient_attention.cutlassF: available\n", "memory_efficient_attention.cutlassB: available\n", "memory_efficient_attention.decoderF: available\n", "memory_efficient_attention.flshattF@v2.5.6: available\n", "memory_efficient_attention.flshattB@v2.5.6: available\n", "memory_efficient_attention.smallkF: available\n", "memory_efficient_attention.smallkB: available\n", "memory_efficient_attention.triton_splitKF: unavailable\n", "indexing.scaled_index_addF: unavailable\n", "indexing.scaled_index_addB: unavailable\n", "indexing.index_select: unavailable\n", "sequence_parallel_fused.write_values: available\n", "sequence_parallel_fused.wait_values: available\n", "sequence_parallel_fused.cuda_memset_32b_async: available\n", "sp24.sparse24_sparsify_both_ways: available\n", "sp24.sparse24_apply: available\n", "sp24.sparse24_apply_dense_output: available\n", "sp24._sparse24_gemm: available\n", "sp24._cslt_sparse_mm@0.4.0: available\n", "swiglu.dual_gemm_silu: available\n", "swiglu.gemm_fused_operand_sum: available\n", "swiglu.fused.p.cpp: available\n", "is_triton_available: False\n", "pytorch.version: 2.2.1+cu121\n", "pytorch.cuda: available\n", "gpu.compute_capability: 7.0\n", "gpu.name: Tesla V100-PCIE-16GB\n", "dcgm_profiler: unavailable\n", "build.info: available\n", "build.cuda_version: 1201\n", "build.hip_version: None\n", "build.python_version: 3.10.13\n", "build.torch_version: 
2.2.1+cu121\n", "build.env.TORCH_CUDA_ARCH_LIST: 5.0+PTX 6.0 6.1 7.0 7.5 8.0+PTX 9.0\n", "build.env.PYTORCH_ROCM_ARCH: None\n", "build.env.XFORMERS_BUILD_TYPE: Release\n", "build.env.XFORMERS_ENABLE_DEBUG_ASSERTIONS: None\n", "build.env.NVCC_FLAGS: None\n", "build.env.XFORMERS_PACKAGE_FROM: wheel-v0.0.25\n", "build.nvcc_version: 12.1.66\n", "source.privacy: open source\n", "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n", "++++++++++++++++++ BUG REPORT INFORMATION ++++++++++++++++++\n", "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n", "++++++++++++++++++++++++++ OTHER +++++++++++++++++++++++++++\n", "CUDA specs: CUDASpecs(highest_compute_capability=(7, 0), cuda_version_string='121', cuda_version_tuple=(12, 1))\n", "PyTorch settings found: CUDA_VERSION=121, Highest Compute Capability: (7, 0).\n", "To manually override the PyTorch CUDA version please see: https://github.com/TimDettmers/bitsandbytes/blob/main/docs/source/nonpytorchcuda.mdx\n", "WARNING: Compute capability < 7.5 detected! 
Only slow 8-bit matmul is supported for your GPU!\n", "If you run into issues with 8-bit matmul, you can try 4-bit quantization:\n", "https://huggingface.co/blog/4bit-transformers-bitsandbytes\n", "The directory listed in your path is found to be non-existent: /usr/local/nvidia/lib\n", "The directory listed in your path is found to be non-existent: //private.runtime.dataplatform.cloud.ibm.com\n", "The directory listed in your path is found to be non-existent: /home/wsuser/jars/*\n", "The directory listed in your path is found to be non-existent: /opt/jdbc/*\n", "The directory listed in your path is found to be non-existent: bluemix/prod\n", "The directory listed in your path is found to be non-existent: //api.dataplatform.cloud.ibm.com\n", "The directory listed in your path is found to be non-existent: //matplotlib_inline.backend_inline\n", "The directory listed in your path is found to be non-existent: --xla_gpu_cuda_data_dir=/opt/conda/envs/Python-RT23.1-CUDA\n", "CUDA SETUP: WARNING! CUDA runtime files not found in any environmental path.\n", "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n", "++++++++++++++++++++++ DEBUG INFO END ++++++++++++++++++++++\n", "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n", "Checking that the library is importable and CUDA is callable...\n", "SUCCESS!\n", "Installation was successful!\n", "Thu May 9 19:59:13 2024 \n", "+---------------------------------------------------------------------------------------+\n", "| NVIDIA-SMI 535.129.03 Driver Version: 535.129.03 CUDA Version: 12.2 |\n", "|-----------------------------------------+----------------------+----------------------+\n", "| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |\n", "| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |\n", "| | | MIG M. 
|\n", "|=========================================+======================+======================|\n", "| 0 Tesla V100-PCIE-16GB Off | 00000000:AF:00.0 Off | 0 |\n", "| N/A 33C P0 40W / 250W | 4MiB / 16384MiB | 0% Default |\n", "| | | N/A |\n", "+-----------------------------------------+----------------------+----------------------+\n", "| 1 Tesla V100-PCIE-16GB Off | 00000000:D8:00.0 Off | 0 |\n", "| N/A 33C P0 26W / 250W | 4MiB / 16384MiB | 0% Default |\n", "| | | N/A |\n", "+-----------------------------------------+----------------------+----------------------+\n", " \n", "+---------------------------------------------------------------------------------------+\n", "| Processes: |\n", "| GPU GI CI PID Type Process name GPU Memory |\n", "| ID ID Usage |\n", "|=======================================================================================|\n", "| No running processes found |\n", "+---------------------------------------------------------------------------------------+\n" ] } ], "source": [ "!python -m xformers.info\n", "!python -m bitsandbytes\n", "!nvidia-smi" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Step 5 Login to Hugging Face" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The token has not been saved to the git credentials helper. 
Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.\n", "Token is valid (permission: write).\n", "Your token has been saved to /home/wsuser/.cache/huggingface/token\n", "Login successful\n" ] } ], "source": [ "token=\"hf_\"\n", "from huggingface_hub import login, logout\n", "login(token) # non-blocking login" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Step 5 Simple Fine Tuning Method\n", "\n", "First, let us show the simplest method, which is the one provided by SFTTrainer." ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "25db645610ac40e6a8a647896dec0f16", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Resolving data files: 0%| | 0/18 [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "a8d6ac36539a4c12a5dc6eafbc58d9ca", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Resolving data files: 0%| | 0/18 [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "53fa0f7afe9849c99fa1761aafd363c1", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Loading dataset shards: 0%| | 0/18 [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "da22a855c9524144870d16abcf850047", "version_major": 2, "version_minor": 0 }, "text/plain": [ "tokenizer_config.json: 0%| | 0.00/51.0k [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "6770b2824fa64dc1a3dd6279683e6915", "version_major": 2, "version_minor": 0 }, "text/plain": [ "tokenizer.json: 0%| | 0.00/9.09M [00:00, ?B/s]" ] }, "metadata": {}, 
"output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "cd128b3e68ac4bf6a87d812e1c0c248e", "version_major": 2, "version_minor": 0 }, "text/plain": [ "special_tokens_map.json: 0%| | 0.00/73.0 [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "0fb4fe0fc5384b3182762db3f8c42b9e", "version_major": 2, "version_minor": 0 }, "text/plain": [ "config.json: 0%| | 0.00/654 [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "7f9cfeabcf424ccc98a2b9d69d770a0a", "version_major": 2, "version_minor": 0 }, "text/plain": [ "model.safetensors.index.json: 0%| | 0.00/23.9k [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "8244c9b77e7f4afe8524963a8f7b02e2", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading shards: 0%| | 0/4 [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "690662c1a9244cdd9ea9f1162ce3cd30", "version_major": 2, "version_minor": 0 }, "text/plain": [ "model-00001-of-00004.safetensors: 0%| | 0.00/4.98G [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "3813d56b85b74450a93e9ade68a807b5", "version_major": 2, "version_minor": 0 }, "text/plain": [ "model-00002-of-00004.safetensors: 0%| | 0.00/5.00G [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "ebb06c38dd79488ab8eb273359406577", "version_major": 2, 
"version_minor": 0 }, "text/plain": [ "model-00003-of-00004.safetensors: 0%| | 0.00/4.92G [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "5251b05267db420d8f14ce05b7d52b81", "version_major": 2, "version_minor": 0 }, "text/plain": [ "model-00004-of-00004.safetensors: 0%| | 0.00/1.17G [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "947b7ad7d14f4201902e1a292ec7f513", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Loading checkpoint shards: 0%| | 0/4 [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "1ddcf0ae14934fd79d8926aee0972abe", "version_major": 2, "version_minor": 0 }, "text/plain": [ "generation_config.json: 0%| | 0.00/187 [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "7423975680ef41ff9ac362c72b2c0907", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Map: 0%| | 0/100 [00:00, ? examples/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n", "max_steps is given, it will override any value given in num_train_epochs\n", "/opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages/torch/utils/checkpoint.py:460: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. 
Refer to docs for more details on the differences between the two variants.\n", " warnings.warn(\n" ] }, { "data": { "text/html": [ "\n", "
Step | \n", "Training Loss | \n", "
---|---|
1 | \n", "2.346700 | \n", "
"
],
"text/plain": [
" "
],
"text/plain": [
"\n",
" \n",
"
\n",
" \n",
" \n",
" \n",
" \n",
"Step \n",
" Training Loss \n",
"