{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# How to FineTune Llama 3 with SFTTrainer and Unsloth\n", "Hello everyone, today we are going to show how we can Fine Tune Llama 3 with SFTTrainer and Unsloth\n", "First we are going to perform a simmple Fine Tunning by using SFTTrainer\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Step 1 - Installation of Pytorch\n", "The first step is install pythorch v 2.2.1 with Cuda 12.1 " ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: pip in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (23.3)\n", "Collecting pip\n", " Downloading pip-24.0-py3-none-any.whl.metadata (3.6 kB)\n", "Downloading pip-24.0-py3-none-any.whl (2.1 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.1/2.1 MB\u001b[0m \u001b[31m38.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", "\u001b[?25hInstalling collected packages: pip\n", " Attempting uninstall: pip\n", " Found existing installation: pip 23.3\n", " Uninstalling pip-23.3:\n", " Successfully uninstalled pip-23.3\n", "Successfully installed pip-24.0\n", "Looking in indexes: https://download.pytorch.org/whl/cu121\n", "Collecting torch==2.2.1\n", " Downloading https://download.pytorch.org/whl/cu121/torch-2.2.1%2Bcu121-cp310-cp310-linux_x86_64.whl (757.3 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m757.3/757.3 MB\u001b[0m \u001b[31m9.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hRequirement already satisfied: torchvision in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (0.15.2)\n", "Collecting torchaudio\n", " Downloading https://download.pytorch.org/whl/cu121/torchaudio-2.3.0%2Bcu121-cp310-cp310-linux_x86_64.whl (3.4 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.4/3.4 MB\u001b[0m \u001b[31m86.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m\n", "\u001b[?25hCollecting xformers\n", " Downloading https://download.pytorch.org/whl/cu121/xformers-0.0.26.post1-cp310-cp310-manylinux2014_x86_64.whl (222.7 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m222.7/222.7 MB\u001b[0m \u001b[31m33.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hRequirement already satisfied: filelock in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from torch==2.2.1) (3.9.0)\n", "Collecting typing-extensions>=4.8.0 (from torch==2.2.1)\n", " Downloading https://download.pytorch.org/whl/typing_extensions-4.9.0-py3-none-any.whl (32 kB)\n", "Requirement already satisfied: sympy in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from torch==2.2.1) (1.12)\n", "Requirement already satisfied: networkx in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from torch==2.2.1) (2.8.4)\n", "Requirement already satisfied: jinja2 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from torch==2.2.1) (3.1.3)\n", "Requirement already satisfied: fsspec in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from torch==2.2.1) (2022.11.0)\n", "Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch==2.2.1)\n", " Downloading 
https://download.pytorch.org/whl/cu121/nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m23.7/23.7 MB\u001b[0m \u001b[31m97.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hCollecting nvidia-cuda-runtime-cu12==12.1.105 (from torch==2.2.1)\n", " Downloading https://download.pytorch.org/whl/cu121/nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m823.6/823.6 kB\u001b[0m \u001b[31m50.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting nvidia-cuda-cupti-cu12==12.1.105 (from torch==2.2.1)\n", " Downloading https://download.pytorch.org/whl/cu121/nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.1/14.1 MB\u001b[0m \u001b[31m120.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hCollecting nvidia-cudnn-cu12==8.9.2.26 (from torch==2.2.1)\n", " Downloading https://download.pytorch.org/whl/cu121/nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m731.7/731.7 MB\u001b[0m \u001b[31m12.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hCollecting nvidia-cublas-cu12==12.1.3.1 (from torch==2.2.1)\n", " Downloading https://download.pytorch.org/whl/cu121/nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m410.6/410.6 MB\u001b[0m \u001b[31m20.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hCollecting nvidia-cufft-cu12==11.0.2.54 (from torch==2.2.1)\n", " Downloading https://download.pytorch.org/whl/cu121/nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m121.6/121.6 MB\u001b[0m \u001b[31m50.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hCollecting nvidia-curand-cu12==10.3.2.106 (from torch==2.2.1)\n", " Downloading https://download.pytorch.org/whl/cu121/nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.5/56.5 MB\u001b[0m \u001b[31m76.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hCollecting nvidia-cusolver-cu12==11.4.5.107 (from torch==2.2.1)\n", " Downloading https://download.pytorch.org/whl/cu121/nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl (124.2 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m124.2/124.2 MB\u001b[0m \u001b[31m52.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hCollecting nvidia-cusparse-cu12==12.1.0.106 (from torch==2.2.1)\n", " Downloading https://download.pytorch.org/whl/cu121/nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl (196.0 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m196.0/196.0 MB\u001b[0m \u001b[31m36.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hCollecting nvidia-nccl-cu12==2.19.3 (from 
torch==2.2.1)\n", " Downloading https://download.pytorch.org/whl/cu121/nvidia_nccl_cu12-2.19.3-py3-none-manylinux1_x86_64.whl (166.0 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m166.0/166.0 MB\u001b[0m \u001b[31m40.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hCollecting nvidia-nvtx-cu12==12.1.105 (from torch==2.2.1)\n", " Downloading https://download.pytorch.org/whl/cu121/nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (99 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m99.1/99.1 kB\u001b[0m \u001b[31m14.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting triton==2.2.0 (from torch==2.2.1)\n", " Downloading https://download.pytorch.org/whl/triton-2.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (167.9 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m167.9/167.9 MB\u001b[0m \u001b[31m19.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hCollecting nvidia-nvjitlink-cu12 (from nvidia-cusolver-cu12==11.4.5.107->torch==2.2.1)\n", " Downloading https://download.pytorch.org/whl/cu121/nvidia_nvjitlink_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (19.8 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m19.8/19.8 MB\u001b[0m \u001b[31m125.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hRequirement already satisfied: numpy in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from torchvision) (1.23.5)\n", "Requirement already satisfied: requests in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from torchvision) (2.31.0)\n", "Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from torchvision) (10.3.0)\n", "INFO: pip is looking at multiple versions of torchaudio to determine which version is compatible with other requirements. This could take a while.\n", "Collecting torchaudio\n", " Downloading https://download.pytorch.org/whl/cu121/torchaudio-2.2.2%2Bcu121-cp310-cp310-linux_x86_64.whl (3.4 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.4/3.4 MB\u001b[0m \u001b[31m129.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h Downloading https://download.pytorch.org/whl/cu121/torchaudio-2.2.1%2Bcu121-cp310-cp310-linux_x86_64.whl (3.4 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.4/3.4 MB\u001b[0m \u001b[31m87.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m\n", "\u001b[?25hINFO: pip is looking at multiple versions of xformers to determine which version is compatible with other requirements. 
This could take a while.\n", "Collecting xformers\n", " Downloading https://download.pytorch.org/whl/cu121/xformers-0.0.26-cp310-cp310-manylinux2014_x86_64.whl (222.6 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m222.6/222.6 MB\u001b[0m \u001b[31m11.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25h Downloading https://download.pytorch.org/whl/cu121/xformers-0.0.25.post1-cp310-cp310-manylinux2014_x86_64.whl (222.5 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m222.5/222.5 MB\u001b[0m \u001b[31m34.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25h Downloading https://download.pytorch.org/whl/cu121/xformers-0.0.25-cp310-cp310-manylinux2014_x86_64.whl (222.5 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m222.5/222.5 MB\u001b[0m \u001b[31m20.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hRequirement already satisfied: MarkupSafe>=2.0 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from jinja2->torch==2.2.1) (2.1.1)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from requests->torchvision) (2.0.4)\n", "Requirement already satisfied: idna<4,>=2.5 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from requests->torchvision) (3.7)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from requests->torchvision) (1.26.18)\n", "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from requests->torchvision) (2024.2.2)\n", "Requirement already satisfied: mpmath>=0.19 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from sympy->torch==2.2.1) (1.3.0)\n", "Installing collected packages: typing-extensions, triton, nvidia-nvtx-cu12, nvidia-nvjitlink-cu12, nvidia-nccl-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, nvidia-cusparse-cu12, nvidia-cudnn-cu12, nvidia-cusolver-cu12, torch, xformers, torchaudio\n", " Attempting uninstall: typing-extensions\n", " Found existing installation: typing_extensions 4.4.0\n", " Uninstalling typing_extensions-4.4.0:\n", " Successfully uninstalled typing_extensions-4.4.0\n", " Attempting uninstall: torch\n", " Found existing installation: torch 2.0.1\n", " Uninstalling torch-2.0.1:\n", " Successfully uninstalled torch-2.0.1\n", "Successfully installed nvidia-cublas-cu12-12.1.3.1 nvidia-cuda-cupti-cu12-12.1.105 nvidia-cuda-nvrtc-cu12-12.1.105 nvidia-cuda-runtime-cu12-12.1.105 nvidia-cudnn-cu12-8.9.2.26 nvidia-cufft-cu12-11.0.2.54 nvidia-curand-cu12-10.3.2.106 nvidia-cusolver-cu12-11.4.5.107 nvidia-cusparse-cu12-12.1.0.106 nvidia-nccl-cu12-2.19.3 nvidia-nvjitlink-cu12-12.1.105 nvidia-nvtx-cu12-12.1.105 torch-2.2.1+cu121 torchaudio-2.2.1+cu121 triton-2.2.0 typing-extensions-4.9.0 xformers-0.0.25\n" ] } ], "source": [ "!python -m pip install --upgrade pip\n", "!pip3 install torch==2.2.1 torchvision torchaudio xformers --index-url https://download.pytorch.org/whl/cu121" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Step 3 - Installation of Uslotch packages" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", 
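{ "cell_type": "markdown", "metadata": {}, "source": [ "Before moving on, it is worth confirming that the CUDA build of PyTorch was actually picked up. The quick check below is an addition to this write-up (it was not part of the original run); the expected values in the comments assume the install above succeeded." ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import torch\n", "print(torch.__version__)           # expected: 2.2.1+cu121\n", "print(torch.cuda.is_available())   # expected: True on a CUDA machine\n", "print(torch.cuda.get_device_name(0))" ] },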
"output_type": "stream", "text": [ "Collecting unsloth@ git+https://github.com/unslothai/unsloth.git (from unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)\n", " Cloning https://github.com/unslothai/unsloth.git to /tmp/wsuser/pip-install-8a93kdi0/unsloth_56c62c14bb3f4be29d884342054fdd22\n", " Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/wsuser/pip-install-8a93kdi0/unsloth_56c62c14bb3f4be29d884342054fdd22\n", " Resolved https://github.com/unslothai/unsloth.git to commit 4211cc01409e3ced4f7abebaf68e244193b46e2c\n", " Installing build dependencies ... \u001b[?25ldone\n", "\u001b[?25h Getting requirements to build wheel ... \u001b[?25ldone\n", "\u001b[?25h Installing backend dependencies ... \u001b[?25ldone\n", "\u001b[?25h Preparing metadata (pyproject.toml) ... \u001b[?25ldone\n", "\u001b[?25hRequirement already satisfied: tyro in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (0.8.3)\n", "Requirement already satisfied: transformers>=4.38.2 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (4.40.2)\n", "Requirement already satisfied: datasets>=2.16.0 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (2.19.1)\n", "Requirement already satisfied: sentencepiece in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (0.1.97)\n", "Requirement already satisfied: tqdm in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (4.65.0)\n", "Requirement already satisfied: psutil in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (5.9.0)\n", "Requirement already satisfied: wheel>=0.42.0 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (0.43.0)\n", "Requirement already satisfied: numpy in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (1.23.5)\n", "Requirement already satisfied: protobuf<4.0.0 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (3.20.3)\n", "Requirement already satisfied: filelock in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (3.9.0)\n", "Requirement already satisfied: pyarrow>=12.0.0 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from datasets>=2.16.0->unsloth@ 
git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (16.0.0)\n", "Requirement already satisfied: pyarrow-hotfix in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (0.6)\n", "Requirement already satisfied: dill<0.3.9,>=0.3.0 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (0.3.8)\n", "Requirement already satisfied: pandas in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (1.5.3)\n", "Requirement already satisfied: requests>=2.19.0 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (2.31.0)\n", "Requirement already satisfied: xxhash in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (3.4.1)\n", "Requirement already satisfied: multiprocess in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (0.70.16)\n", "Requirement already satisfied: fsspec<=2024.3.1,>=2023.1.0 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from fsspec[http]<=2024.3.1,>=2023.1.0->datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (2024.3.1)\n", "Requirement already satisfied: aiohttp in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (3.9.3)\n", "Requirement already satisfied: huggingface-hub>=0.21.2 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (0.23.0)\n", "Requirement already satisfied: packaging in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (23.0)\n", "Requirement already satisfied: pyyaml>=5.1 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (6.0)\n", "Requirement already satisfied: regex!=2019.12.17 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from transformers>=4.38.2->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (2022.3.15)\n", "Requirement already satisfied: tokenizers<0.20,>=0.19 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from 
transformers>=4.38.2->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (0.19.1)\n", "Requirement already satisfied: safetensors>=0.4.1 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from transformers>=4.38.2->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (0.4.3)\n", "Requirement already satisfied: docstring-parser>=0.14.1 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from tyro->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (0.16)\n", "Requirement already satisfied: typing-extensions>=4.7.0 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from tyro->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (4.9.0)\n", "Requirement already satisfied: rich>=11.1.0 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from tyro->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (13.7.1)\n", "Requirement already satisfied: shtab>=1.5.6 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from tyro->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (1.7.1)\n", "Requirement already satisfied: aiosignal>=1.1.2 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from aiohttp->datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (1.2.0)\n", "Requirement already satisfied: attrs>=17.3.0 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from aiohttp->datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (23.1.0)\n", "Requirement already satisfied: frozenlist>=1.1.1 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from aiohttp->datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (1.3.3)\n", "Requirement already satisfied: multidict<7.0,>=4.5 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from aiohttp->datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (6.0.2)\n", "Requirement already satisfied: yarl<2.0,>=1.0 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from aiohttp->datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (1.8.1)\n", "Requirement already satisfied: async-timeout<5.0,>=4.0 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from aiohttp->datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (4.0.2)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from requests>=2.19.0->datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (2.0.4)\n", "Requirement already satisfied: idna<4,>=2.5 in 
/opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from requests>=2.19.0->datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (3.7)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from requests>=2.19.0->datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (1.26.18)\n", "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from requests>=2.19.0->datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (2024.2.2)\n", "Requirement already satisfied: markdown-it-py>=2.2.0 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from rich>=11.1.0->tyro->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (3.0.0)\n", "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from rich>=11.1.0->tyro->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (2.15.1)\n", "Requirement already satisfied: python-dateutil>=2.8.1 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from pandas->datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (2.8.2)\n", "Requirement already satisfied: pytz>=2020.1 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from pandas->datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (2022.7)\n", "Requirement already satisfied: mdurl~=0.1 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from markdown-it-py>=2.2.0->rich>=11.1.0->tyro->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (0.1.2)\n", "Requirement already satisfied: six>=1.5 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from python-dateutil>=2.8.1->pandas->datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (1.16.0)\n", "Requirement already satisfied: trl in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (0.8.6)\n", "Requirement already satisfied: peft in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (0.10.0)\n", "Requirement already satisfied: accelerate in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (0.30.0)\n", "Requirement already satisfied: bitsandbytes in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (0.43.1)\n", "Requirement already satisfied: datasets in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (2.19.1)\n", "Requirement already satisfied: filelock in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from datasets) (3.9.0)\n", "Requirement already satisfied: numpy>=1.17 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from datasets) (1.23.5)\n", "Requirement already satisfied: pyarrow>=12.0.0 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from datasets) 
(16.0.0)\n", "Requirement already satisfied: pyarrow-hotfix in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from datasets) (0.6)\n", "Requirement already satisfied: dill<0.3.9,>=0.3.0 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from datasets) (0.3.8)\n", "Requirement already satisfied: pandas in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from datasets) (1.5.3)\n", "Requirement already satisfied: requests>=2.19.0 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from datasets) (2.31.0)\n", "Requirement already satisfied: tqdm>=4.62.1 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from datasets) (4.65.0)\n", "Requirement already satisfied: xxhash in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from datasets) (3.4.1)\n", "Requirement already satisfied: multiprocess in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from datasets) (0.70.16)\n", "Requirement already satisfied: fsspec<=2024.3.1,>=2023.1.0 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from fsspec[http]<=2024.3.1,>=2023.1.0->datasets) (2024.3.1)\n", "Requirement already satisfied: aiohttp in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from datasets) (3.9.3)\n", "Requirement already satisfied: huggingface-hub>=0.21.2 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from datasets) (0.23.0)\n", "Requirement already satisfied: packaging in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from datasets) (23.0)\n", "Requirement already satisfied: pyyaml>=5.1 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from datasets) (6.0)\n", "Requirement already satisfied: aiosignal>=1.1.2 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from aiohttp->datasets) (1.2.0)\n", "Requirement already satisfied: attrs>=17.3.0 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from aiohttp->datasets) (23.1.0)\n", "Requirement already satisfied: frozenlist>=1.1.1 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from aiohttp->datasets) (1.3.3)\n", "Requirement already satisfied: multidict<7.0,>=4.5 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from aiohttp->datasets) (6.0.2)\n", "Requirement already satisfied: yarl<2.0,>=1.0 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from aiohttp->datasets) (1.8.1)\n", "Requirement already satisfied: async-timeout<5.0,>=4.0 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from aiohttp->datasets) (4.0.2)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from huggingface-hub>=0.21.2->datasets) (4.9.0)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from requests>=2.19.0->datasets) (2.0.4)\n", "Requirement already satisfied: idna<4,>=2.5 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from requests>=2.19.0->datasets) (3.7)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from requests>=2.19.0->datasets) (1.26.18)\n", "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from requests>=2.19.0->datasets) (2024.2.2)\n", "Requirement already satisfied: 
python-dateutil>=2.8.1 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from pandas->datasets) (2.8.2)\n", "Requirement already satisfied: pytz>=2020.1 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from pandas->datasets) (2022.7)\n", "Requirement already satisfied: six>=1.5 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from python-dateutil>=2.8.1->pandas->datasets) (1.16.0)\n", "Requirement already satisfied: hyperopt in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (0.2.5)\n", "Requirement already satisfied: numpy in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from hyperopt) (1.23.5)\n", "Requirement already satisfied: scipy in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from hyperopt) (1.10.1)\n", "Requirement already satisfied: six in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from hyperopt) (1.16.0)\n", "Requirement already satisfied: networkx>=2.2 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from hyperopt) (2.8.4)\n", "Requirement already satisfied: future in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from hyperopt) (0.18.3)\n", "Requirement already satisfied: tqdm in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from hyperopt) (4.65.0)\n", "Requirement already satisfied: cloudpickle in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from hyperopt) (2.2.1)\n", "Requirement already satisfied: optuna in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (3.6.1)\n", "Requirement already satisfied: alembic>=1.5.0 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from optuna) (1.13.1)\n", "Requirement already satisfied: colorlog in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from optuna) (6.8.2)\n", "Requirement already satisfied: numpy in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from optuna) (1.23.5)\n", "Requirement already satisfied: packaging>=20.0 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from optuna) (23.0)\n", "Requirement already satisfied: sqlalchemy>=1.3.0 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from optuna) (1.4.39)\n", "Requirement already satisfied: tqdm in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from optuna) (4.65.0)\n", "Requirement already satisfied: PyYAML in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from optuna) (6.0)\n", "Requirement already satisfied: Mako in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from alembic>=1.5.0->optuna) (1.3.3)\n", "Requirement already satisfied: typing-extensions>=4 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from alembic>=1.5.0->optuna) (4.9.0)\n", "Requirement already satisfied: greenlet!=0.4.17 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from sqlalchemy>=1.3.0->optuna) (2.0.1)\n", "Requirement already satisfied: MarkupSafe>=0.9.2 in /opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages (from Mako->alembic>=1.5.0->optuna) (2.1.1)\n" ] } ], "source": [ "import torch\n", "major_version, minor_version = torch.cuda.get_device_capability()\n", "# Must install separately since Colab has torch 2.2.1, which breaks packages\n", "!pip install \"unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git\"\n", "if major_version >= 8:\n", " # Use this for new GPUs like 
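{ "cell_type": "markdown", "metadata": {}, "source": [ "A quick import check (again an addition to the original run, not one of its cells) catches a broken install before any training time is spent:" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import transformers, trl, peft, accelerate, bitsandbytes\n", "# Print the version of each core training library\n", "for module in (transformers, trl, peft, accelerate, bitsandbytes):\n", "    print(module.__name__, module.__version__)" ] },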
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Step 3 - Analysis of our infrastructure\n", "Before performing any training, it is important to know our system so that we can take full advantage of it." ] },
{ "cell_type": "code", "execution_count": 6, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "dH4JvbO9oiHE", "outputId": "399bc210-c095-4807-900f-6b4cf2fe133f" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "xFormers 0.0.25\n", "memory_efficient_attention.cutlassF: available\n", "memory_efficient_attention.flshattF@v2.5.6: available\n", "is_triton_available: False\n", "pytorch.version: 2.2.1+cu121\n", "pytorch.cuda: available\n", "gpu.compute_capability: 7.0\n", "gpu.name: Tesla V100-PCIE-16GB\n", "... (remaining xformers feature flags trimmed) ...\n", "CUDA specs: CUDASpecs(highest_compute_capability=(7, 0), cuda_version_string='121', cuda_version_tuple=(12, 1))\n", "WARNING: Compute capability < 7.5 detected! Only slow 8-bit matmul is supported for your GPU!\n", "If you run into issues with 8-bit matmul, you can try 4-bit quantization: https://huggingface.co/blog/4bit-transformers-bitsandbytes\n", "Checking that the library is importable and CUDA is callable...\n", "SUCCESS!\n", "Installation was successful!\n", "NVIDIA-SMI 535.129.03  Driver Version: 535.129.03  CUDA Version: 12.2\n", "GPU 0: Tesla V100-PCIE-16GB, 4MiB / 16384MiB used, 0% utilization\n", "GPU 1: Tesla V100-PCIE-16GB, 4MiB / 16384MiB used, 0% utilization\n" ] } ], "source": [ "!python -m xformers.info\n", "!python -m bitsandbytes\n", "!nvidia-smi" ] },
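{ "cell_type": "markdown", "metadata": {}, "source": [ "One practical consequence of the report above: our V100s have compute capability 7.0, so bfloat16 is not supported and we should train in fp16. The small helper below is an addition for this write-up (it assumes a CUDA device is present) that makes the choice explicit:" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import torch\n", "\n", "major, minor = torch.cuda.get_device_capability()\n", "# bfloat16 needs compute capability >= 8.0 (Ampere); V100 (7.0) must fall back to float16\n", "compute_dtype = torch.bfloat16 if major >= 8 else torch.float16\n", "print(f\"compute capability {major}.{minor} -> training dtype {compute_dtype}\")" ] },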
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Step 4 - Login to Hugging Face" ] },
{ "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.\n", "Token is valid (permission: write).\n", "Your token has been saved to /home/wsuser/.cache/huggingface/token\n", "Login successful\n" ] } ], "source": [ "token = \"hf_\"  # paste your Hugging Face access token here\n", "from huggingface_hub import login, logout\n", "login(token)  # non-blocking login" ] },
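{ "cell_type": "markdown", "metadata": {}, "source": [ "Hard-coding a token in a notebook is easy to leak. As a safer alternative (an addition to the original, using only the standard library and `huggingface_hub`), read it from the environment or prompt for it:" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import os\n", "from getpass import getpass\n", "from huggingface_hub import login\n", "\n", "# Prefer an environment variable; fall back to an interactive prompt\n", "token = os.environ.get(\"HF_TOKEN\") or getpass(\"Hugging Face token: \")\n", "login(token)" ] },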
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Step 5 - Simple Fine-Tuning Method\n", "\n", "First, let us show the simplest method provided by SFTTrainer." ] },
{ "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Resolving data files:   0%|          | 0/18 [00:00<?, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "[1/1 00:04, Epoch 0/1]\n", "Step  Training Loss\n", "1     2.346700" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "TrainOutput(global_step=1, training_loss=2.346719980239868, metrics={'train_runtime': 8.2949, 'train_samples_per_second': 0.482, 'train_steps_per_second': 0.121, 'total_flos': 74593973698560.0, 'train_loss': 2.346719980239868, 'epoch': 0.04})" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from datasets import load_dataset\n", "import torch\n", "from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments\n", "from peft import LoraConfig\n", "from trl import SFTTrainer\n", "\n", "# Load the dataset\n", "dataset_name = \"ruslanmv/ai-medical-dataset\"\n", "dataset = load_dataset(dataset_name, split=\"train\")\n", "# Select the first 100 rows of the dataset\n", "dataset = dataset.select(range(100))\n", "# Device map\n", "device_map = 'auto'  # for PP and running with `python test_sft.py`\n", "# Load the model + tokenizer\n", "model_name = \"meta-llama/Meta-Llama-3-8B-Instruct\"\n", "tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)\n", "tokenizer.pad_token = tokenizer.eos_token\n", "bnb_config = BitsAndBytesConfig(\n", "    load_in_4bit=True,\n", "    bnb_4bit_quant_type=\"nf4\",\n", "    bnb_4bit_compute_dtype=torch.float16,\n", ")\n", "model = AutoModelForCausalLM.from_pretrained(\n", "    model_name,\n", "    quantization_config=bnb_config,\n", "    trust_remote_code=True,\n", "    use_cache=False,\n", "    device_map=device_map\n", ")\n", "# PEFT config\n", "lora_alpha = 16\n", "lora_dropout = 0.1\n", "lora_r = 32  # 64\n", "peft_config = LoraConfig(\n", "    lora_alpha=lora_alpha,\n", "    lora_dropout=lora_dropout,\n", "    r=lora_r,\n", "    bias=\"none\",\n", "    task_type=\"CAUSAL_LM\",\n", "    target_modules=[\"k_proj\", \"q_proj\", \"v_proj\", \"up_proj\", \"down_proj\", \"gate_proj\"],\n", "    modules_to_save=[\"embed_tokens\", \"input_layernorm\", \"post_attention_layernorm\", \"norm\"],\n", ")\n", "# Args\n", "max_seq_length = 512\n", "output_dir = \"./results\"\n", "per_device_train_batch_size = 2  # reduced batch size to avoid OOM\n", "gradient_accumulation_steps = 2\n", "optim = \"adamw_torch\"\n", "save_steps = 10\n", "logging_steps = 1\n", "learning_rate = 2e-4\n", "max_grad_norm = 0.3\n", "max_steps = 1  # a single optimizer step, just to verify that the setup trains\n", "warmup_ratio = 0.1\n", "lr_scheduler_type = \"cosine\"\n", "training_arguments = TrainingArguments(\n", "    output_dir=output_dir,\n", "    per_device_train_batch_size=per_device_train_batch_size,\n", "    gradient_accumulation_steps=gradient_accumulation_steps,\n", "    optim=optim,\n", "    save_steps=save_steps,\n", "    logging_steps=logging_steps,\n", "    learning_rate=learning_rate,\n", "    fp16=True,  # the V100 has no bf16 support, see Step 3\n", "    max_grad_norm=max_grad_norm,\n", "    max_steps=max_steps,\n", "    warmup_ratio=warmup_ratio,\n", "    group_by_length=True,\n", "    lr_scheduler_type=lr_scheduler_type,\n", "    gradient_checkpointing=True,  # gradient checkpointing\n", "    # report_to=\"wandb\",\n", ")\n", "# Trainer\n", "trainer = SFTTrainer(\n", "    model=model,\n", "    train_dataset=dataset,\n", "    peft_config=peft_config,\n", "    dataset_text_field=\"context\",\n", "    max_seq_length=max_seq_length,\n", "    tokenizer=tokenizer,\n", "    args=training_arguments,\n", ")\n", "\n", "# Train :)\n", "trainer.train()" ] },
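{ "cell_type": "markdown", "metadata": {}, "source": [ "The cell above uses plain Hugging Face Transformers + bitsandbytes. Since this tutorial also promises Unsloth, here is a hedged sketch (not a run from this notebook) of how the same LoRA fine-tuning could be driven through Unsloth's `FastLanguageModel`; the checkpoint name, sequence length, and LoRA values are illustrative choices, and it reuses the `dataset` loaded above:" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from unsloth import FastLanguageModel\n", "from trl import SFTTrainer\n", "from transformers import TrainingArguments\n", "\n", "# Load a 4-bit Llama 3 through Unsloth (illustrative checkpoint name)\n", "model, tokenizer = FastLanguageModel.from_pretrained(\n", "    model_name=\"unsloth/llama-3-8b-Instruct-bnb-4bit\",\n", "    max_seq_length=512,\n", "    load_in_4bit=True,\n", ")\n", "# Attach LoRA adapters through Unsloth's patched PEFT path\n", "model = FastLanguageModel.get_peft_model(\n", "    model,\n", "    r=32,\n", "    lora_alpha=16,\n", "    lora_dropout=0,  # Unsloth's fast path expects dropout 0\n", "    target_modules=[\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\", \"gate_proj\", \"up_proj\", \"down_proj\"],\n", ")\n", "trainer = SFTTrainer(\n", "    model=model,\n", "    tokenizer=tokenizer,\n", "    train_dataset=dataset,  # the 100-row slice loaded above\n", "    dataset_text_field=\"context\",\n", "    max_seq_length=512,\n", "    args=TrainingArguments(\n", "        output_dir=\"./results_unsloth\",\n", "        per_device_train_batch_size=2,\n", "        max_steps=1,\n", "        fp16=True,\n", "    ),\n", ")\n", "trainer.train()" ] },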
{ "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.\n", "Token is valid (permission: write).\n", "Your token has been saved to /home/wsuser/.cache/huggingface/token\n", "Login successful\n" ] } ], "source": [ "token = \"\"  # paste your Hugging Face access token here\n", "from huggingface_hub import login, logout\n", "login(token)  # non-blocking login" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Multiple GPUs" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Some weights of BartForCausalLM were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['decoder.embed_tokens.weight', 'lm_head.weight']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" ] } ], "source": [ "import torch\n", "from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments\n", "from datasets import load_dataset\n", "from trl import SFTTrainer\n", "import os\n", "\n", "# Distributed training setup (assuming all GPUs are available on a single machine)\n", "def init_distributed(rank, world_size):\n", "    \"\"\"Initializes distributed training using the `nccl` backend.\"\"\"\n", "    os.environ[\"MASTER_ADDR\"] = \"localhost\"  # single machine, so every rank can reach rank 0 locally\n", "    os.environ[\"MASTER_PORT\"] = \"12345\"\n", "    os.environ[\"RANK\"] = str(rank)\n", "    os.environ[\"WORLD_SIZE\"] = str(world_size)\n", "    torch.distributed.init_process_group(backend=\"nccl\", init_method=\"env://\")\n", "\n", "# Cleanup after training\n", "def cleanup_distributed():\n", "    if torch.distributed.is_initialized():\n", "        torch.distributed.destroy_process_group()\n", "\n", "# Model and tokenizer selection\n", "model_name = \"facebook/bart-base\"  # Replace with your desired model\n", "tokenizer = AutoTokenizer.from_pretrained(model_name)\n", "model = AutoModelForCausalLM.from_pretrained(model_name)\n", "\n", "# Dataset loading (replace with your dataset and field names)\n", "dataset = load_dataset(\"glue\", \"mnli\", split=\"train\")\n", "text_field = \"premise\"  # the field containing the text to train on\n", "\n", "# Training arguments (adjust hyperparameters as needed)\n", "training_args = TrainingArguments(\n", "    output_dir=\"./results\",\n", "    per_device_train_batch_size=2,  # adjust based on GPU memory\n", "    save_steps=500,\n", "    save_total_limit=2,\n", "    num_train_epochs=3,  # adjust training time as needed\n", ")\n", "\n", "world_size = torch.cuda.device_count()\n", "if world_size > 1:\n", "    # A notebook kernel is a single process, so this only ever sets up rank 0;\n", "    # see the note below for launching one process per GPU properly\n", "    init_distributed(rank=0, world_size=world_size)\n", "\n", "    # Wrap model in DDP for distributed training\n", "    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[torch.cuda.current_device()])\n", "\n", "    # Create SFTTrainer with distributed settings\n", "    trainer = SFTTrainer(\n", "        model=model,\n", "        args=training_args,\n", "        train_dataset=dataset,\n", "        dataset_text_field=text_field,\n", "        compute_metrics=None,  # You can define your custom metrics here\n", "    )\n", "    print(\"Trainer For distributed training loaded\")\n", "else:\n", "    # For single-GPU training\n", "    trainer = SFTTrainer(\n", "        model=model,\n", "        args=training_args,\n", "        train_dataset=dataset,\n", "        dataset_text_field=text_field,\n", "        compute_metrics=None,  # You can define your custom metrics here\n", "    )\n", "    print(\"Trainer For single-GPU loaded\")\n", "\n", "# Start training\n", "trainer.train()\n", "\n", "# Cleanup after training\n", "cleanup_distributed()\n" ] },
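{ "cell_type": "markdown", "metadata": {}, "source": [ "A Jupyter kernel is a single process, so the DDP branch above can only ever initialize rank 0; inside the Trainer the model then effectively runs through `DataParallel`, as the traceback of the interrupted run below shows. To really use both V100s with one process per GPU, the usual route is to save the cell as a script and launch it with `torchrun` (or `accelerate launch`). The script name below is a placeholder:" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Save the multi-GPU cell above as train_sft.py (placeholder name), then launch one process per GPU\n", "!torchrun --nproc_per_node=2 train_sft.py" ] },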
{ "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Some weights of BartForCausalLM were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['decoder.embed_tokens.weight', 'lm_head.weight']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", "UserWarning: You didn't pass a `max_seq_length` argument to the SFTTrainer, this will default to 1024\n", "Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Trainer For single-GPU loaded\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "UserWarning: Was asked to gather along dimension 0, but all input tensors were scalars; will instead unsqueeze and return a vector.\n" ] }, { "data": { "text/plain": [ "[ 329/294528 00:36 < 9:12:58, 8.87 it/s, Epoch 0.00/3]\n", "Step  Training Loss" ] }, "metadata": {}, "output_type": "display_data" }, { "ename": "KeyboardInterrupt", "evalue": "", "output_type": "error", "traceback": [ "---------------------------------------------------------------------------", "KeyboardInterrupt                         Traceback (most recent call last)", "Cell In[1], line 66: trainer.train()", "File .../trl/trainer/sft_trainer.py:361, in SFTTrainer.train: output = super().train(*args, **kwargs)", "File .../transformers/trainer.py:1859, in Trainer.train: return inner_training_loop(args=args, resume_from_checkpoint=resume_from_checkpoint, trial=trial, ignore_keys_for_eval=ignore_keys_for_eval)", "File .../transformers/trainer.py:2203, in Trainer._inner_training_loop: tr_loss_step = self.training_step(model, inputs)", "File .../transformers/trainer.py:3138, in Trainer.training_step: loss = self.compute_loss(model, inputs)", "File .../transformers/trainer.py:3161, in Trainer.compute_loss: outputs = model(**inputs)", "File .../torch/nn/modules/module.py:1511, in Module._wrapped_call_impl: return self._call_impl(*args, **kwargs)", "File .../torch/nn/modules/module.py:1520, in Module._call_impl: return forward_call(*args, **kwargs)", "File .../torch/nn/parallel/data_parallel.py:185, in DataParallel.forward: outputs = self.parallel_apply(replicas, inputs, module_kwargs)", "File .../torch/nn/parallel/data_parallel.py:200, in DataParallel.parallel_apply: return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)])", "File .../torch/nn/parallel/parallel_apply.py:100, in 
\u001b[0;36mparallel_apply\u001b[0;34m(modules, inputs, kwargs_tup, devices)\u001b[0m\n\u001b[1;32m 98\u001b[0m thread\u001b[38;5;241m.\u001b[39mstart()\n\u001b[1;32m 99\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m thread \u001b[38;5;129;01min\u001b[39;00m threads:\n\u001b[0;32m--> 100\u001b[0m \u001b[43mthread\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 101\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 102\u001b[0m _worker(\u001b[38;5;241m0\u001b[39m, modules[\u001b[38;5;241m0\u001b[39m], inputs[\u001b[38;5;241m0\u001b[39m], kwargs_tup[\u001b[38;5;241m0\u001b[39m], devices[\u001b[38;5;241m0\u001b[39m], streams[\u001b[38;5;241m0\u001b[39m])\n", "File \u001b[0;32m/opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/threading.py:1096\u001b[0m, in \u001b[0;36mThread.join\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 1093\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcannot join current thread\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 1095\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m timeout \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 1096\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_wait_for_tstate_lock\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1097\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1098\u001b[0m \u001b[38;5;66;03m# the behavior of a negative timeout isn't documented, but\u001b[39;00m\n\u001b[1;32m 1099\u001b[0m \u001b[38;5;66;03m# historically .join(timeout=x) for x<0 has acted as if timeout=0\u001b[39;00m\n\u001b[1;32m 1100\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_wait_for_tstate_lock(timeout\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mmax\u001b[39m(timeout, \u001b[38;5;241m0\u001b[39m))\n", "File \u001b[0;32m/opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/threading.py:1116\u001b[0m, in \u001b[0;36mThread._wait_for_tstate_lock\u001b[0;34m(self, block, timeout)\u001b[0m\n\u001b[1;32m 1113\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m\n\u001b[1;32m 1115\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1116\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[43mlock\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43macquire\u001b[49m\u001b[43m(\u001b[49m\u001b[43mblock\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m)\u001b[49m:\n\u001b[1;32m 1117\u001b[0m lock\u001b[38;5;241m.\u001b[39mrelease()\n\u001b[1;32m 1118\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_stop()\n", "\u001b[0;31mKeyboardInterrupt\u001b[0m: " ] } ], "source": [ "import torch\n", "from transformers import AutoModelForCausalLM, AutoTokenizer\n", "from datasets import load_dataset\n", "from trl import SFTTrainer\n", "from transformers import TrainingArguments\n", "import os\n", "\n", "# Distributed training setup (assuming all GPUs are available on a single machine)\n", "def init_distributed():\n", " # Replace with actual hostname or IP if using multiple machines\n", " os.environ[\"MASTER_ADDR\"] = \"localhost\"\n", " os.environ[\"MASTER_PORT\"] = \"12345\"\n", " torch.distributed.init_process_group(backend='nccl', world_size=torch.cuda.device_count(), rank=rank)\n", "\n", "def cleanup_distributed():\n", " torch.distributed.destroy_process_group()\n", "\n", "# Model and tokenizer selection\n", "model_name = 
\"facebook/bart-base\" # Replace with your desired model\n", "tokenizer = AutoTokenizer.from_pretrained(model_name)\n", "model = AutoModelForCausalLM.from_pretrained(model_name)\n", "\n", "# Dataset loading (replace with your dataset and field names)\n", "dataset = load_dataset(\"glue\", \"mnli\", split=\"train\")\n", "text_field = \"premise\" # Assuming premise is the field containing text for prediction\n", "\n", "# Training arguments (adjust hyperparameters as needed)\n", "training_args = TrainingArguments(\n", " output_dir=\"./results\",\n", " per_device_train_batch_size=2, # Adjust based on GPU memory\n", " save_steps=500,\n", " save_total_limit=2,\n", " num_train_epochs=3, # Adjust training time as needed\n", ")\n", "\n", "# Distributed training setup with SFTTrainer\n", "if torch.distributed.is_initialized():\n", " rank = torch.distributed.get_rank()\n", " world_size = torch.distributed.get_world_size()\n", " # Wrap model in DDP for distributed training\n", " model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[rank])\n", "\n", " # Create SFTTrainer with distributed settings\n", " trainer = SFTTrainer(\n", " model=model,\n", " args=training_args, # Pass training_args as 'args' instead of 'training_args'\n", " train_dataset=dataset,\n", " dataset_text_field=text_field,\n", " compute_metrics=None, # You can define your custom metrics here\n", " world_size=world_size,\n", " rank=rank,\n", " )\n", " print(f\"Trainer For distributed training loaded on rank {rank}\")\n", "else:\n", " # For single-GPU training\n", " trainer = SFTTrainer(\n", " model=model,\n", " args=training_args, # Pass training_args as 'args' instead of 'training_args'\n", " train_dataset=dataset,\n", " dataset_text_field=text_field,\n", " compute_metrics=None, # You can define your custom metrics here\n", " )\n", " print(\"Trainer For single-GPU loaded\")\n", "\n", "# Start training\n", "trainer.train()\n", "\n", "# Cleanup after training\n", "cleanup_distributed()\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Traceback (most recent call last):\n", " File \"\", line 1, in \n", " File \"/opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/multiprocessing/spawn.py\", line 116, in spawn_main\n", " exitcode = _main(fd, parent_sentinel)\n", " File \"/opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/multiprocessing/spawn.py\", line 126, in _main\n", " self = reduction.pickle.load(from_parent)\n", "AttributeError: Can't get attribute 'main_worker' on \n", "Traceback (most recent call last):\n", " File \"\", line 1, in \n", " File \"/opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/multiprocessing/spawn.py\", line 116, in spawn_main\n", " exitcode = _main(fd, parent_sentinel)\n", " File \"/opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/multiprocessing/spawn.py\", line 126, in _main\n", " self = reduction.pickle.load(from_parent)\n", "AttributeError: Can't get 
attribute 'main_worker' on \n" ] } ], "source": [ "import os\n", "import torch\n", "import torch.multiprocessing as mp\n", "from datasets import load_dataset\n", "from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig\n", "from peft import LoraConfig\n", "from trl import SFTTrainer\n", "from transformers import TrainingArguments\n", "\n", "\n", "# Distributed training setup\n", "def init_distributed(rank):\n", "    os.environ[\"MASTER_ADDR\"] = \"localhost\"\n", "    os.environ[\"MASTER_PORT\"] = \"12345\"\n", "    torch.distributed.init_process_group(backend='nccl', world_size=torch.cuda.device_count(), rank=rank)\n", "\n", "def cleanup_distributed():\n", "    torch.distributed.destroy_process_group()\n", "\n", "def main_worker(rank, world_size):\n", "    init_distributed(rank)  # the rank must be passed in; it is not a global\n", "\n", "    # Your model training and fine-tuning code goes here\n", "    # Load the dataset\n", "    dataset_name = \"ruslanmv/ai-medical-dataset\"\n", "    dataset = load_dataset(dataset_name, split=\"train\")\n", "    # Select the first 100 rows of the dataset for a quick test run\n", "    dataset = dataset.select(range(100))\n", "\n", "    # Load the model + tokenizer\n", "    model_name = \"meta-llama/Meta-Llama-3-8B-Instruct\"\n", "    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)\n", "    tokenizer.pad_token = tokenizer.eos_token\n", "    bnb_config = BitsAndBytesConfig(\n", "        load_in_4bit=True,\n", "        bnb_4bit_quant_type=\"nf4\",\n", "        bnb_4bit_compute_dtype=torch.float16,\n", "    )\n", "    model = AutoModelForCausalLM.from_pretrained(\n", "        model_name,\n", "        quantization_config=bnb_config,\n", "        trust_remote_code=True,\n", "        use_cache=False,\n", "    )\n", "\n", "    # Check for available GPUs\n", "    device = torch.device(f\"cuda:{rank}\" if torch.cuda.is_available() else \"cpu\")\n", "\n", "    # PEFT config\n", "    lora_alpha = 1\n", "    lora_dropout = 0.1\n", "    lora_r = 32  # 64\n", "    peft_config = LoraConfig(\n", "        r=lora_r,  # pass the LoRA rank explicitly, otherwise the PEFT default is used\n", "        lora_alpha=lora_alpha,\n", "        lora_dropout=lora_dropout,\n", "        task_type=\"CAUSAL_LM\",\n", "        target_modules=[\"k_proj\", \"q_proj\", \"v_proj\", \"up_proj\", \"down_proj\", \"gate_proj\"],\n", "        modules_to_save=[\"embed_tokens\", \"input_layernorm\", \"post_attention_layernorm\", \"norm\"],\n", "    )\n", "\n", "    # Args\n", "    max_seq_length = 512\n", "    output_dir = \"./results\"\n", "    per_device_train_batch_size = 2  # reduced batch size to avoid OOM\n", "    gradient_accumulation_steps = 2\n", "    optim = \"adamw_torch\"\n", "    save_steps = 10\n", "    logging_steps = 1\n", "    learning_rate = 2e-4\n", "    max_grad_norm = 0.3\n", "    max_steps = 1  # 300 approximates the size of guanaco at bs 8, ga 2, 2 GPUs.\n", "    warmup_ratio = 0.1\n", "    lr_scheduler_type = \"cosine\"\n", "    training_arguments = TrainingArguments(\n", "        output_dir=output_dir,\n", "        per_device_train_batch_size=per_device_train_batch_size,\n", "        gradient_accumulation_steps=gradient_accumulation_steps,\n", "        optim=optim,\n", "        save_steps=save_steps,\n", "        logging_steps=logging_steps,\n", "        learning_rate=learning_rate,\n", "        fp16=True,\n", "        max_grad_norm=max_grad_norm,\n", "        max_steps=max_steps,\n", "        warmup_ratio=warmup_ratio,\n", "        group_by_length=True,\n", "        lr_scheduler_type=lr_scheduler_type,\n", "        gradient_checkpointing=True,  # gradient checkpointing\n", "        #report_to=\"wandb\",\n", "    )\n", "\n", "    # Trainer\n", "    trainer = SFTTrainer(\n", "        model=model,\n", "        train_dataset=dataset,\n", "        peft_config=peft_config,\n", "        dataset_text_field=\"context\",\n", "        
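# 'context' is the raw-text column of the medical dataset used for SFT\n", "        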
max_seq_length=max_seq_length,\n", "        tokenizer=tokenizer,\n", "        args=training_arguments,\n", "    )\n", "\n", "    # Train :)\n", "    trainer.train()\n", "    cleanup_distributed()\n", "\n", "\n", "if __name__ == \"__main__\":\n", "    world_size = torch.cuda.device_count()\n", "    # 'spawn' re-imports __main__ in every child process; inside a notebook the\n", "    # children cannot resolve 'main_worker' (see the AttributeError above), so this\n", "    # pattern only works when the worker function lives in an importable .py module\n", "    mp.set_start_method('spawn')\n", "    processes = []\n", "    for rank in range(world_size):\n", "        p = mp.Process(target=main_worker, args=(rank, world_size))\n", "        p.start()\n", "        processes.append(p)\n", "    for p in processes:\n", "        p.join()\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Supervised Finetuning Trainer (SFT Trainer)." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "def finetune():\n", "    from datasets import load_dataset\n", "    import torch\n", "    from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig\n", "    from peft import LoraConfig\n", "    from trl import SFTTrainer\n", "    from transformers import TrainingArguments\n", "    from torch.nn.parallel import DistributedDataParallel as DDP\n", "    # Load the dataset\n", "    dataset_name = \"ruslanmv/ai-medical-dataset\"\n", "    dataset = load_dataset(dataset_name, split=\"train\")\n", "    # Select the first 100 rows of the dataset for a quick test run\n", "    dataset = dataset.select(range(100))\n", "    # Load the model + tokenizer\n", "    model_name = \"meta-llama/Meta-Llama-3-8B-Instruct\"\n", "    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)\n", "    tokenizer.pad_token = tokenizer.eos_token\n", "    bnb_config = BitsAndBytesConfig(\n", "        load_in_4bit=True,\n", "        bnb_4bit_quant_type=\"nf4\",\n", "        bnb_4bit_compute_dtype=torch.float16,\n", "    )\n", "    model = AutoModelForCausalLM.from_pretrained(\n", "        model_name,\n", "        quantization_config=bnb_config,\n", "        trust_remote_code=True,\n", "        use_cache=False,\n", "    )\n", "    # Check for available GPUs\n", "    if torch.cuda.device_count() > 1:\n", "        print(\"Multiple GPUs detected, enabling DistributedDataParallel...\")\n", "        # NOTE: DDP requires torch.distributed.init_process_group to have been called first\n", "        model = DDP(model)  # Wrap the model with DDP\n", "    else:\n", "        print(\"Using single GPU...\")\n", "    # PEFT config\n", "    lora_alpha = 16\n", "    lora_dropout = 0.1\n", "    lora_r = 32  # 64\n", "    peft_config = LoraConfig(\n", "        lora_alpha=lora_alpha,\n", "        lora_dropout=lora_dropout,\n", "        r=lora_r,\n", "        bias=\"none\",\n", "        task_type=\"CAUSAL_LM\",\n", "        target_modules=[\"k_proj\", \"q_proj\", \"v_proj\", \"up_proj\", \"down_proj\", \"gate_proj\"],\n", "        modules_to_save=[\"embed_tokens\", \"input_layernorm\", \"post_attention_layernorm\", \"norm\"],\n", "    )\n", "    # Args\n", "    max_seq_length = 512\n", "    output_dir = \"./results\"\n", "    per_device_train_batch_size = 2  # reduced batch size to avoid OOM\n", "    gradient_accumulation_steps = 2\n", "    optim = \"adamw_torch\"\n", "    save_steps = 10\n", "    logging_steps = 1\n", "    learning_rate = 2e-4\n", "    max_grad_norm = 0.3\n", "    max_steps = 1  # 300 approximates the size of guanaco at bs 8, ga 2, 2 GPUs.\n", "    warmup_ratio = 0.1\n", "    lr_scheduler_type = \"cosine\"\n", "\n", "    training_arguments = TrainingArguments(\n", "        output_dir=output_dir,\n", "        per_device_train_batch_size=per_device_train_batch_size,\n", "        
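# effective batch size = per_device_train_batch_size * gradient_accumulation_steps * num GPUs\n", "        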
gradient_accumulation_steps=gradient_accumulation_steps,\n", " optim=optim,\n", " save_steps=save_steps,\n", " logging_steps=logging_steps,\n", " learning_rate=learning_rate,\n", " fp16=True,\n", " max_grad_norm=max_grad_norm,\n", " max_steps=max_steps,\n", " warmup_ratio=warmup_ratio,\n", " group_by_length=True,\n", " lr_scheduler_type=lr_scheduler_type,\n", " gradient_checkpointing=True, # gradient checkpointing\n", " #report_to=\"wandb\",\n", " )\n", " # Trainer\n", " trainer = SFTTrainer(\n", " model=model,\n", " train_dataset=dataset,\n", " peft_config=peft_config,\n", " dataset_text_field=\"context\",\n", " max_seq_length=max_seq_length,\n", " tokenizer=tokenizer,\n", " args=training_arguments,\n", " )\n", " # Train :)\n", " trainer.train()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Initializing distributed process group...\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "d32deaece84b4e2382865810c9c3f1f4", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Resolving data files: 0%| | 0/18 [00:00.\n", "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n" ] } ], "source": [ "# Loading the model and the tokenizer for the model\n", "model, tokenizer = FastLanguageModel.from_pretrained(\n", " model_name=config[\"model_config\"].get(\"base_model\"),\n", " max_seq_length=config[\"model_config\"].get(\"max_seq_length\"),\n", " dtype=config[\"model_config\"].get(\"dtype\"),\n", " load_in_4bit=config[\"model_config\"].get(\"load_in_4bit\"),\n", "\n", "\n", ")" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "# Setup for QLoRA/LoRA peft of the base model\n", "model = FastLanguageModel.get_peft_model(\n", " model,\n", " r = config.get(\"lora_config\").get(\"r\"),\n", " target_modules = config.get(\"lora_config\").get(\"target_modules\"),\n", " lora_alpha = config.get(\"lora_config\").get(\"lora_alpha\"),\n", " lora_dropout = config.get(\"lora_config\").get(\"lora_dropout\"),\n", " bias = config.get(\"lora_config\").get(\"bias\"),\n", " use_gradient_checkpointing = config.get(\"lora_config\").get(\"use_gradient_checkpointing\"),\n", " random_state = 42,\n", " use_rslora = config.get(\"lora_config\").get(\"use_rslora\"),\n", " use_dora = config.get(\"lora_config\").get(\"use_dora\"),\n", " loftq_config = config.get(\"lora_config\").get(\"loftq_config\"),\n", ")" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "#from transformers import AutoModelForCausalLM, BitsAndBytesConfig, AutoTokenizer\n", "#tokenizer = AutoTokenizer.from_pretrained(config.get(\"model_config\").get(\"base_model\"))\n", "tokenizer.add_eos_token = True\n", "tokenizer.pad_token_id = 0\n", "tokenizer.padding_side = \"left\"" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Dataset does not exist.\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "2f16e19d432e4cfb8518dc80b1f54552", "version_major": 2, "version_minor": 0 }, 
"text/plain": [ "Downloading readme: 0%| | 0.00/2.97k [00:00 1:\n", " print(\"Multiple GPUs enabled\")\n", " devices = [f\"cuda:{i}\" for i in range(torch.cuda.device_count())]\n", " model_parallel = torch.nn.DataParallel(model, device_ids= devices ) #[0, 1]\n", " # Access the original model from the DataParallel object\n", " model = model_parallel.module\n", " else:\n", " print(\"No DataParallel \")\n", " device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", " model.to(device) " ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['cuda:0', 'cuda:1']" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "devices" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "PeftModelForCausalLM(\n", " (base_model): LoraModel(\n", " (model): LlamaForCausalLM(\n", " (model): LlamaModel(\n", " (embed_tokens): Embedding(128256, 4096)\n", " (layers): ModuleList(\n", " (0-31): 32 x LlamaDecoderLayer(\n", " (self_attn): LlamaSdpaAttention(\n", " (q_proj): lora.Linear4bit(\n", " (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)\n", " (lora_dropout): ModuleDict(\n", " (default): Identity()\n", " )\n", " (lora_A): ModuleDict(\n", " (default): Linear(in_features=4096, out_features=16, bias=False)\n", " )\n", " (lora_B): ModuleDict(\n", " (default): Linear(in_features=16, out_features=4096, bias=False)\n", " )\n", " (lora_embedding_A): ParameterDict()\n", " (lora_embedding_B): ParameterDict()\n", " )\n", " (k_proj): lora.Linear4bit(\n", " (base_layer): Linear4bit(in_features=4096, out_features=1024, bias=False)\n", " (lora_dropout): ModuleDict(\n", " (default): Identity()\n", " )\n", " (lora_A): ModuleDict(\n", " (default): Linear(in_features=4096, out_features=16, bias=False)\n", " )\n", " (lora_B): ModuleDict(\n", " (default): Linear(in_features=16, out_features=1024, bias=False)\n", " )\n", " (lora_embedding_A): ParameterDict()\n", " (lora_embedding_B): ParameterDict()\n", " )\n", " (v_proj): lora.Linear4bit(\n", " (base_layer): Linear4bit(in_features=4096, out_features=1024, bias=False)\n", " (lora_dropout): ModuleDict(\n", " (default): Identity()\n", " )\n", " (lora_A): ModuleDict(\n", " (default): Linear(in_features=4096, out_features=16, bias=False)\n", " )\n", " (lora_B): ModuleDict(\n", " (default): Linear(in_features=16, out_features=1024, bias=False)\n", " )\n", " (lora_embedding_A): ParameterDict()\n", " (lora_embedding_B): ParameterDict()\n", " )\n", " (o_proj): lora.Linear4bit(\n", " (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)\n", " (lora_dropout): ModuleDict(\n", " (default): Identity()\n", " )\n", " (lora_A): ModuleDict(\n", " (default): Linear(in_features=4096, out_features=16, bias=False)\n", " )\n", " (lora_B): ModuleDict(\n", " (default): Linear(in_features=16, out_features=4096, bias=False)\n", " )\n", " (lora_embedding_A): ParameterDict()\n", " (lora_embedding_B): ParameterDict()\n", " )\n", " (rotary_emb): LlamaRotaryEmbedding()\n", " )\n", " (mlp): LlamaMLP(\n", " (gate_proj): lora.Linear4bit(\n", " (base_layer): Linear4bit(in_features=4096, out_features=14336, bias=False)\n", " (lora_dropout): ModuleDict(\n", " (default): Identity()\n", " )\n", " (lora_A): ModuleDict(\n", " (default): Linear(in_features=4096, out_features=16, bias=False)\n", " )\n", " (lora_B): ModuleDict(\n", " (default): Linear(in_features=16, out_features=14336, bias=False)\n", " )\n", " 
(lora_embedding_A): ParameterDict()\n", " (lora_embedding_B): ParameterDict()\n", " )\n", " (up_proj): lora.Linear4bit(\n", " (base_layer): Linear4bit(in_features=4096, out_features=14336, bias=False)\n", " (lora_dropout): ModuleDict(\n", " (default): Identity()\n", " )\n", " (lora_A): ModuleDict(\n", " (default): Linear(in_features=4096, out_features=16, bias=False)\n", " )\n", " (lora_B): ModuleDict(\n", " (default): Linear(in_features=16, out_features=14336, bias=False)\n", " )\n", " (lora_embedding_A): ParameterDict()\n", " (lora_embedding_B): ParameterDict()\n", " )\n", " (down_proj): lora.Linear4bit(\n", " (base_layer): Linear4bit(in_features=14336, out_features=4096, bias=False)\n", " (lora_dropout): ModuleDict(\n", " (default): Identity()\n", " )\n", " (lora_A): ModuleDict(\n", " (default): Linear(in_features=14336, out_features=16, bias=False)\n", " )\n", " (lora_B): ModuleDict(\n", " (default): Linear(in_features=16, out_features=4096, bias=False)\n", " )\n", " (lora_embedding_A): ParameterDict()\n", " (lora_embedding_B): ParameterDict()\n", " )\n", " (act_fn): SiLU()\n", " )\n", " (input_layernorm): LlamaRMSNorm()\n", " (post_attention_layernorm): LlamaRMSNorm()\n", " )\n", " )\n", " (norm): LlamaRMSNorm()\n", " )\n", " (lm_head): Linear(in_features=4096, out_features=128256, bias=False)\n", " )\n", " )\n", ")" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "abb0b4aadff048d9b612a7a8e6ebd4a9", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Map (num_proc=2): 0%| | 0/100 [00:00 2\u001b[0m trainer_stats \u001b[38;5;241m=\u001b[39m \u001b[43mtrainer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtrain\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m/opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages/trl/trainer/sft_trainer.py:361\u001b[0m, in \u001b[0;36mSFTTrainer.train\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 358\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mneftune_noise_alpha \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_trainer_supports_neftune:\n\u001b[1;32m 359\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_trl_activate_neftune(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel)\n\u001b[0;32m--> 361\u001b[0m output \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtrain\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 363\u001b[0m \u001b[38;5;66;03m# After training we make sure to retrieve back the original forward pass method\u001b[39;00m\n\u001b[1;32m 364\u001b[0m \u001b[38;5;66;03m# for the embedding layer by removing the forward post hook.\u001b[39;00m\n\u001b[1;32m 365\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mneftune_noise_alpha \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_trainer_supports_neftune:\n", "File \u001b[0;32m/opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages/transformers/trainer.py:1859\u001b[0m, in \u001b[0;36mTrainer.train\u001b[0;34m(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[0m\n\u001b[1;32m 1857\u001b[0m hf_hub_utils\u001b[38;5;241m.\u001b[39menable_progress_bars()\n\u001b[1;32m 1858\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1859\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43minner_training_loop\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1860\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1861\u001b[0m \u001b[43m \u001b[49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1862\u001b[0m \u001b[43m \u001b[49m\u001b[43mtrial\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtrial\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1863\u001b[0m \u001b[43m \u001b[49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1864\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m:361\u001b[0m, in \u001b[0;36m_fast_inner_training_loop\u001b[0;34m(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)\u001b[0m\n", "File \u001b[0;32m/opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages/transformers/trainer.py:3138\u001b[0m, in \u001b[0;36mTrainer.training_step\u001b[0;34m(self, model, inputs)\u001b[0m\n\u001b[1;32m 3135\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m loss_mb\u001b[38;5;241m.\u001b[39mreduce_mean()\u001b[38;5;241m.\u001b[39mdetach()\u001b[38;5;241m.\u001b[39mto(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mdevice)\n\u001b[1;32m 3137\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcompute_loss_context_manager():\n\u001b[0;32m-> 3138\u001b[0m loss \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcompute_loss\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3140\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mn_gpu \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[1;32m 3141\u001b[0m loss \u001b[38;5;241m=\u001b[39m loss\u001b[38;5;241m.\u001b[39mmean() \u001b[38;5;66;03m# mean() to average on multi-gpu parallel training\u001b[39;00m\n", "File \u001b[0;32m/opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages/transformers/trainer.py:3161\u001b[0m, in \u001b[0;36mTrainer.compute_loss\u001b[0;34m(self, model, inputs, return_outputs)\u001b[0m\n\u001b[1;32m 3159\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 3160\u001b[0m labels \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m-> 3161\u001b[0m outputs 
\u001b[38;5;241m=\u001b[39m \u001b[43mmodel\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43minputs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3162\u001b[0m \u001b[38;5;66;03m# Save past state if it exists\u001b[39;00m\n\u001b[1;32m 3163\u001b[0m \u001b[38;5;66;03m# TODO: this needs to be fixed and made cleaner later.\u001b[39;00m\n\u001b[1;32m 3164\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mpast_index \u001b[38;5;241m>\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m:\n", "File \u001b[0;32m/opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages/torch/nn/modules/module.py:1511\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1509\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1510\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1511\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m/opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages/torch/nn/modules/module.py:1520\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1515\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1516\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1517\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1518\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1519\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1520\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1522\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1523\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n", "File \u001b[0;32m/opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages/torch/nn/parallel/data_parallel.py:185\u001b[0m, in \u001b[0;36mDataParallel.forward\u001b[0;34m(self, *inputs, 
**kwargs)\u001b[0m\n\u001b[1;32m 183\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodule(\u001b[38;5;241m*\u001b[39minputs[\u001b[38;5;241m0\u001b[39m], \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mmodule_kwargs[\u001b[38;5;241m0\u001b[39m])\n\u001b[1;32m 184\u001b[0m replicas \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mreplicate(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodule, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdevice_ids[:\u001b[38;5;28mlen\u001b[39m(inputs)])\n\u001b[0;32m--> 185\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mparallel_apply\u001b[49m\u001b[43m(\u001b[49m\u001b[43mreplicas\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodule_kwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 186\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgather(outputs, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moutput_device)\n", "File \u001b[0;32m/opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages/torch/nn/parallel/data_parallel.py:200\u001b[0m, in \u001b[0;36mDataParallel.parallel_apply\u001b[0;34m(self, replicas, inputs, kwargs)\u001b[0m\n\u001b[1;32m 199\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mparallel_apply\u001b[39m(\u001b[38;5;28mself\u001b[39m, replicas: Sequence[T], inputs: Sequence[Any], kwargs: Any) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m List[Any]:\n\u001b[0;32m--> 200\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mparallel_apply\u001b[49m\u001b[43m(\u001b[49m\u001b[43mreplicas\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdevice_ids\u001b[49m\u001b[43m[\u001b[49m\u001b[43m:\u001b[49m\u001b[38;5;28;43mlen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mreplicas\u001b[49m\u001b[43m)\u001b[49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m/opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages/torch/nn/parallel/parallel_apply.py:108\u001b[0m, in \u001b[0;36mparallel_apply\u001b[0;34m(modules, inputs, kwargs_tup, devices)\u001b[0m\n\u001b[1;32m 106\u001b[0m output \u001b[38;5;241m=\u001b[39m results[i]\n\u001b[1;32m 107\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(output, ExceptionWrapper):\n\u001b[0;32m--> 108\u001b[0m \u001b[43moutput\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreraise\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 109\u001b[0m outputs\u001b[38;5;241m.\u001b[39mappend(output)\n\u001b[1;32m 110\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m outputs\n", "File \u001b[0;32m/opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages/torch/_utils.py:722\u001b[0m, in \u001b[0;36mExceptionWrapper.reraise\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 718\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n\u001b[1;32m 719\u001b[0m \u001b[38;5;66;03m# If the exception takes multiple arguments, don't try to\u001b[39;00m\n\u001b[1;32m 720\u001b[0m \u001b[38;5;66;03m# instantiate since we don't know how to\u001b[39;00m\n\u001b[1;32m 
721\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(msg) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m--> 722\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m exception\n", "\u001b[0;31mRuntimeError\u001b[0m: Caught RuntimeError in replica 1 on device 1.\nOriginal Traceback (most recent call last):\n File \"/opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages/torch/nn/parallel/parallel_apply.py\", line 83, in _worker\n output = module(*input, **kwargs)\n File \"/opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1511, in _wrapped_call_impl\n return self._call_impl(*args, **kwargs)\n File \"/opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1520, in _call_impl\n return forward_call(*args, **kwargs)\n File \"/opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages/unsloth/models/llama.py\", line 882, in PeftModelForCausalLM_fast_forward\n return self.base_model(\n File \"/opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1511, in _wrapped_call_impl\n return self._call_impl(*args, **kwargs)\n File \"/opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1520, in _call_impl\n return forward_call(*args, **kwargs)\n File \"/opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages/peft/tuners/tuners_utils.py\", line 161, in forward\n return self.model.forward(*args, **kwargs)\n File \"/opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages/accelerate/hooks.py\", line 166, in new_forward\n output = module._old_forward(*args, **kwargs)\n File \"/opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages/unsloth/models/llama.py\", line 813, in _CausalLM_fast_forward\n outputs = self.model(\n File \"/opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1511, in _wrapped_call_impl\n return self._call_impl(*args, **kwargs)\n File \"/opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1520, in _call_impl\n return forward_call(*args, **kwargs)\n File \"/opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages/accelerate/hooks.py\", line 166, in new_forward\n output = module._old_forward(*args, **kwargs)\n File \"/opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages/unsloth/models/llama.py\", line 668, in LlamaModel_fast_forward\n layer_outputs = torch.utils.checkpoint.checkpoint(\n File \"/opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages/torch/_compile.py\", line 24, in inner\n return torch._dynamo.disable(fn, recursive)(*args, **kwargs)\n File \"/opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages/torch/_dynamo/eval_frame.py\", line 489, in _fn\n return fn(*args, **kwargs)\n File \"/opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages/torch/_dynamo/external_utils.py\", line 17, in inner\n return fn(*args, **kwargs)\n File \"/opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages/torch/utils/checkpoint.py\", line 482, in checkpoint\n return CheckpointFunction.apply(function, preserve, *args)\n File \"/opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages/torch/autograd/function.py\", line 553, in apply\n return super().apply(*args, **kwargs) # type: ignore[misc]\n File 
\"/opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages/torch/utils/checkpoint.py\", line 261, in forward\n outputs = run_function(*args)\n File \"/opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages/unsloth/models/llama.py\", line 664, in custom_forward\n return module(*inputs, past_key_value, output_attentions, padding_mask = padding_mask)\n File \"/opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1511, in _wrapped_call_impl\n return self._call_impl(*args, **kwargs)\n File \"/opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1520, in _call_impl\n return forward_call(*args, **kwargs)\n File \"/opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages/accelerate/hooks.py\", line 166, in new_forward\n output = module._old_forward(*args, **kwargs)\n File \"/opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages/unsloth/models/llama.py\", line 433, in LlamaDecoderLayer_fast_forward\n hidden_states, self_attn_weights, present_key_value = self.self_attn(\n File \"/opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1511, in _wrapped_call_impl\n return self._call_impl(*args, **kwargs)\n File \"/opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1520, in _call_impl\n return forward_call(*args, **kwargs)\n File \"/opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages/accelerate/hooks.py\", line 166, in new_forward\n output = module._old_forward(*args, **kwargs)\n File \"/opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages/unsloth/models/llama.py\", line 308, in LlamaAttention_fast_forward\n Q, K, V = self.apply_qkv(self, hidden_states)\n File \"/opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages/unsloth/kernels/fast_lora.py\", line 312, in apply_lora_qkv\n Q, K, V = LoRA_QKV.apply(X,\n File \"/opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages/torch/autograd/function.py\", line 553, in apply\n return super().apply(*args, **kwargs) # type: ignore[misc]\n File \"/opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages/torch/cuda/amp/autocast_mode.py\", line 115, in decorate_fwd\n return fwd(*args, **kwargs)\n File \"/opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages/unsloth/kernels/fast_lora.py\", line 227, in forward\n Q = matmul_lora(X, QW, QW_quant, QA, QB, QS)\n File \"/opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages/unsloth/kernels/utils.py\", line 225, in matmul_lora\n W = fast_dequantize(W.t(), W_quant)\n File \"/opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages/unsloth/kernels/utils.py\", line 104, in fast_dequantize\n out_absmax += offset\nRuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:1 and cuda:0!\n" ] } ], "source": [ "# Training the model\n", "trainer_stats = trainer.train()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "YQFEr64koXp1", "outputId": "2e1b3775-1f5d-4b8e-a0ef-32266cb7fa2a" }, "outputs": [], "source": [ "# Memory statistics after training\n", "used_memory = round(torch.cuda.max_memory_allocated() / 1024**3, 2)\n", "used_memory_lora = round(used_memory - reserved_memory, 2)\n", "used_memory_persentage = round((used_memory / max_memory) * 100, 2)\n", "used_memory_lora_persentage = round((used_memory_lora / max_memory) * 100, 2)\n", 
"print(f\"Used Memory: {used_memory}GB ({used_memory_persentage}%)\")\n", "print(f\"Used Memory for training(fine-tuning) LoRA: {used_memory_lora}GB ({used_memory_lora_persentage}%)\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "new_model=config.get(\"model_config\").get(\"finetuned_model\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "new_model" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "1YJB4bZyoXp1" }, "outputs": [], "source": [ "# Saving the trainer stats\n", "with open(\"trainer_stats.json\", \"w\") as f:\n", " json.dump(trainer_stats, f, indent=4)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "## Save and push the adapter to HF\n", "import os\n", "current_directory = os.getcwd()\n", "# New model name\n", "new_model = config.get(\"model_config\").get(\"finetuned_name\") #\"Medical-Mind-Llama-3-8b\"\n", "# Save the fine-tuned model\n", "save_path = os.path.join(current_directory, \"models\", new_model)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#os.makedirs(save_path, exist_ok=True) # Create directory if it doesn't exist\n", "#trainer.model.save_pretrained(save_path)\n", "tokenizer.save_pretrained(save_path)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "help(model.save_pretrained_merged)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "To save the final model as LoRA adapters, either use Huggingface's push_to_hub for an online save or save_pretrained for a local save.\n", "\n", "[NOTE] This ONLY saves the LoRA adapters, and not the full model. To save to 16bit or GGUF, scroll down!" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Save the model to the created directory\n", "# `lora`: Save LoRA adapters with no merging. Useful for HF inference.\n", "#model.save_pretrained(save_path)\n" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [], "source": [ "# Saving the model using merged_16bit(float16), \n", "#`16bit`: Merge LoRA into float16 weights. Useful for GGUF / llama.cpp.\n", "#model.save_pretrained_merged(save_path, tokenizer, save_method = \"merged_16bit\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# `4bit`: Merge LoRA into int4 weights. 
Useful for DPO / HF inference.\n", "model.save_pretrained_merged(save_path, tokenizer, save_method = \"merged_4bit_forced\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "save_path" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Get the list of files in the directory\n", "files_in_model_dir = os.listdir(save_path)\n", "# Print the list of files\n", "print(\"Files in the directory:\")\n", "for file in files_in_model_dir:\n", " print(file)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import os\n", "from huggingface_hub import HfApi\n", "def upload_folder(folder_path, repository_name, path_in_repo):\n", " api = HfApi()\n", " \n", " # Check if the repository exists, if not, create it\n", " repo_exists = api.repo_exists(repository_name)\n", " if not repo_exists:\n", " api.create_repo(repository_name)\n", " print(f\"Repository '{repository_name}' created on Hugging Face Hub.\")\n", "\n", " for root, dirs, files in os.walk(folder_path):\n", " for file in files:\n", " file_path = os.path.join(root, file)\n", " relative_path = os.path.relpath(file_path, folder_path)\n", " repo_path = os.path.join(path_in_repo, relative_path)\n", " api.upload_file(path_or_fileobj=file_path, repo_id=repository_name, path_in_repo=repo_path)\n", " print(f\"{repo_path} uploaded to {repository_name}\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Define the repository name and path in the repository\n", "repository_name = \"ruslanmv/\"+new_model\n", "path_in_repo = \"\"" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "repository_name" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Upload the folder and its contents to the repository\n", "upload_folder(save_path, repository_name, path_in_repo)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#help(model.push_to_hub_merged)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#save_path='/home/wsuser/work/models/Medical-Mind-Llama-3-8b'\n", "#repo_id='ruslanmv/Medical-Mind-Llama-3-8b'\n", "#commit_message=\"Uploading Model\"\n", "#model.push_to_hub_merged(repo_id, tokenizer, save_method = \"merged_16bit\",commit_message=commit_message)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#model.push_to_hub_merged(config.get(\"model_config\").get(\"finetuned_model\"), tokenizer, save_method = \"merged_4bit\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#model.save_pretrained_gguf(config.get(\"model_config\").get(\"finetuned_model\"), tokenizer)\n", "#model.push_to_hub_gguf(config.get(\"model_config\").get(\"finetuned_model\"), tokenizer,repository_private=True)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#model.save_pretrained_gguf(config.get(\"model_config\").get(\"finetuned_model\"), tokenizer, quantization_method = \"q4_k_m\")\n", "#model.push_to_hub_gguf(config.get(\"model_config\").get(\"finetuned_model\"), tokenizer, quantization_method = \"q4_k_m\",private=True)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ 
"/opt/conda/envs/Python-RT23.1-CUDA/lib/python3.10/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n", " warnings.warn(\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "8f1cc811458d464c906abab21b49d230", "version_major": 2, "version_minor": 0 }, "text/plain": [ "config.json: 0%| | 0.00/1.12k [00:00.\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "c900fe27e6ab4026bf82f8d9fc8127ce", "version_major": 2, "version_minor": 0 }, "text/plain": [ "model.safetensors.index.json: 0%| | 0.00/132k [00:00system<|end_header_id|> You are a Medical AI chatbot assistant .<|eot_id|><|start_header_id|> user <|end_header_id|>{question}<|eot_id|>\"\n", " # Tokenizing the input and generating the output\n", " inputs = tokenizer([prompt], return_tensors = \"pt\").to(\"cuda\")\n", " outputs = model.generate(**inputs, max_new_tokens = 256, use_cache = True)\n", " answer = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0] # Get the first element from the batch\n", "\n", " # Split the answer at the first line break, assuming system intro and question are on separate lines\n", " answer_parts = answer.split(\"\\n\", 1)\n", "\n", " # If there are multiple parts, consider the second part as the answer\n", " if len(answer_parts) > 1:\n", " answer = answer_parts[1].strip() # Remove leading/trailing whitespaces\n", " else:\n", " answer = \"\" # If no split possible, set answer to empty string\n", "\n", " print(f\"Answer: {answer}\") \n", " " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Hyperparameter search\n", "**Step 1: Define the Hyperparameter Search Space**\n", "We need to define the search space for the hyperparameters we want to tune. For example, let's say we want to tune the following hyperparameters:\n", "\n", "* `learning_rate`\n", "* `per_device_train_batch_size`\n", "* `gradient_accumulation_steps`\n", "* `warmup_steps`\n", "* `num_train_epochs`\n", "* `lora_alpha`\n", "* `lora_dropout`\n", "\n", "We can define the search space as follows:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import hyperopt\n", "from hyperopt import hp\n", "from hyperopt import Trials\n", "from hyperopt import fmin, tpe, Trials\n", "# Define the search space for hyperparameters\n", "space = {\n", " 'learning_rate': hp.loguniform('learning_rate', -5, -1), # Learning rate in log scale\n", " #'lora_alpha': hp.quniform('lora_alpha', 1, 32, 1), # LoRA alpha with quantized steps\n", " #'lora_dropout': hp.uniform('lora_dropout', 0, 0.5), # LoRA dropout rate\n", "\n", " 'per_device_train_batch_size': hp.quniform('per_device_train_batch_size', 2, 16, q=1), \n", " 'gradient_accumulation_steps': hp.quniform('gradient_accumulation_steps', 1, 8, 1), # Added for exploration\n", " # Uncomment these if you want to tune other hyperparameters\n", " # 'warmup_steps': hp.quniform('warmup_steps', 0, 1000, 1),\n", " # 'num_train_epochs': hp.quniform('num_train_epochs', 1, 5, 1), \n", "\n", "}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Step 2. Define the Objective Function**\n", "\n", "The objective function is a function that takes in the hyperparameters, sets them in the `config` dictionary, trains the model, and returns the loss or metric to minimize. 
{ "cell_type": "markdown", "metadata": {}, "source": [ "**Step 2: Define the Objective Function**\n", "\n", "The objective function takes the sampled hyperparameters, writes them into the `config` dictionary, trains the model, and returns the loss (the metric to minimize). We need to modify the previous fine-tuning code to define it." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def objective(params):\n", "    # Write the sampled hyperparameters into the config dictionary (defined earlier)\n", "    config['training_config']['learning_rate'] = params['learning_rate']\n", "    # hp.quniform returns floats, so cast the integer-valued hyperparameters\n", "    config['training_config']['per_device_train_batch_size'] = int(params['per_device_train_batch_size'])\n", "    config['training_config']['gradient_accumulation_steps'] = int(params['gradient_accumulation_steps'])\n", "    # Uncomment to tune these as well (they must also be enabled in `space`)\n", "    #config['lora_config']['lora_alpha'] = params['lora_alpha']\n", "    #config['lora_config']['lora_dropout'] = params['lora_dropout']\n", "    #config['training_config']['warmup_steps'] = params['warmup_steps']\n", "    #config['training_config']['num_train_epochs'] = params['num_train_epochs']\n", "\n", "    # Load a fresh model and tokenizer for every trial\n", "    try:\n", "        model, tokenizer = FastLanguageModel.from_pretrained(\n", "            model_name=config.get(\"model_config\").get(\"base_model\"),\n", "            max_seq_length=config.get(\"model_config\").get(\"max_seq_length\"),\n", "            dtype=config.get(\"model_config\").get(\"dtype\"),\n", "            load_in_4bit=config.get(\"model_config\").get(\"load_in_4bit\"),\n", "        )\n", "    except Exception as e:\n", "        print(f\"Error loading model and tokenizer: {e}\")\n", "        return float(\"inf\")  # Return a high value for failed trials\n", "\n", "    # Set up LoRA for the model\n", "    try:\n", "        model = FastLanguageModel.get_peft_model(\n", "            model,\n", "            r=config.get(\"lora_config\").get(\"r\"),\n", "            target_modules=config.get(\"lora_config\").get(\"target_modules\"),\n", "            lora_alpha=config.get(\"lora_config\").get(\"lora_alpha\"),  # use params['lora_alpha'] when tuning it\n", "            lora_dropout=config.get(\"lora_config\").get(\"lora_dropout\"),  # use params['lora_dropout'] when tuning it\n", "            bias=config.get(\"lora_config\").get(\"bias\"),\n", "            use_gradient_checkpointing=config.get(\"lora_config\").get(\"use_gradient_checkpointing\"),\n", "            random_state=42,\n", "            use_rslora=config.get(\"lora_config\").get(\"use_rslora\"),\n", "            use_dora=config.get(\"lora_config\").get(\"use_dora\"),\n", "            loftq_config=config.get(\"lora_config\").get(\"loftq_config\")\n", "        )\n", "    except Exception as e:\n", "        print(f\"Error setting up LoRA: {e}\")\n", "        return float(\"inf\")  # Return a high value for failed trials\n", "\n", "    # Train the model and return the training loss to minimize\n", "    try:\n", "        trainer = SFTTrainer(\n", "            model=model,\n", "            tokenizer=tokenizer,\n", "            train_dataset=train_dataset,\n", "            dataset_text_field=config.get(\"training_dataset\").get(\"input_field\"),\n", "            max_seq_length=config.get(\"model_config\").get(\"max_seq_length\"),\n", "            dataset_num_proc=2,\n", "            packing=False,\n", "            args=TrainingArguments(\n", "                per_device_train_batch_size=int(params['per_device_train_batch_size']),\n", "                gradient_accumulation_steps=int(params['gradient_accumulation_steps']),\n", "                warmup_steps=config.get(\"training_config\").get(\"warmup_steps\"),  # use params['warmup_steps'] when tuning it\n", "                max_steps=config.get(\"training_config\").get(\"max_steps\"),\n", "                num_train_epochs=config.get(\"training_config\").get(\"num_train_epochs\"),  # use params['num_train_epochs'] when tuning it\n", "                learning_rate=params['learning_rate'],\n", "                fp16=config.get(\"training_config\").get(\"fp16\"),\n", "                bf16=config.get(\"training_config\").get(\"bf16\"),\n", "                logging_steps=config.get(\"training_config\").get(\"logging_steps\"),\n", "                optim=config.get(\"training_config\").get(\"optim\"),\n", "                weight_decay=config.get(\"training_config\").get(\"weight_decay\"),\n",
"                lr_scheduler_type=config.get(\"training_config\").get(\"lr_scheduler_type\"),\n", "                seed=42,\n", "                output_dir=config.get(\"training_config\").get(\"output_dir\")\n", "            )\n", "        )\n", "        trainer_stats = trainer.train()\n", "        return trainer_stats.training_loss  # train() returns a TrainOutput; its loss field is `training_loss`\n", "    except Exception as e:\n", "        print(f\"Error during training: {e}\")\n", "        return float(\"inf\")  # Return a high value for failed trials\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Step 3: Perform the Hyperparameter Search**\n", "\n", "Now that the objective function is defined, we can run the search with Hyperopt's `fmin` function, specifying the objective, the search space, the search algorithm, and the maximum number of evaluations." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Create a Trials object to track hyperparameter evaluations\n", "trials = Trials()\n", "# Run hyperparameter optimization with the TPE algorithm (max_evals=2 keeps this demo cheap)\n", "best = fmin(objective, space, algo=tpe.suggest, trials=trials, max_evals=2)\n", "# Print the best hyperparameters found during optimization\n", "print(\"Best Hyperparameters:\", best)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import gc\n", "import torch\n", "def reset_gpu_memory():\n", "    # Release cached CUDA memory and trigger garbage collection between trials\n", "    torch.cuda.empty_cache()\n", "    gc.collect()\n", "    print(\"GPU memory cleared!\")\n", "# Example usage:\n", "reset_gpu_memory()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Full code version" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Full, fixed version of the Hyperopt search\n", "import numpy as np\n", "from hyperopt import hp, fmin, tpe, Trials\n", "# Define the search space for the hyperparameters\n", "space = {\n", "    'learning_rate': hp.loguniform('learning_rate', np.log(1e-5), np.log(1e-3)),  # log-uniform between 1e-5 and 1e-3\n", "    'per_device_train_batch_size': hp.quniform('per_device_train_batch_size', 2, 16, 1),\n", "    'gradient_accumulation_steps': hp.quniform('gradient_accumulation_steps', 1, 8, 1),\n", "    # Uncomment these if you want to tune them\n", "    #'lora_alpha': hp.quniform('lora_alpha', 1, 32, 1),  # LoRA alpha with quantized steps\n", "    #'lora_dropout': hp.uniform('lora_dropout', 0, 0.5),  # LoRA dropout rate\n", "    #'warmup_steps': hp.quniform('warmup_steps', 0, 1000, 1),\n", "    #'num_train_epochs': hp.quniform('num_train_epochs', 1, 5, 1),\n", "}\n", "def objective(params):\n", "    # Write the sampled hyperparameters into the config dictionary (defined earlier)\n", "    config['training_config']['learning_rate'] = params['learning_rate']\n", "    config['training_config']['per_device_train_batch_size'] = int(params['per_device_train_batch_size'])\n", "    config['training_config']['gradient_accumulation_steps'] = int(params['gradient_accumulation_steps'])\n", "    #config['lora_config']['lora_alpha'] = params['lora_alpha']\n", "    #config['lora_config']['lora_dropout'] = params['lora_dropout']\n",
"    #config['training_config']['warmup_steps'] = params['warmup_steps']\n", "    #config['training_config']['num_train_epochs'] = params['num_train_epochs']\n", "    # Load the model and tokenizer (a fresh model for every trial)\n", "    try:\n", "        model, tokenizer = FastLanguageModel.from_pretrained(\n", "            model_name = config.get(\"model_config\").get(\"base_model\"),\n", "            max_seq_length = config.get(\"model_config\").get(\"max_seq_length\"),\n", "            dtype = config.get(\"model_config\").get(\"dtype\"),\n", "            load_in_4bit = config.get(\"model_config\").get(\"load_in_4bit\")\n", "        )\n", "    except Exception as e:\n", "        print(f\"Error loading model and tokenizer: {e}\")\n", "        return float(\"inf\")  # Return a high value for errors\n", "\n", "    # Set up LoRA for the model\n", "    try:\n", "        model = FastLanguageModel.get_peft_model(\n", "            model,\n", "            r = config.get(\"lora_config\").get(\"r\"),\n", "            target_modules = config.get(\"lora_config\").get(\"target_modules\"),\n", "            lora_alpha = config.get(\"lora_config\").get('lora_alpha'),  # params['lora_alpha'] when tuning\n", "            lora_dropout = config.get(\"lora_config\").get('lora_dropout'),  # params['lora_dropout'] when tuning\n", "            bias = config.get(\"lora_config\").get(\"bias\"),\n", "            use_gradient_checkpointing = config.get(\"lora_config\").get(\"use_gradient_checkpointing\"),\n", "            random_state = 42,\n", "            use_rslora = config.get(\"lora_config\").get(\"use_rslora\"),\n", "            use_dora = config.get(\"lora_config\").get(\"use_dora\"),\n", "            loftq_config = config.get(\"lora_config\").get(\"loftq_config\")\n", "        )\n", "    except Exception as e:\n", "        print(f\"Error setting up LoRA: {e}\")\n", "        return float(\"inf\")  # Return a high value for errors\n", "\n", "    # Train the model and return the training loss to minimize\n", "    try:\n", "        trainer = SFTTrainer(\n", "            model = model,\n", "            tokenizer = tokenizer,\n", "            train_dataset = train_dataset,\n", "            dataset_text_field = config.get(\"training_dataset\").get(\"input_field\"),\n", "            max_seq_length = config.get(\"model_config\").get(\"max_seq_length\"),\n", "            dataset_num_proc = 2,\n", "            packing = False,\n", "            args = TrainingArguments(\n", "                per_device_train_batch_size = int(params['per_device_train_batch_size']),\n", "                gradient_accumulation_steps = int(params['gradient_accumulation_steps']),\n", "                warmup_steps = config.get(\"training_config\").get('warmup_steps'),  # params['warmup_steps'] when tuning\n", "                max_steps = config.get(\"training_config\").get(\"max_steps\"),\n", "                num_train_epochs = config.get(\"training_config\").get('num_train_epochs'),  # params['num_train_epochs'] when tuning\n", "                learning_rate = params['learning_rate'],\n", "                fp16 = config.get(\"training_config\").get(\"fp16\"),\n", "                bf16 = config.get(\"training_config\").get(\"bf16\"),\n", "                logging_steps = config.get(\"training_config\").get(\"logging_steps\"),\n", "                optim = config.get(\"training_config\").get(\"optim\"),\n", "                weight_decay = config.get(\"training_config\").get(\"weight_decay\"),\n", "                lr_scheduler_type = config.get(\"training_config\").get(\"lr_scheduler_type\"),\n", "                seed = 42,\n", "                output_dir = config.get(\"training_config\").get(\"output_dir\")\n", "            )\n", "        )\n", "        trainer_stats = trainer.train()\n", "        return trainer_stats.training_loss  # TrainOutput exposes `training_loss`, not `.loss`\n", "    except Exception as e:\n", "        print(f\"Error during training: {e}\")\n", "        return float(\"inf\")  # Return a high value for failed trials\n",
"# Create a Trials object to track hyperparameter evaluations\n", "trials = Trials()\n", "# Run hyperparameter optimization with the TPE algorithm\n", "best = fmin(objective, space, algo=tpe.suggest, trials=trials, max_evals=2)\n", "# Print the best hyperparameters found during optimization\n", "print(\"Best Hyperparameters:\", best)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Analyzing Hyperparameters\n", "\n", "* **Batch Size**: Increasing the batch size generally improves training speed by using the hardware more efficiently, but only up to a point, beyond which performance degrades or memory runs out. Tune it within a reasonable range (e.g., 2, 4, 8, 16) to see its impact.\n", "\n", "* **Learning Rate**: A higher learning rate can accelerate training initially, but too high a value leads to unstable training and can actually slow convergence. Consider a range of learning rates (e.g., a log-uniform distribution between 1e-5 and 1e-3).\n", "\n", "* **Gradient Accumulation Steps**: This technique accumulates gradients over several batches before updating the model weights. It reduces memory requirements (a small per-device batch simulates a larger effective batch) but can slow down each epoch. Experiment with different accumulation steps (e.g., 1, 2, 4) to find a balance.\n", "\n", "* **Optimizer Choice**: Some optimizers, such as Adam or SGD with momentum, converge faster than others depending on the model and dataset. Explore different optimizers and their hyperparameters (e.g., the momentum coefficient).\n", "\n", "## Additional Considerations\n", "\n", "* **Early Stopping**: Automatically terminate training when the validation loss stops improving for a set number of evaluations. This saves training time once the model starts overfitting; a sketch follows this section.\n", "* **Warmup Steps**: Gradually increasing the learning rate during the initial training phase improves stability and can accelerate convergence compared to using the full learning rate from the start.\n", "* **Experimentation and Profiling**: The best hyperparameters for faster training depend on your specific model, dataset, and hardware, so experiment with different configurations using tools like Hyperopt or Optuna. Profiling tools help identify bottlenecks in the training pipeline so you can focus on the most time-consuming steps.\n", "\n", "By analyzing these hyperparameters and applying techniques such as early stopping and warmup steps, you can often achieve faster fine-tuning while maintaining good model performance." ] },
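{ "cell_type": "markdown", "metadata": {}, "source": [ "Below is a minimal sketch of the early-stopping idea, using the `EarlyStoppingCallback` built into `transformers`. It assumes a held-out `eval_dataset` split is available and that `model`, `tokenizer`, and `train_dataset` are defined as above; the patience, step counts, and learning rate are illustrative values, not tuned ones." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from transformers import EarlyStoppingCallback, TrainingArguments\n", "from trl import SFTTrainer\n", "\n", "early_stopping = EarlyStoppingCallback(\n", "    early_stopping_patience=3,     # stop after 3 evaluations without improvement\n", "    early_stopping_threshold=0.0,  # any improvement counts\n", ")\n", "trainer = SFTTrainer(\n", "    model=model,\n", "    tokenizer=tokenizer,\n", "    train_dataset=train_dataset,\n", "    eval_dataset=eval_dataset,  # assumes a held-out split exists\n", "    dataset_text_field=config.get(\"training_dataset\").get(\"input_field\"),\n", "    max_seq_length=config.get(\"model_config\").get(\"max_seq_length\"),\n", "    args=TrainingArguments(\n", "        output_dir=config.get(\"training_config\").get(\"output_dir\"),\n", "        evaluation_strategy=\"steps\",   # evaluate periodically so the callback can fire\n", "        eval_steps=50,\n", "        save_strategy=\"steps\",         # must match the evaluation strategy\n", "        save_steps=50,\n", "        load_best_model_at_end=True,   # required by EarlyStoppingCallback\n", "        metric_for_best_model=\"eval_loss\",\n", "        greater_is_better=False,\n", "        warmup_steps=config.get(\"training_config\").get(\"warmup_steps\"),  # warmup, as discussed above\n", "        per_device_train_batch_size=2,\n", "        learning_rate=2e-4,\n", "        max_steps=200,\n", "        logging_steps=10,\n", "    ),\n", "    callbacks=[early_stopping],\n", ")\n", "# trainer.train()  # uncomment to run with early stopping" ] },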
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Method 1: Optuna" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import time  # used to measure wall-clock training time\n", "from optuna import create_study\n", "\n", "# Search space: discrete choices for each hyperparameter\n", "search_space = {\n", "    \"learning_rate\": [1e-5, 5e-5, 1e-4, 2e-4],\n", "    \"per_device_train_batch_size\": [2, 4, 8],\n", "    \"lora_alpha\": [8, 16, 32],\n", "    \"gradient_accumulation_steps\": [1, 2, 4, 8],\n", "}\n", "\n", "def objective(trial):\n", "    # Sample hyperparameters from the discrete choices and write them into the config\n", "    config[\"training_config\"][\"learning_rate\"] = trial.suggest_categorical(\"learning_rate\", search_space[\"learning_rate\"])\n", "    config[\"training_config\"][\"per_device_train_batch_size\"] = trial.suggest_categorical(\"per_device_train_batch_size\", search_space[\"per_device_train_batch_size\"])\n", "    config[\"training_config\"][\"gradient_accumulation_steps\"] = trial.suggest_categorical(\"gradient_accumulation_steps\", search_space[\"gradient_accumulation_steps\"])\n", "    config[\"lora_config\"][\"lora_alpha\"] = trial.suggest_categorical(\"lora_alpha\", search_space[\"lora_alpha\"])\n", "\n", "    # Train with the current hyperparameters and minimize wall-clock training time.\n", "    # Note: `trainer_test` must be rebuilt from `config` on each trial for the\n", "    # sampled values to take effect (see the Hyperopt objective above).\n", "    start_time = time.time()\n", "    try:\n", "        trainer_stats = trainer_test.train()\n", "        return time.time() - start_time  # minimize training time\n", "    except Exception as e:\n", "        print(f\"Trial failed: {e}\")\n", "        return float(\"inf\")  # assign a high value if training fails\n", "\n", "study = create_study(direction=\"minimize\")\n", "study.optimize(objective, n_trials=2)  # adjust the number of trials\n", "\n", "# Access the best trial and its hyperparameters after optimization\n", "best_trial = study.best_trial\n", "print(\"Best Trial:\", best_trial.number)\n", "print(\"Best Hyperparameters (likely fastest):\", best_trial.params)\n", "print(\"Best Training Time:\", best_trial.value, \"seconds\")" ] }
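, { "cell_type": "markdown", "metadata": {}, "source": [ "After the study finishes, Optuna can summarize every trial as a pandas DataFrame, which makes it easy to compare configurations side by side. This is a minimal sketch using `study.trials_dataframe()`; it assumes the `study` object from the cell above." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Inspect every trial: trial number, objective value, and completion state\n", "df = study.trials_dataframe()\n", "print(df[[\"number\", \"value\", \"state\"]])" ] }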
"model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "00eea4b0c6e44c62900ea8e7d919efe9": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "02fc530028ea4d538b7f6b48463ae700": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "058b2b9959b84b6f9f5d3862ef53d029": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_7807f312425b4f4d9249aa1ac77d7461", "placeholder": "​", "style": "IPY_MODEL_d8e7ea9552a84b8284b31d77090b54af", "value": "Map (num_proc=2): 100%" } }, "0f55ae30c2704632941cca4727c1c4f2": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": 
"1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "11dc1dcf6b29471580c32c818fa41d88": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_9344b22940c64654a82bb2ce06530e30", "IPY_MODEL_4f68a26f64e844c7be21cc180eb6c1a2", "IPY_MODEL_769b40273bab41af8eb66e494b613241" ], "layout": "IPY_MODEL_320c09781518483e82defa86c28316d1" } }, "1634ba52355b4681a913039666926f85": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_eff94d2d010e4b4f93a6dfcb61103a52", "max": 18, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_da5cd094aaae45f4a0ca051ad5babd78", "value": 18 } }, "1850ab17bafd4a43b5ab5899d1875a40": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "1a72b512e1374e67a858edf2844fc157": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_284192f01a924f87afd8b5087ca9af6c", "max": 18, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_273bf76f74bc4fb492ccb67d9e202f7b", "value": 18 } }, "217ca5cd404d4756a399fba3aa4fbc15": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_8f88a5b04723482ea430679e504c65f9", "placeholder": "​", "style": "IPY_MODEL_8d153f070a8d4ad1b32996a9fd82beda", "value": " 18/18 [00:00<00:00,  9.43it/s]" } }, "22ea45365d21439fb5069974bbe69711": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, 
"_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "23a71f8847e647daba35e495706fc846": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_22ea45365d21439fb5069974bbe69711", "placeholder": "​", "style": "IPY_MODEL_bd087d0aa3214c5dbecc9b0bd4d976df", "value": "Resolving data files: 100%" } }, "273bf76f74bc4fb492ccb67d9e202f7b": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "284192f01a924f87afd8b5087ca9af6c": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "2c5564fb033346afbe7692a24a52b302": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "31a203cdd2f54cda8a05214844888156": { 
"model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "320c09781518483e82defa86c28316d1": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "32cff795f8bc490dbf63ed130e1f581f": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "33fb10908c23457aa4796626102fc8c5": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "341dca5ac74348dd9b5a347e38fa0b40": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, 
"align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "3564e3cf0fe84281838d84525794e735": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_912164947c5847908424f3e60c5adb64", "IPY_MODEL_7517ce80636040e29665a9353afab183", "IPY_MODEL_e14b9d980a1a41fb9e81385cb0f73d3a" ], "layout": "IPY_MODEL_ada78aafba3f47ab8eb45cf3c83a6805" } }, "37803098ceed4528bb690ebee028c840": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "39d3b72ab6214bcf9b0bb6b6294e957c": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "3a97281be4c1433aa3abe6c25b7113e2": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_4e19e78059b842a5832ccae2f765a30c", "IPY_MODEL_1a72b512e1374e67a858edf2844fc157", "IPY_MODEL_c9cfd66b68a1437d946c83163fa877df" ], "layout": 
"IPY_MODEL_cccd970273ae43d2a6e60ac421bdc882" } }, "3f7afd4bd28842cbb73e62c155667030": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_9a5fd3a68fd1445f92bea51a7fec3e6b", "max": 18, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_37803098ceed4528bb690ebee028c840", "value": 18 } }, "44f189b81bbd48ca8cb146ead641d2b5": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_e903140c8c794c48b231924d3975b7a6", "placeholder": "​", "style": "IPY_MODEL_7e74d789c82747e0b5066a00b9e36c1d", "value": " 100/100 [00:00<00:00, 125.88 examples/s]" } }, "45b3259e3cac4de8bd19d12f07de2adb": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "45c1d5b0df0e420a87f791dd4cf0e425": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, 
"overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "4a0426a353ca41cba39d4dfeba925451": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "4e19e78059b842a5832ccae2f765a30c": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_32cff795f8bc490dbf63ed130e1f581f", "placeholder": "​", "style": "IPY_MODEL_4a0426a353ca41cba39d4dfeba925451", "value": "Resolving data files: 100%" } }, "4f68a26f64e844c7be21cc180eb6c1a2": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_341dca5ac74348dd9b5a347e38fa0b40", "max": 18, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_8ba6fd1bf16a4680b8a8c9c55ecf23e7", "value": 18 } }, "51a6d3c97480476e8c22d9ad670bdc47": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "53ee8f5e8b7d4076bdb0167baf2e5729": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "58b932a03b2c4aa4891d541f186244b9": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", 
"model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "5d1fbd3c62d94df7befdefc451221414": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_8ad6abb48f38469f9d399eea8f5e5b70", "IPY_MODEL_6cea0da24cf54811a43168c606759bab", "IPY_MODEL_eb8c88f5c06c49fe9099371b3cf112ae" ], "layout": "IPY_MODEL_89a1354722e640758978befc06ed4a78" } }, "64539b4212fe4d989976f56369bb746b": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "67b9a3505ae644dbb3c4fc14781a2731": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_53ee8f5e8b7d4076bdb0167baf2e5729", "max": 100, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_d70fd9035f9b4d82892fae34c28c46d5", "value": 100 } }, "696e82ec6a174974a90d5abc7c101ee7": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "6cea0da24cf54811a43168c606759bab": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": 
"@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_dade882aca304a31b693a2c58807d825", "max": 18, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_02fc530028ea4d538b7f6b48463ae700", "value": 18 } }, "72eca1e2871b458abd3383d9711215a2": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_058b2b9959b84b6f9f5d3862ef53d029", "IPY_MODEL_85d4879bd7d64766905db34cef052fed", "IPY_MODEL_44f189b81bbd48ca8cb146ead641d2b5" ], "layout": "IPY_MODEL_f89c5c949e984361bce7f97d86d2a2e5" } }, "734b6d3e3406403293c4bc955a643528": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_dc3b2edc3f5d480a93b57b15b4444608", "placeholder": "​", "style": "IPY_MODEL_7967d420aff1414e9fe53eb04c928eb4", "value": "Map: 100%" } }, "7517ce80636040e29665a9353afab183": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_bb078c8c1f6a48359dc654d91ece684d", "max": 18, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_9b9322336b564a409086955ebda07fc3", "value": 18 } }, "769b40273bab41af8eb66e494b613241": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_dc85f5e365f4488fa185d0ae35fde806", "placeholder": "​", "style": "IPY_MODEL_51a6d3c97480476e8c22d9ad670bdc47", "value": " 18/18 [00:00<00:00, 1567.70it/s]" } }, "7807f312425b4f4d9249aa1ac77d7461": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, 
"grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "793f49f397b54daab63194cee8d04256": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "7967d420aff1414e9fe53eb04c928eb4": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "7e11cccce8be49008f8db3a0c3ea603d": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "7e74d789c82747e0b5066a00b9e36c1d": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": 
"@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "82c6c2752a0746f3935e069c0f8811d6": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "85d4879bd7d64766905db34cef052fed": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_0058ed544fed4272848a891a68b9adc0", "max": 100, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_33fb10908c23457aa4796626102fc8c5", "value": 100 } }, "89a1354722e640758978befc06ed4a78": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "8a195771bdc0462e8f9fbb60eb9141b1": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "8a8d3a006ee24c4393d7c2f2d040ce52": { 
"model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "8ad6abb48f38469f9d399eea8f5e5b70": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_39d3b72ab6214bcf9b0bb6b6294e957c", "placeholder": "​", "style": "IPY_MODEL_696e82ec6a174974a90d5abc7c101ee7", "value": "Resolving data files: 100%" } }, "8ba6fd1bf16a4680b8a8c9c55ecf23e7": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "8d153f070a8d4ad1b32996a9fd82beda": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "8f88a5b04723482ea430679e504c65f9": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "912164947c5847908424f3e60c5adb64": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": 
"IPY_MODEL_ff108c92fb5547869ee545cf9a094b07", "placeholder": "​", "style": "IPY_MODEL_2c5564fb033346afbe7692a24a52b302", "value": "Loading dataset shards: 100%" } }, "9344b22940c64654a82bb2ce06530e30": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_793f49f397b54daab63194cee8d04256", "placeholder": "​", "style": "IPY_MODEL_fa79cfa23f3a430dab69a59d93383cd0", "value": "Resolving data files: 100%" } }, "963c0aa5620b4ea8b5a903894646121c": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "9a5fd3a68fd1445f92bea51a7fec3e6b": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "9b9322336b564a409086955ebda07fc3": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, 
"9bceb9eddb2147c1abbf3391c70e6784": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "9ed49f1a099846a3a65cd6608bafb0e4": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "9f91f7ce62e243f59d72e5ba36f97b8f": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_af0233735d744b7e838f50f52c9d6cbe", "placeholder": "​", "style": "IPY_MODEL_8a8d3a006ee24c4393d7c2f2d040ce52", "value": "Loading dataset shards: 100%" } }, "a419499622cd4374937423a79677298f": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_b93514308ae44afbb1a0511f5f9c6ddf", "placeholder": "​", "style": "IPY_MODEL_58b932a03b2c4aa4891d541f186244b9", "value": " 18/18 [00:00<00:00, 1458.49it/s]" } }, "ada78aafba3f47ab8eb45cf3c83a6805": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, 
"grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "af0096de28414303ba5324f4087cd92e": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "af0233735d744b7e838f50f52c9d6cbe": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "b7e7896aeac74b6eae27de0677100e57": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "b8b277831f1a45109b3a4a3565fbdb9d": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": 
"1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_9f91f7ce62e243f59d72e5ba36f97b8f", "IPY_MODEL_1634ba52355b4681a913039666926f85", "IPY_MODEL_217ca5cd404d4756a399fba3aa4fbc15" ], "layout": "IPY_MODEL_bc6d92cb8837428bb7038d75e6af604e" } }, "b93514308ae44afbb1a0511f5f9c6ddf": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "bb078c8c1f6a48359dc654d91ece684d": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "bb1156b7d349440d9cc8a2f0328465a7": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_23a71f8847e647daba35e495706fc846", "IPY_MODEL_3f7afd4bd28842cbb73e62c155667030", "IPY_MODEL_a419499622cd4374937423a79677298f" ], "layout": "IPY_MODEL_64539b4212fe4d989976f56369bb746b" } }, "bc6d92cb8837428bb7038d75e6af604e": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": 
"LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "bd087d0aa3214c5dbecc9b0bd4d976df": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "be6162f66e594d3ebd8c53ebab3bbfa6": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_963c0aa5620b4ea8b5a903894646121c", "placeholder": "​", "style": "IPY_MODEL_31a203cdd2f54cda8a05214844888156", "value": " 100/100 [00:00<00:00, 5440.44 examples/s]" } }, "c4d39c87c16c4961b942d896742ff7ce": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_af0096de28414303ba5324f4087cd92e", "placeholder": "​", "style": "IPY_MODEL_0f55ae30c2704632941cca4727c1c4f2", "value": " 100/100 [00:01<00:00, 113.55 examples/s]" } }, "c9cfd66b68a1437d946c83163fa877df": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_45b3259e3cac4de8bd19d12f07de2adb", "placeholder": "​", "style": "IPY_MODEL_b7e7896aeac74b6eae27de0677100e57", "value": " 18/18 [00:00<00:00,  1.32it/s]" } }, "cccd970273ae43d2a6e60ac421bdc882": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": 
null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "d70fd9035f9b4d82892fae34c28c46d5": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "d8e7ea9552a84b8284b31d77090b54af": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "da5cd094aaae45f4a0ca051ad5babd78": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "dade882aca304a31b693a2c58807d825": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "dc3b2edc3f5d480a93b57b15b4444608": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", 
"_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "dc85f5e365f4488fa185d0ae35fde806": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "e14b9d980a1a41fb9e81385cb0f73d3a": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_9bceb9eddb2147c1abbf3391c70e6784", "placeholder": "​", "style": "IPY_MODEL_8a195771bdc0462e8f9fbb60eb9141b1", "value": " 18/18 [00:35<00:00,  1.20it/s]" } }, "e257e4a2bfdb48038102173d397ab2e4": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_82c6c2752a0746f3935e069c0f8811d6", "placeholder": "​", "style": "IPY_MODEL_1850ab17bafd4a43b5ab5899d1875a40", "value": "Map (num_proc=2): 100%" } }, "e3bd7f85ce194cd4b697c2eb82038658": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", 
"_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_734b6d3e3406403293c4bc955a643528", "IPY_MODEL_0005f2d9fe1e4cc98ea58b0c2868b433", "IPY_MODEL_be6162f66e594d3ebd8c53ebab3bbfa6" ], "layout": "IPY_MODEL_7e11cccce8be49008f8db3a0c3ea603d" } }, "e5880b946aae4b84a94226a5d6acaf45": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "e903140c8c794c48b231924d3975b7a6": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "eb8c88f5c06c49fe9099371b3cf112ae": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_00eea4b0c6e44c62900ea8e7d919efe9", "placeholder": "​", "style": "IPY_MODEL_fe17bedb5ef04d8b9e064fa1e0d75185", "value": " 18/18 [00:00<00:00,  1.42it/s]" } }, "eff94d2d010e4b4f93a6dfcb61103a52": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": 
"1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "f89c5c949e984361bce7f97d86d2a2e5": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "fa79cfa23f3a430dab69a59d93383cd0": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "fe17bedb5ef04d8b9e064fa1e0d75185": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "ff108c92fb5547869ee545cf9a094b07": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, 
"grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "ffa74977e7464cebb16d3cf8ee976d51": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_e257e4a2bfdb48038102173d397ab2e4", "IPY_MODEL_67b9a3505ae644dbb3c4fc14781a2731", "IPY_MODEL_c4d39c87c16c4961b942d896742ff7ce" ], "layout": "IPY_MODEL_e5880b946aae4b84a94226a5d6acaf45" } } } } }, "nbformat": 4, "nbformat_minor": 1 }