{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": 1,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "ID8Vmc0vF4Ay",
        "outputId": "76f7bedf-7711-483b-9d5e-e340d8cfbc36"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Requirement already satisfied: transformers in /usr/local/lib/python3.11/dist-packages (4.47.1)\n",
            "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from transformers) (3.16.1)\n",
            "Requirement already satisfied: huggingface-hub<1.0,>=0.24.0 in /usr/local/lib/python3.11/dist-packages (from transformers) (0.27.1)\n",
            "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.11/dist-packages (from transformers) (1.26.4)\n",
            "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.11/dist-packages (from transformers) (24.2)\n",
            "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.11/dist-packages (from transformers) (6.0.2)\n",
            "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.11/dist-packages (from transformers) (2024.11.6)\n",
            "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from transformers) (2.32.3)\n",
            "Requirement already satisfied: tokenizers<0.22,>=0.21 in /usr/local/lib/python3.11/dist-packages (from transformers) (0.21.0)\n",
            "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.11/dist-packages (from transformers) (0.5.2)\n",
            "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.11/dist-packages (from transformers) (4.67.1)\n",
            "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub<1.0,>=0.24.0->transformers) (2024.9.0)\n",
            "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub<1.0,>=0.24.0->transformers) (4.12.2)\n",
            "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->transformers) (3.4.1)\n",
            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->transformers) (3.10)\n",
            "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->transformers) (2.3.0)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->transformers) (2024.12.14)\n",
            "Requirement already satisfied: datasets in /usr/local/lib/python3.11/dist-packages (3.2.0)\n",
            "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from datasets) (3.16.1)\n",
            "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.11/dist-packages (from datasets) (1.26.4)\n",
            "Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.11/dist-packages (from datasets) (17.0.0)\n",
            "Requirement already satisfied: dill<0.3.9,>=0.3.0 in /usr/local/lib/python3.11/dist-packages (from datasets) (0.3.8)\n",
            "Requirement already satisfied: pandas in /usr/local/lib/python3.11/dist-packages (from datasets) (2.2.2)\n",
            "Requirement already satisfied: requests>=2.32.2 in /usr/local/lib/python3.11/dist-packages (from datasets) (2.32.3)\n",
            "Requirement already satisfied: tqdm>=4.66.3 in /usr/local/lib/python3.11/dist-packages (from datasets) (4.67.1)\n",
            "Requirement already satisfied: xxhash in /usr/local/lib/python3.11/dist-packages (from datasets) (3.5.0)\n",
            "Requirement already satisfied: multiprocess<0.70.17 in /usr/local/lib/python3.11/dist-packages (from datasets) (0.70.16)\n",
            "Requirement already satisfied: fsspec<=2024.9.0,>=2023.1.0 in /usr/local/lib/python3.11/dist-packages (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets) (2024.9.0)\n",
            "Requirement already satisfied: aiohttp in /usr/local/lib/python3.11/dist-packages (from datasets) (3.11.11)\n",
            "Requirement already satisfied: huggingface-hub>=0.23.0 in /usr/local/lib/python3.11/dist-packages (from datasets) (0.27.1)\n",
            "Requirement already satisfied: packaging in /usr/local/lib/python3.11/dist-packages (from datasets) (24.2)\n",
            "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.11/dist-packages (from datasets) (6.0.2)\n",
            "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (2.4.4)\n",
            "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (1.3.2)\n",
            "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (24.3.0)\n",
            "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (1.5.0)\n",
            "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (6.1.0)\n",
            "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (0.2.1)\n",
            "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (1.18.3)\n",
            "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub>=0.23.0->datasets) (4.12.2)\n",
            "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests>=2.32.2->datasets) (3.4.1)\n",
            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests>=2.32.2->datasets) (3.10)\n",
            "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests>=2.32.2->datasets) (2.3.0)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests>=2.32.2->datasets) (2024.12.14)\n",
            "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas->datasets) (2.8.2)\n",
            "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas->datasets) (2024.2)\n",
            "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas->datasets) (2024.2)\n",
            "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.17.0)\n",
            "Requirement already satisfied: evaluate in /usr/local/lib/python3.11/dist-packages (0.4.3)\n",
            "Requirement already satisfied: datasets>=2.0.0 in /usr/local/lib/python3.11/dist-packages (from evaluate) (3.2.0)\n",
            "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.11/dist-packages (from evaluate) (1.26.4)\n",
            "Requirement already satisfied: dill in /usr/local/lib/python3.11/dist-packages (from evaluate) (0.3.8)\n",
            "Requirement already satisfied: pandas in /usr/local/lib/python3.11/dist-packages (from evaluate) (2.2.2)\n",
            "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.11/dist-packages (from evaluate) (2.32.3)\n",
            "Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.11/dist-packages (from evaluate) (4.67.1)\n",
            "Requirement already satisfied: xxhash in /usr/local/lib/python3.11/dist-packages (from evaluate) (3.5.0)\n",
            "Requirement already satisfied: multiprocess in /usr/local/lib/python3.11/dist-packages (from evaluate) (0.70.16)\n",
            "Requirement already satisfied: fsspec>=2021.05.0 in /usr/local/lib/python3.11/dist-packages (from fsspec[http]>=2021.05.0->evaluate) (2024.9.0)\n",
            "Requirement already satisfied: huggingface-hub>=0.7.0 in /usr/local/lib/python3.11/dist-packages (from evaluate) (0.27.1)\n",
            "Requirement already satisfied: packaging in /usr/local/lib/python3.11/dist-packages (from evaluate) (24.2)\n",
            "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from datasets>=2.0.0->evaluate) (3.16.1)\n",
            "Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.11/dist-packages (from datasets>=2.0.0->evaluate) (17.0.0)\n",
            "Requirement already satisfied: aiohttp in /usr/local/lib/python3.11/dist-packages (from datasets>=2.0.0->evaluate) (3.11.11)\n",
            "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.11/dist-packages (from datasets>=2.0.0->evaluate) (6.0.2)\n",
            "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub>=0.7.0->evaluate) (4.12.2)\n",
            "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests>=2.19.0->evaluate) (3.4.1)\n",
            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests>=2.19.0->evaluate) (3.10)\n",
            "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests>=2.19.0->evaluate) (2.3.0)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests>=2.19.0->evaluate) (2024.12.14)\n",
            "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas->evaluate) (2.8.2)\n",
            "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas->evaluate) (2024.2)\n",
            "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas->evaluate) (2024.2)\n",
            "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.0.0->evaluate) (2.4.4)\n",
            "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.0.0->evaluate) (1.3.2)\n",
            "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.0.0->evaluate) (24.3.0)\n",
            "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.0.0->evaluate) (1.5.0)\n",
            "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.0.0->evaluate) (6.1.0)\n",
            "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.0.0->evaluate) (0.2.1)\n",
            "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.0.0->evaluate) (1.18.3)\n",
            "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas->evaluate) (1.17.0)\n",
            "Requirement already satisfied: rouge_score in /usr/local/lib/python3.11/dist-packages (0.1.2)\n",
            "Requirement already satisfied: absl-py in /usr/local/lib/python3.11/dist-packages (from rouge_score) (1.4.0)\n",
            "Requirement already satisfied: nltk in /usr/local/lib/python3.11/dist-packages (from rouge_score) (3.9.1)\n",
            "Requirement already satisfied: numpy in /usr/local/lib/python3.11/dist-packages (from rouge_score) (1.26.4)\n",
            "Requirement already satisfied: six>=1.14.0 in /usr/local/lib/python3.11/dist-packages (from rouge_score) (1.17.0)\n",
            "Requirement already satisfied: click in /usr/local/lib/python3.11/dist-packages (from nltk->rouge_score) (8.1.8)\n",
            "Requirement already satisfied: joblib in /usr/local/lib/python3.11/dist-packages (from nltk->rouge_score) (1.4.2)\n",
            "Requirement already satisfied: regex>=2021.8.3 in /usr/local/lib/python3.11/dist-packages (from nltk->rouge_score) (2024.11.6)\n",
            "Requirement already satisfied: tqdm in /usr/local/lib/python3.11/dist-packages (from nltk->rouge_score) (4.67.1)\n",
            "Requirement already satisfied: nltk in /usr/local/lib/python3.11/dist-packages (3.9.1)\n",
            "Requirement already satisfied: click in /usr/local/lib/python3.11/dist-packages (from nltk) (8.1.8)\n",
            "Requirement already satisfied: joblib in /usr/local/lib/python3.11/dist-packages (from nltk) (1.4.2)\n",
            "Requirement already satisfied: regex>=2021.8.3 in /usr/local/lib/python3.11/dist-packages (from nltk) (2024.11.6)\n",
            "Requirement already satisfied: tqdm in /usr/local/lib/python3.11/dist-packages (from nltk) (4.67.1)\n",
            "Requirement already satisfied: sentencepiece in /usr/local/lib/python3.11/dist-packages (0.2.0)\n"
          ]
        }
      ],
      "source": [
        "!pip install transformers\n",
        "!pip install datasets\n",
        "!pip install evaluate\n",
        "!pip install rouge_score\n",
        "!pip install nltk\n",
        "!pip install sentencepiece"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 2,
      "metadata": {
        "id": "Vhj7zRU5F2tU"
      },
      "outputs": [],
      "source": [
        "from datasets import load_dataset\n",
        "from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, GenerationConfig, TrainingArguments, Trainer\n",
        "import torch\n",
        "import time\n",
        "import evaluate\n",
        "import pandas as pd\n",
        "import numpy as np"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 3,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "_rMVgsurF2tW",
        "outputId": "03ba2bdb-f8d0-42dc-f2e7-94ccb3f44287"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "CUDA available, using CUDA\n"
          ]
        }
      ],
      "source": [
        "if torch.cuda.is_available():\n",
        "    print(\"CUDA available, using CUDA\")\n",
        "    device = torch.device(\"cuda\")\n",
        "elif torch.backends.mps.is_available():\n",
        "    print(\"MLX available, using MLX\")\n",
        "    device = torch.device(\"mps\")\n",
        "else:\n",
        "    print(\"Using CPU\")\n",
        "    device = torch.device(\"cpu\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 4,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "78YtJIJeF2tW",
        "outputId": "5fddb779-0c9d-4752-af4e-50a894a25c01"
      },
      "outputs": [
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "/usr/local/lib/python3.11/dist-packages/huggingface_hub/utils/_auth.py:94: UserWarning: \n",
            "The secret `HF_TOKEN` does not exist in your Colab secrets.\n",
            "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n",
            "You will be able to reuse this secret in all of your notebooks.\n",
            "Please note that authentication is recommended but still optional to access public models or datasets.\n",
            "  warnings.warn(\n"
          ]
        },
        {
          "data": {
            "text/plain": [
              "DatasetDict({\n",
              "    train: Dataset({\n",
              "        features: ['id', 'dialogue', 'summary', 'topic'],\n",
              "        num_rows: 12460\n",
              "    })\n",
              "    validation: Dataset({\n",
              "        features: ['id', 'dialogue', 'summary', 'topic'],\n",
              "        num_rows: 500\n",
              "    })\n",
              "    test: Dataset({\n",
              "        features: ['id', 'dialogue', 'summary', 'topic'],\n",
              "        num_rows: 1500\n",
              "    })\n",
              "})"
            ]
          },
          "execution_count": 4,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "training_dataset_name = \"knkarthick/dialogsum\"\n",
        "dataset = load_dataset(training_dataset_name)\n",
        "dataset"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 5,
      "metadata": {
        "id": "4AjBmSxVF2tW"
      },
      "outputs": [],
      "source": [
        "model_name = \"google/flan-t5-base\"\n",
        "base_model = AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype=torch.bfloat16)\n",
        "tokenizer = AutoTokenizer.from_pretrained(model_name)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 6,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 35
        },
        "id": "gwWyK9aEF2tW",
        "outputId": "c7e9a9fe-fae6-4739-a6dd-38e54e6b83d3"
      },
      "outputs": [
        {
          "data": {
            "application/vnd.google.colaboratory.intrinsic+json": {
              "type": "string"
            },
            "text/plain": [
              "'All parameters: 247577856 \\n Trainable parameters: 247577856 \\n Percentage Trainable: 100.0'"
            ]
          },
          "execution_count": 6,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "def print_number_of_trainable_model_parameters(model):\n",
        "    trainable_model_params = 0\n",
        "    all_model_params = 0\n",
        "    for _, param in model.named_parameters():\n",
        "        all_model_params += param.numel()\n",
        "        if param.requires_grad:\n",
        "            trainable_model_params += param.numel()\n",
        "    return f\"All parameters: {all_model_params} \\n Trainable parameters: {trainable_model_params} \\n Percentage Trainable: {trainable_model_params/all_model_params * 100}\"\n",
        "\n",
        "print_number_of_trainable_model_parameters(base_model)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 7,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "3ZGNban5F2tX",
        "outputId": "0587a92b-853e-44cd-bcec-3da20d073ab3"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "---------------------------------------------------------------------------------------------------\n",
            "INPUT PROMPT:\n",
            "\n",
            "Summarize the following conversation.\n",
            "#Person1#: Taxi!\n",
            "#Person2#: Where will you go, sir?\n",
            "#Person1#: Friendship Hotel.\n",
            "#Person2#: OK, it's not far from here.\n",
            "#Person1#: I have something important to do, can you fast the speed?\n",
            "#Person2#: Sure, I'll try my best. Here we are.\n",
            "#Person1#: It's fast! How much should I pay you?\n",
            "#Person2#: The reading on the meter is 15 yuan.\n",
            "#Person1#: Here's 20 yuan, keep the change.\n",
            "#Person2#: Thank you very much.\n",
            "Summary:\n",
            "\n",
            "---------------------------------------------------------------------------------------------------\n",
            "BASELINE SUMMARY:\n",
            " #Person1# takes a taxi to the Friendship Hotel for something important.\n",
            "\n",
            "---------------------------------------------------------------------------------------------------\n",
            "MODEL GENERATED SUMMARY:\n",
            " The taxi will pick you up at the Friendship Hotel.\n"
          ]
        }
      ],
      "source": [
        "index = 150\n",
        "\n",
        "dialogue = dataset['test'][index]['dialogue']\n",
        "summary = dataset['test'][index]['summary']\n",
        "prompt = f\"\"\"\n",
        "Summarize the following conversation.\n",
        "{dialogue}\n",
        "Summary:\n",
        "\"\"\"\n",
        "inputs = tokenizer(prompt, return_tensors='pt')\n",
        "output = tokenizer.decode(\n",
        "    base_model.generate(\n",
        "        inputs[\"input_ids\"],\n",
        "        max_new_tokens = 200,\n",
        "    )[0],\n",
        "    skip_special_tokens=True\n",
        ")\n",
        "dash_line = '-'.join('' for x in range(100))\n",
        "print(dash_line)\n",
        "print(f'INPUT PROMPT:\\n{prompt}')\n",
        "print(dash_line)\n",
        "print(f'BASELINE SUMMARY:\\n {summary}\\n')\n",
        "print(dash_line)\n",
        "print(f'MODEL GENERATED SUMMARY:\\n {output}')"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 8,
      "metadata": {
        "id": "4ZRPbFyuF2tX"
      },
      "outputs": [],
      "source": [
        "def tokenize_function(example):\n",
        "    start_prompt = \"Summarize the following conversation.\\n\\n\"\n",
        "    end_prompt = '\\n\\nSummary: '\n",
        "    prompt = [start_prompt + dialogue + end_prompt for dialogue in example['dialogue']]\n",
        "    example['input_ids'] = tokenizer(prompt, padding='max_length', truncation=True, return_tensors='pt').input_ids\n",
        "    example['labels'] = tokenizer(example['summary'], padding='max_length', truncation=True, return_tensors='pt').input_ids\n",
        "    return example"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 9,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 49,
          "referenced_widgets": [
            "191e83ce3953499c9be33286ec762c0a",
            "1fee7f795e714d028ff8af71b9ae56de",
            "ba8f14998569469b95e7153e89a86098",
            "f692c5360afe44839ed23e80a35e7f62",
            "3d15497caf044cc7bc4034ef8bc48238",
            "733007b2c33a42ebb42d6ce8392b514b",
            "5cfb7f92123f4884aa4442e227277378",
            "2c4d28068f1d4db5987ddd166c71477e",
            "775b61d5bde74ac7a8f74262ee208f0a",
            "89cd27d582994cfb8db85a8be2df76e3",
            "20b63bfeec184873a1246086ececfc0c"
          ]
        },
        "id": "PXto6CyVF2tX",
        "outputId": "876baf5f-1b0c-4b5a-d119-364b956519d4"
      },
      "outputs": [
        {
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "191e83ce3953499c9be33286ec762c0a",
              "version_major": 2,
              "version_minor": 0
            },
            "text/plain": [
              "Map:   0%|          | 0/500 [00:00<?, ? examples/s]"
            ]
          },
          "metadata": {},
          "output_type": "display_data"
        }
      ],
      "source": [
        "tokenized_datasets = dataset.map(tokenize_function, batched=True, batch_size=32)\n",
        "tokenized_datasets = tokenized_datasets.remove_columns(['id', 'topic', 'dialogue', 'summary'])"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 10,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 370,
          "referenced_widgets": [
            "2e038389704c4f40b8ab9cead672e725",
            "e85554e5cd22434e892693edbcc83a4b",
            "b9eb2c31392945a1bc3bacdd782addac",
            "5a5a96fba0334b50814fd03b86c418fc",
            "e6466602735343eabb67125d74258634",
            "18201ec200b143d0b982926365167688",
            "f716d287b7e44e1f9399fa3d42a3a360",
            "5f181ed93540409eabf5481cfcca8912",
            "4885cbf3e7594b0ab825721187fe0c27",
            "9ce011dc17e64018ba76b7f408f99c78",
            "5ee5e919d1e8426aa9aa9108ac46d26d"
          ]
        },
        "id": "nimBngpHF2tX",
        "outputId": "0a0d63e7-46b8-4c64-d019-4f562a7a00df"
      },
      "outputs": [
        {
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "2e038389704c4f40b8ab9cead672e725",
              "version_major": 2,
              "version_minor": 0
            },
            "text/plain": [
              "Filter:   0%|          | 0/500 [00:00<?, ? examples/s]"
            ]
          },
          "metadata": {},
          "output_type": "display_data"
        },
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Shapes of datasets:\n",
            "Training: (125, 2)\n",
            "Validation: (5, 2)\n",
            "Test: (15, 2)\n",
            "DatasetDict({\n",
            "    train: Dataset({\n",
            "        features: ['input_ids', 'labels'],\n",
            "        num_rows: 125\n",
            "    })\n",
            "    validation: Dataset({\n",
            "        features: ['input_ids', 'labels'],\n",
            "        num_rows: 5\n",
            "    })\n",
            "    test: Dataset({\n",
            "        features: ['input_ids', 'labels'],\n",
            "        num_rows: 15\n",
            "    })\n",
            "})\n"
          ]
        }
      ],
      "source": [
        "tokenized_datasets = tokenized_datasets.filter(lambda example, index: index % 100 == 0, with_indices=True)\n",
        "print(f\"Shapes of datasets:\")\n",
        "print(f\"Training: {tokenized_datasets['train'].shape}\")\n",
        "print(f\"Validation: {tokenized_datasets['validation'].shape}\")\n",
        "print(f\"Test: {tokenized_datasets['test'].shape}\")\n",
        "print(tokenized_datasets)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 11,
      "metadata": {
        "id": "TP2QjhYQF2tY"
      },
      "outputs": [],
      "source": [
        "output_dir = f'./training-{str(int(time.time()))}'\n",
        "training_args = TrainingArguments(\n",
        "    output_dir=output_dir,\n",
        "    learning_rate=1e-3,\n",
        "    num_train_epochs=50,\n",
        "    weight_decay=0.01,\n",
        "    logging_steps=1,\n",
        "    max_steps=100\n",
        ")\n",
        "trainer = Trainer(\n",
        "    model=base_model,\n",
        "    args=training_args,\n",
        "    train_dataset=tokenized_datasets['train'],\n",
        "    eval_dataset=tokenized_datasets['validation']\n",
        ")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 12,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1000
        },
        "id": "ZKXbD5CAF2tY",
        "outputId": "1e922722-c2f8-47eb-83da-6de688dd8723"
      },
      "outputs": [
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m The `run_name` is currently set to the same value as `TrainingArguments.output_dir`. If this was not intended, please specify a different run name by setting the `TrainingArguments.run_name` parameter.\n",
            "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mdhrumeen\u001b[0m (\u001b[33mdhrumeen-umass\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n",
            "\u001b[34m\u001b[1mwandb\u001b[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.\n"
          ]
        },
        {
          "data": {
            "text/html": [
              "Tracking run with wandb version 0.19.2"
            ],
            "text/plain": [
              "<IPython.core.display.HTML object>"
            ]
          },
          "metadata": {},
          "output_type": "display_data"
        },
        {
          "data": {
            "text/html": [
              "Run data is saved locally in <code>/content/wandb/run-20250120_072049-urybv4uy</code>"
            ],
            "text/plain": [
              "<IPython.core.display.HTML object>"
            ]
          },
          "metadata": {},
          "output_type": "display_data"
        },
        {
          "data": {
            "text/html": [
              "Syncing run <strong><a href='https://wandb.ai/dhrumeen-umass/huggingface/runs/urybv4uy' target=\"_blank\">./training-1737357642</a></strong> to <a href='https://wandb.ai/dhrumeen-umass/huggingface' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/developer-guide' target=\"_blank\">docs</a>)<br>"
            ],
            "text/plain": [
              "<IPython.core.display.HTML object>"
            ]
          },
          "metadata": {},
          "output_type": "display_data"
        },
        {
          "data": {
            "text/html": [
              " View project at <a href='https://wandb.ai/dhrumeen-umass/huggingface' target=\"_blank\">https://wandb.ai/dhrumeen-umass/huggingface</a>"
            ],
            "text/plain": [
              "<IPython.core.display.HTML object>"
            ]
          },
          "metadata": {},
          "output_type": "display_data"
        },
        {
          "data": {
            "text/html": [
              " View run at <a href='https://wandb.ai/dhrumeen-umass/huggingface/runs/urybv4uy' target=\"_blank\">https://wandb.ai/dhrumeen-umass/huggingface/runs/urybv4uy</a>"
            ],
            "text/plain": [
              "<IPython.core.display.HTML object>"
            ]
          },
          "metadata": {},
          "output_type": "display_data"
        },
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.48.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.\n"
          ]
        },
        {
          "data": {
            "text/html": [
              "\n",
              "    <div>\n",
              "      \n",
              "      <progress value='100' max='100' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
              "      [100/100 04:43, Epoch 6/7]\n",
              "    </div>\n",
              "    <table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              " <tr style=\"text-align: left;\">\n",
              "      <th>Step</th>\n",
              "      <th>Training Loss</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <td>1</td>\n",
              "      <td>49.000000</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>2</td>\n",
              "      <td>23.875000</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>3</td>\n",
              "      <td>14.937500</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>4</td>\n",
              "      <td>6.031200</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>5</td>\n",
              "      <td>4.562500</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>6</td>\n",
              "      <td>4.312500</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>7</td>\n",
              "      <td>4.000000</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>8</td>\n",
              "      <td>3.640600</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>9</td>\n",
              "      <td>3.109400</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>10</td>\n",
              "      <td>2.843800</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>11</td>\n",
              "      <td>2.406200</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>12</td>\n",
              "      <td>2.343800</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>13</td>\n",
              "      <td>1.851600</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>14</td>\n",
              "      <td>1.687500</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>15</td>\n",
              "      <td>1.460900</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>16</td>\n",
              "      <td>1.203100</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>17</td>\n",
              "      <td>1.015600</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>18</td>\n",
              "      <td>1.085900</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>19</td>\n",
              "      <td>0.761700</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>20</td>\n",
              "      <td>0.632800</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>21</td>\n",
              "      <td>0.558600</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>22</td>\n",
              "      <td>0.494100</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>23</td>\n",
              "      <td>0.425800</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>24</td>\n",
              "      <td>0.347700</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>25</td>\n",
              "      <td>0.320300</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>26</td>\n",
              "      <td>0.243200</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>27</td>\n",
              "      <td>0.259800</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>28</td>\n",
              "      <td>0.222700</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>29</td>\n",
              "      <td>0.219700</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>30</td>\n",
              "      <td>0.359400</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>31</td>\n",
              "      <td>0.207000</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>32</td>\n",
              "      <td>0.202100</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>33</td>\n",
              "      <td>0.191400</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>34</td>\n",
              "      <td>0.204100</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>35</td>\n",
              "      <td>0.156200</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>36</td>\n",
              "      <td>0.118700</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>37</td>\n",
              "      <td>0.136700</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>38</td>\n",
              "      <td>0.127900</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>39</td>\n",
              "      <td>0.248000</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>40</td>\n",
              "      <td>0.112800</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>41</td>\n",
              "      <td>0.135700</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>42</td>\n",
              "      <td>0.148400</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>43</td>\n",
              "      <td>0.150400</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>44</td>\n",
              "      <td>0.094200</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>45</td>\n",
              "      <td>0.127900</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>46</td>\n",
              "      <td>0.119600</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>47</td>\n",
              "      <td>0.165000</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>48</td>\n",
              "      <td>0.218800</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>49</td>\n",
              "      <td>0.119100</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>50</td>\n",
              "      <td>0.123500</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>51</td>\n",
              "      <td>0.124500</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>52</td>\n",
              "      <td>0.130900</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>53</td>\n",
              "      <td>0.102500</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>54</td>\n",
              "      <td>0.079600</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>55</td>\n",
              "      <td>0.128900</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>56</td>\n",
              "      <td>0.074200</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>57</td>\n",
              "      <td>0.084500</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>58</td>\n",
              "      <td>0.099600</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>59</td>\n",
              "      <td>0.111800</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>60</td>\n",
              "      <td>0.102100</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>61</td>\n",
              "      <td>0.090300</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>62</td>\n",
              "      <td>0.096200</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>63</td>\n",
              "      <td>0.120600</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>64</td>\n",
              "      <td>0.158200</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>65</td>\n",
              "      <td>0.061500</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>66</td>\n",
              "      <td>0.082500</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>67</td>\n",
              "      <td>0.067900</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>68</td>\n",
              "      <td>0.096200</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>69</td>\n",
              "      <td>0.098100</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>70</td>\n",
              "      <td>0.073700</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>71</td>\n",
              "      <td>0.079100</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>72</td>\n",
              "      <td>0.106900</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>73</td>\n",
              "      <td>0.155300</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>74</td>\n",
              "      <td>0.096200</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>75</td>\n",
              "      <td>0.075200</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>76</td>\n",
              "      <td>0.096200</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>77</td>\n",
              "      <td>0.092800</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>78</td>\n",
              "      <td>0.084000</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>79</td>\n",
              "      <td>0.075200</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>80</td>\n",
              "      <td>0.064500</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>81</td>\n",
              "      <td>0.058600</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>82</td>\n",
              "      <td>0.067900</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>83</td>\n",
              "      <td>0.080600</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>84</td>\n",
              "      <td>0.094200</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>85</td>\n",
              "      <td>0.066400</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>86</td>\n",
              "      <td>0.089400</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>87</td>\n",
              "      <td>0.088400</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>88</td>\n",
              "      <td>0.095700</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>89</td>\n",
              "      <td>0.098600</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>90</td>\n",
              "      <td>0.057600</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>91</td>\n",
              "      <td>0.084500</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>92</td>\n",
              "      <td>0.056900</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>93</td>\n",
              "      <td>0.072800</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>94</td>\n",
              "      <td>0.147500</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>95</td>\n",
              "      <td>0.070800</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>96</td>\n",
              "      <td>0.053200</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>97</td>\n",
              "      <td>0.069300</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>98</td>\n",
              "      <td>0.064900</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>99</td>\n",
              "      <td>0.067400</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>100</td>\n",
              "      <td>0.088900</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table><p>"
            ],
            "text/plain": [
              "<IPython.core.display.HTML object>"
            ]
          },
          "metadata": {},
          "output_type": "display_data"
        },
        {
          "data": {
            "text/plain": [
              "TrainOutput(global_step=100, training_loss=1.41803466796875, metrics={'train_runtime': 292.467, 'train_samples_per_second': 2.735, 'train_steps_per_second': 0.342, 'total_flos': 535480249614336.0, 'train_loss': 1.41803466796875, 'epoch': 6.25})"
            ]
          },
          "execution_count": 12,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "trainer.train()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 25,
      "metadata": {
        "id": "XWoSZE_zF2tY"
      },
      "outputs": [],
      "source": [
        "trained_model_dir = './trained_model'\n",
        "trainer.save_model(trained_model_dir)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 14,
      "metadata": {
        "id": "0DgzurAoF2tY"
      },
      "outputs": [],
      "source": [
        "trained_model_dir = './trained_model'\n",
        "trained_model = AutoModelForSeq2SeqLM.from_pretrained(trained_model_dir)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 15,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "wPOZmfIoF2tY",
        "outputId": "7d31d962-b30e-420d-cf19-b7822624f2f4"
      },
      "outputs": [
        {
          "data": {
            "text/plain": [
              "T5ForConditionalGeneration(\n",
              "  (shared): Embedding(32128, 768)\n",
              "  (encoder): T5Stack(\n",
              "    (embed_tokens): Embedding(32128, 768)\n",
              "    (block): ModuleList(\n",
              "      (0): T5Block(\n",
              "        (layer): ModuleList(\n",
              "          (0): T5LayerSelfAttention(\n",
              "            (SelfAttention): T5Attention(\n",
              "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
              "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
              "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
              "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
              "              (relative_attention_bias): Embedding(32, 12)\n",
              "            )\n",
              "            (layer_norm): T5LayerNorm()\n",
              "            (dropout): Dropout(p=0.1, inplace=False)\n",
              "          )\n",
              "          (1): T5LayerFF(\n",
              "            (DenseReluDense): T5DenseGatedActDense(\n",
              "              (wi_0): Linear(in_features=768, out_features=2048, bias=False)\n",
              "              (wi_1): Linear(in_features=768, out_features=2048, bias=False)\n",
              "              (wo): Linear(in_features=2048, out_features=768, bias=False)\n",
              "              (dropout): Dropout(p=0.1, inplace=False)\n",
              "              (act): NewGELUActivation()\n",
              "            )\n",
              "            (layer_norm): T5LayerNorm()\n",
              "            (dropout): Dropout(p=0.1, inplace=False)\n",
              "          )\n",
              "        )\n",
              "      )\n",
              "      (1-11): 11 x T5Block(\n",
              "        (layer): ModuleList(\n",
              "          (0): T5LayerSelfAttention(\n",
              "            (SelfAttention): T5Attention(\n",
              "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
              "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
              "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
              "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
              "            )\n",
              "            (layer_norm): T5LayerNorm()\n",
              "            (dropout): Dropout(p=0.1, inplace=False)\n",
              "          )\n",
              "          (1): T5LayerFF(\n",
              "            (DenseReluDense): T5DenseGatedActDense(\n",
              "              (wi_0): Linear(in_features=768, out_features=2048, bias=False)\n",
              "              (wi_1): Linear(in_features=768, out_features=2048, bias=False)\n",
              "              (wo): Linear(in_features=2048, out_features=768, bias=False)\n",
              "              (dropout): Dropout(p=0.1, inplace=False)\n",
              "              (act): NewGELUActivation()\n",
              "            )\n",
              "            (layer_norm): T5LayerNorm()\n",
              "            (dropout): Dropout(p=0.1, inplace=False)\n",
              "          )\n",
              "        )\n",
              "      )\n",
              "    )\n",
              "    (final_layer_norm): T5LayerNorm()\n",
              "    (dropout): Dropout(p=0.1, inplace=False)\n",
              "  )\n",
              "  (decoder): T5Stack(\n",
              "    (embed_tokens): Embedding(32128, 768)\n",
              "    (block): ModuleList(\n",
              "      (0): T5Block(\n",
              "        (layer): ModuleList(\n",
              "          (0): T5LayerSelfAttention(\n",
              "            (SelfAttention): T5Attention(\n",
              "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
              "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
              "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
              "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
              "              (relative_attention_bias): Embedding(32, 12)\n",
              "            )\n",
              "            (layer_norm): T5LayerNorm()\n",
              "            (dropout): Dropout(p=0.1, inplace=False)\n",
              "          )\n",
              "          (1): T5LayerCrossAttention(\n",
              "            (EncDecAttention): T5Attention(\n",
              "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
              "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
              "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
              "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
              "            )\n",
              "            (layer_norm): T5LayerNorm()\n",
              "            (dropout): Dropout(p=0.1, inplace=False)\n",
              "          )\n",
              "          (2): T5LayerFF(\n",
              "            (DenseReluDense): T5DenseGatedActDense(\n",
              "              (wi_0): Linear(in_features=768, out_features=2048, bias=False)\n",
              "              (wi_1): Linear(in_features=768, out_features=2048, bias=False)\n",
              "              (wo): Linear(in_features=2048, out_features=768, bias=False)\n",
              "              (dropout): Dropout(p=0.1, inplace=False)\n",
              "              (act): NewGELUActivation()\n",
              "            )\n",
              "            (layer_norm): T5LayerNorm()\n",
              "            (dropout): Dropout(p=0.1, inplace=False)\n",
              "          )\n",
              "        )\n",
              "      )\n",
              "      (1-11): 11 x T5Block(\n",
              "        (layer): ModuleList(\n",
              "          (0): T5LayerSelfAttention(\n",
              "            (SelfAttention): T5Attention(\n",
              "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
              "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
              "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
              "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
              "            )\n",
              "            (layer_norm): T5LayerNorm()\n",
              "            (dropout): Dropout(p=0.1, inplace=False)\n",
              "          )\n",
              "          (1): T5LayerCrossAttention(\n",
              "            (EncDecAttention): T5Attention(\n",
              "              (q): Linear(in_features=768, out_features=768, bias=False)\n",
              "              (k): Linear(in_features=768, out_features=768, bias=False)\n",
              "              (v): Linear(in_features=768, out_features=768, bias=False)\n",
              "              (o): Linear(in_features=768, out_features=768, bias=False)\n",
              "            )\n",
              "            (layer_norm): T5LayerNorm()\n",
              "            (dropout): Dropout(p=0.1, inplace=False)\n",
              "          )\n",
              "          (2): T5LayerFF(\n",
              "            (DenseReluDense): T5DenseGatedActDense(\n",
              "              (wi_0): Linear(in_features=768, out_features=2048, bias=False)\n",
              "              (wi_1): Linear(in_features=768, out_features=2048, bias=False)\n",
              "              (wo): Linear(in_features=2048, out_features=768, bias=False)\n",
              "              (dropout): Dropout(p=0.1, inplace=False)\n",
              "              (act): NewGELUActivation()\n",
              "            )\n",
              "            (layer_norm): T5LayerNorm()\n",
              "            (dropout): Dropout(p=0.1, inplace=False)\n",
              "          )\n",
              "        )\n",
              "      )\n",
              "    )\n",
              "    (final_layer_norm): T5LayerNorm()\n",
              "    (dropout): Dropout(p=0.1, inplace=False)\n",
              "  )\n",
              "  (lm_head): Linear(in_features=768, out_features=32128, bias=False)\n",
              ")"
            ]
          },
          "execution_count": 15,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
        "input_ids = tokenizer(prompt, return_tensors=\"pt\").input_ids\n",
        "input_ids = input_ids.to(device)\n",
        "base_model.to(device)\n",
        "trained_model.to(device)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 16,
      "metadata": {
        "id": "H9f5QCZzF2tY"
      },
      "outputs": [],
      "source": [
        "generation_config = GenerationConfig(max_new_tokens=200, num_beams=1)\n",
        "original_model_outputs = base_model.generate(input_ids=input_ids, generation_config=generation_config)\n",
        "original_model_text_output = tokenizer.decode(original_model_outputs[0], skip_special_tokens=True)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 17,
      "metadata": {
        "id": "Wmeo0oqGF2tY"
      },
      "outputs": [],
      "source": [
        "trained_model_outputs = trained_model.generate(input_ids=input_ids, generation_config=generation_config)\n",
        "trained_model_text_output = tokenizer.decode(trained_model_outputs[0], skip_special_tokens=True)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 18,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "So_8WpbbF2tY",
        "outputId": "d1585112-5af9-4c8b-f0d6-70fdec6186fa"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "--------------------------------------------------\n",
            "BASELINE HUMAN SUMMARY: \n",
            "#Person1# takes a taxi to the Friendship Hotel for something important.\n",
            "--------------------------------------------------\n",
            "ORIGINAL MODEL: \n",
            "#Person1# is a taxi to Friendship Hotel. #Person2# will try his best to get the meter, but #Person1# will try his best.\n",
            "--------------------------------------------------\n",
            "TRAINED MODEL: \n",
            "#Person1# will go to Friendship Hotel. #Person1# will try his best to pay #Person1#.\n"
          ]
        }
      ],
      "source": [
        "human_baseline_summary = summary\n",
        "dash_line = '-' * 50\n",
        "print(dash_line)\n",
        "print(f'BASELINE HUMAN SUMMARY: \\n{human_baseline_summary}')\n",
        "print(dash_line)\n",
        "print(f'ORIGINAL MODEL: \\n{original_model_text_output}')\n",
        "print(dash_line)\n",
        "print(f'TRAINED MODEL: \\n{trained_model_text_output}')"
      ]
    }
  ],
  "metadata": {
    "accelerator": "GPU",
    "colab": {
      "gpuType": "T4",
      "provenance": []
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.13.1"
    },
    "widgets": {
      "application/vnd.jupyter.widget-state+json": {
        "18201ec200b143d0b982926365167688": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "191e83ce3953499c9be33286ec762c0a": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HBoxModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_1fee7f795e714d028ff8af71b9ae56de",
              "IPY_MODEL_ba8f14998569469b95e7153e89a86098",
              "IPY_MODEL_f692c5360afe44839ed23e80a35e7f62"
            ],
            "layout": "IPY_MODEL_3d15497caf044cc7bc4034ef8bc48238"
          }
        },
        "1fee7f795e714d028ff8af71b9ae56de": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HTMLModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_733007b2c33a42ebb42d6ce8392b514b",
            "placeholder": "​",
            "style": "IPY_MODEL_5cfb7f92123f4884aa4442e227277378",
            "value": "Map: 100%"
          }
        },
        "20b63bfeec184873a1246086ececfc0c": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "2c4d28068f1d4db5987ddd166c71477e": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "2e038389704c4f40b8ab9cead672e725": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HBoxModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_e85554e5cd22434e892693edbcc83a4b",
              "IPY_MODEL_b9eb2c31392945a1bc3bacdd782addac",
              "IPY_MODEL_5a5a96fba0334b50814fd03b86c418fc"
            ],
            "layout": "IPY_MODEL_e6466602735343eabb67125d74258634"
          }
        },
        "3d15497caf044cc7bc4034ef8bc48238": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "4885cbf3e7594b0ab825721187fe0c27": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "ProgressStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "5a5a96fba0334b50814fd03b86c418fc": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HTMLModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_9ce011dc17e64018ba76b7f408f99c78",
            "placeholder": "​",
            "style": "IPY_MODEL_5ee5e919d1e8426aa9aa9108ac46d26d",
            "value": " 500/500 [00:00&lt;00:00, 1056.20 examples/s]"
          }
        },
        "5cfb7f92123f4884aa4442e227277378": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "5ee5e919d1e8426aa9aa9108ac46d26d": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "5f181ed93540409eabf5481cfcca8912": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "733007b2c33a42ebb42d6ce8392b514b": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "775b61d5bde74ac7a8f74262ee208f0a": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "ProgressStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "89cd27d582994cfb8db85a8be2df76e3": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "9ce011dc17e64018ba76b7f408f99c78": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "b9eb2c31392945a1bc3bacdd782addac": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "FloatProgressModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_5f181ed93540409eabf5481cfcca8912",
            "max": 500,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_4885cbf3e7594b0ab825721187fe0c27",
            "value": 500
          }
        },
        "ba8f14998569469b95e7153e89a86098": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "FloatProgressModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_2c4d28068f1d4db5987ddd166c71477e",
            "max": 500,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_775b61d5bde74ac7a8f74262ee208f0a",
            "value": 500
          }
        },
        "e6466602735343eabb67125d74258634": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "e85554e5cd22434e892693edbcc83a4b": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HTMLModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_18201ec200b143d0b982926365167688",
            "placeholder": "​",
            "style": "IPY_MODEL_f716d287b7e44e1f9399fa3d42a3a360",
            "value": "Filter: 100%"
          }
        },
        "f692c5360afe44839ed23e80a35e7f62": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HTMLModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_89cd27d582994cfb8db85a8be2df76e3",
            "placeholder": "​",
            "style": "IPY_MODEL_20b63bfeec184873a1246086ececfc0c",
            "value": " 500/500 [00:01&lt;00:00, 331.09 examples/s]"
          }
        },
        "f716d287b7e44e1f9399fa3d42a3a360": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        }
      }
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}