diff --git "a/text-summarization.ipynb" "b/text-summarization.ipynb"
new file mode 100644--- /dev/null
+++ "b/text-summarization.ipynb"
@@ -0,0 +1 @@
+{"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.10.13","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"kaggle":{"accelerator":"nvidiaTeslaT4","dataSources":[],"dockerImageVersionId":30747,"isInternetEnabled":true,"language":"python","sourceType":"notebook","isGpuEnabled":true}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"code","source":"!pip install -U transformers\n!pip install -U accelerate\n!pip install -U datasets\n!pip install -U bertviz\n!pip install -U umap-learn\n!pip install -U sentencepiece\n!pip install -U urllib3\n!pip install py7zr\n!pip uninstall -y botocore\n!pip install botocore","metadata":{"_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","execution":{"iopub.status.busy":"2024-07-20T03:45:06.007979Z","iopub.execute_input":"2024-07-20T03:45:06.008672Z","iopub.status.idle":"2024-07-20T03:47:34.628795Z","shell.execute_reply.started":"2024-07-20T03:45:06.008639Z","shell.execute_reply":"2024-07-20T03:47:34.627697Z"},"trusted":true},"execution_count":1,"outputs":[{"name":"stdout","text":"Requirement already satisfied: transformers in /opt/conda/lib/python3.10/site-packages (4.42.3)\nCollecting transformers\n  Downloading transformers-4.42.4-py3-none-any.whl.metadata (43 kB)\n\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m43.6/43.6 kB\u001b[0m \u001b[31m672.7 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n\u001b[?25hRequirement already satisfied: filelock in /opt/conda/lib/python3.10/site-packages (from transformers) (3.13.1)\nRequirement already satisfied: huggingface-hub<1.0,>=0.23.2 in /opt/conda/lib/python3.10/site-packages (from transformers) (0.23.4)\nRequirement already satisfied: numpy<2.0,>=1.17 in /opt/conda/lib/python3.10/site-packages (from transformers) (1.26.4)\nRequirement already satisfied: packaging>=20.0 in /opt/conda/lib/python3.10/site-packages (from transformers) (21.3)\nRequirement already satisfied: pyyaml>=5.1 in /opt/conda/lib/python3.10/site-packages (from transformers) (6.0.1)\nRequirement already satisfied: regex!=2019.12.17 in /opt/conda/lib/python3.10/site-packages (from transformers) (2023.12.25)\nRequirement already satisfied: requests in /opt/conda/lib/python3.10/site-packages (from transformers) (2.32.3)\nRequirement already satisfied: safetensors>=0.4.1 in /opt/conda/lib/python3.10/site-packages (from transformers) (0.4.3)\nRequirement already satisfied: tokenizers<0.20,>=0.19 in /opt/conda/lib/python3.10/site-packages (from transformers) (0.19.1)\nRequirement already satisfied: tqdm>=4.27 in /opt/conda/lib/python3.10/site-packages (from transformers) (4.66.4)\nRequirement already satisfied: fsspec>=2023.5.0 in /opt/conda/lib/python3.10/site-packages (from huggingface-hub<1.0,>=0.23.2->transformers) (2024.5.0)\nRequirement already satisfied: typing-extensions>=3.7.4.3 in /opt/conda/lib/python3.10/site-packages (from huggingface-hub<1.0,>=0.23.2->transformers) (4.9.0)\nRequirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /opt/conda/lib/python3.10/site-packages (from packaging>=20.0->transformers) (3.1.1)\nRequirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/lib/python3.10/site-packages (from requests->transformers) (3.3.2)\nRequirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests->transformers) (3.6)\nRequirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests->transformers) (1.26.18)\nRequirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests->transformers) (2024.7.4)\nDownloading transformers-4.42.4-py3-none-any.whl (9.3 MB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.3/9.3 MB\u001b[0m \u001b[31m27.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n\u001b[?25hInstalling collected packages: transformers\n  Attempting uninstall: transformers\n    Found existing installation: transformers 4.42.3\n    Uninstalling transformers-4.42.3:\n      Successfully uninstalled transformers-4.42.3\nSuccessfully installed transformers-4.42.4\nRequirement already satisfied: accelerate in /opt/conda/lib/python3.10/site-packages (0.32.1)\nRequirement already satisfied: numpy<2.0.0,>=1.17 in /opt/conda/lib/python3.10/site-packages (from accelerate) (1.26.4)\nRequirement already satisfied: packaging>=20.0 in /opt/conda/lib/python3.10/site-packages (from accelerate) (21.3)\nRequirement already satisfied: psutil in /opt/conda/lib/python3.10/site-packages (from accelerate) (5.9.3)\nRequirement already satisfied: pyyaml in /opt/conda/lib/python3.10/site-packages (from accelerate) (6.0.1)\nRequirement already satisfied: torch>=1.10.0 in /opt/conda/lib/python3.10/site-packages (from accelerate) (2.1.2)\nRequirement already satisfied: huggingface-hub in /opt/conda/lib/python3.10/site-packages (from accelerate) (0.23.4)\nRequirement already satisfied: safetensors>=0.3.1 in /opt/conda/lib/python3.10/site-packages (from accelerate) (0.4.3)\nRequirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /opt/conda/lib/python3.10/site-packages (from packaging>=20.0->accelerate) (3.1.1)\nRequirement already satisfied: filelock in /opt/conda/lib/python3.10/site-packages (from torch>=1.10.0->accelerate) (3.13.1)\nRequirement already satisfied: typing-extensions in /opt/conda/lib/python3.10/site-packages (from torch>=1.10.0->accelerate) (4.9.0)\nRequirement already satisfied: sympy in /opt/conda/lib/python3.10/site-packages (from torch>=1.10.0->accelerate) (1.13.0)\nRequirement already satisfied: networkx in /opt/conda/lib/python3.10/site-packages (from torch>=1.10.0->accelerate) (3.2.1)\nRequirement already satisfied: jinja2 in /opt/conda/lib/python3.10/site-packages (from torch>=1.10.0->accelerate) (3.1.2)\nRequirement already satisfied: fsspec in /opt/conda/lib/python3.10/site-packages (from torch>=1.10.0->accelerate) (2024.5.0)\nRequirement already satisfied: requests in /opt/conda/lib/python3.10/site-packages (from huggingface-hub->accelerate) (2.32.3)\nRequirement already satisfied: tqdm>=4.42.1 in /opt/conda/lib/python3.10/site-packages (from huggingface-hub->accelerate) (4.66.4)\nRequirement already satisfied: MarkupSafe>=2.0 in /opt/conda/lib/python3.10/site-packages (from jinja2->torch>=1.10.0->accelerate) (2.1.3)\nRequirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/lib/python3.10/site-packages (from requests->huggingface-hub->accelerate) (3.3.2)\nRequirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests->huggingface-hub->accelerate) (3.6)\nRequirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests->huggingface-hub->accelerate) (1.26.18)\nRequirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests->huggingface-hub->accelerate) (2024.7.4)\nRequirement already satisfied: mpmath<1.4,>=1.1.0 in /opt/conda/lib/python3.10/site-packages (from sympy->torch>=1.10.0->accelerate) (1.3.0)\nRequirement already satisfied: datasets in /opt/conda/lib/python3.10/site-packages (2.20.0)\nRequirement already satisfied: filelock in /opt/conda/lib/python3.10/site-packages (from datasets) (3.13.1)\nRequirement already satisfied: numpy>=1.17 in /opt/conda/lib/python3.10/site-packages (from datasets) (1.26.4)\nRequirement already satisfied: pyarrow>=15.0.0 in /opt/conda/lib/python3.10/site-packages (from datasets) (16.1.0)\nRequirement already satisfied: pyarrow-hotfix in /opt/conda/lib/python3.10/site-packages (from datasets) (0.6)\nRequirement already satisfied: dill<0.3.9,>=0.3.0 in /opt/conda/lib/python3.10/site-packages (from datasets) (0.3.8)\nRequirement already satisfied: pandas in /opt/conda/lib/python3.10/site-packages (from datasets) (2.2.2)\nRequirement already satisfied: requests>=2.32.2 in /opt/conda/lib/python3.10/site-packages (from datasets) (2.32.3)\nRequirement already satisfied: tqdm>=4.66.3 in /opt/conda/lib/python3.10/site-packages (from datasets) (4.66.4)\nRequirement already satisfied: xxhash in /opt/conda/lib/python3.10/site-packages (from datasets) (3.4.1)\nRequirement already satisfied: multiprocess in /opt/conda/lib/python3.10/site-packages (from datasets) (0.70.16)\nRequirement already satisfied: fsspec<=2024.5.0,>=2023.1.0 in /opt/conda/lib/python3.10/site-packages (from fsspec[http]<=2024.5.0,>=2023.1.0->datasets) (2024.5.0)\nRequirement already satisfied: aiohttp in /opt/conda/lib/python3.10/site-packages (from datasets) (3.9.1)\nRequirement already satisfied: huggingface-hub>=0.21.2 in /opt/conda/lib/python3.10/site-packages (from datasets) (0.23.4)\nRequirement already satisfied: packaging in /opt/conda/lib/python3.10/site-packages (from datasets) (21.3)\nRequirement already satisfied: pyyaml>=5.1 in /opt/conda/lib/python3.10/site-packages (from datasets) (6.0.1)\nRequirement already satisfied: attrs>=17.3.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (23.2.0)\nRequirement already satisfied: multidict<7.0,>=4.5 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (6.0.4)\nRequirement already satisfied: yarl<2.0,>=1.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (1.9.3)\nRequirement already satisfied: frozenlist>=1.1.1 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (1.4.1)\nRequirement already satisfied: aiosignal>=1.1.2 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (1.3.1)\nRequirement already satisfied: async-timeout<5.0,>=4.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (4.0.3)\nRequirement already satisfied: typing-extensions>=3.7.4.3 in /opt/conda/lib/python3.10/site-packages (from huggingface-hub>=0.21.2->datasets) (4.9.0)\nRequirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /opt/conda/lib/python3.10/site-packages (from packaging->datasets) (3.1.1)\nRequirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/lib/python3.10/site-packages (from requests>=2.32.2->datasets) (3.3.2)\nRequirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests>=2.32.2->datasets) (3.6)\nRequirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests>=2.32.2->datasets) (1.26.18)\nRequirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests>=2.32.2->datasets) (2024.7.4)\nRequirement already satisfied: python-dateutil>=2.8.2 in /opt/conda/lib/python3.10/site-packages (from pandas->datasets) (2.9.0.post0)\nRequirement already satisfied: pytz>=2020.1 in /opt/conda/lib/python3.10/site-packages (from pandas->datasets) (2023.3.post1)\nRequirement already satisfied: tzdata>=2022.7 in /opt/conda/lib/python3.10/site-packages (from pandas->datasets) (2023.4)\nRequirement already satisfied: six>=1.5 in /opt/conda/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.16.0)\nCollecting bertviz\n  Downloading bertviz-1.4.0-py3-none-any.whl.metadata (19 kB)\nRequirement already satisfied: transformers>=2.0 in /opt/conda/lib/python3.10/site-packages (from bertviz) (4.42.4)\nRequirement already satisfied: torch>=1.0 in /opt/conda/lib/python3.10/site-packages (from bertviz) (2.1.2)\nRequirement already satisfied: tqdm in /opt/conda/lib/python3.10/site-packages (from bertviz) (4.66.4)\nRequirement already satisfied: boto3 in /opt/conda/lib/python3.10/site-packages (from bertviz) (1.26.100)\nRequirement already satisfied: requests in /opt/conda/lib/python3.10/site-packages (from bertviz) (2.32.3)\nRequirement already satisfied: regex in /opt/conda/lib/python3.10/site-packages (from bertviz) (2023.12.25)\nRequirement already satisfied: sentencepiece in /opt/conda/lib/python3.10/site-packages (from bertviz) (0.2.0)\nRequirement already satisfied: filelock in /opt/conda/lib/python3.10/site-packages (from torch>=1.0->bertviz) (3.13.1)\nRequirement already satisfied: typing-extensions in /opt/conda/lib/python3.10/site-packages (from torch>=1.0->bertviz) (4.9.0)\nRequirement already satisfied: sympy in /opt/conda/lib/python3.10/site-packages (from torch>=1.0->bertviz) (1.13.0)\nRequirement already satisfied: networkx in /opt/conda/lib/python3.10/site-packages (from torch>=1.0->bertviz) (3.2.1)\nRequirement already satisfied: jinja2 in /opt/conda/lib/python3.10/site-packages (from torch>=1.0->bertviz) (3.1.2)\nRequirement already satisfied: fsspec in /opt/conda/lib/python3.10/site-packages (from torch>=1.0->bertviz) (2024.5.0)\nRequirement already satisfied: huggingface-hub<1.0,>=0.23.2 in /opt/conda/lib/python3.10/site-packages (from transformers>=2.0->bertviz) (0.23.4)\nRequirement already satisfied: numpy<2.0,>=1.17 in /opt/conda/lib/python3.10/site-packages (from transformers>=2.0->bertviz) (1.26.4)\nRequirement already satisfied: packaging>=20.0 in /opt/conda/lib/python3.10/site-packages (from transformers>=2.0->bertviz) (21.3)\nRequirement already satisfied: pyyaml>=5.1 in /opt/conda/lib/python3.10/site-packages (from transformers>=2.0->bertviz) (6.0.1)\nRequirement already satisfied: safetensors>=0.4.1 in /opt/conda/lib/python3.10/site-packages (from transformers>=2.0->bertviz) (0.4.3)\nRequirement already satisfied: tokenizers<0.20,>=0.19 in /opt/conda/lib/python3.10/site-packages (from transformers>=2.0->bertviz) (0.19.1)\nCollecting botocore<1.30.0,>=1.29.100 (from boto3->bertviz)\n  Downloading botocore-1.29.165-py3-none-any.whl.metadata (5.9 kB)\nRequirement already satisfied: jmespath<2.0.0,>=0.7.1 in /opt/conda/lib/python3.10/site-packages (from boto3->bertviz) (1.0.1)\nRequirement already satisfied: s3transfer<0.7.0,>=0.6.0 in /opt/conda/lib/python3.10/site-packages (from boto3->bertviz) (0.6.2)\nRequirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/lib/python3.10/site-packages (from requests->bertviz) (3.3.2)\nRequirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests->bertviz) (3.6)\nRequirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests->bertviz) (1.26.18)\nRequirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests->bertviz) (2024.7.4)\nRequirement already satisfied: python-dateutil<3.0.0,>=2.1 in /opt/conda/lib/python3.10/site-packages (from botocore<1.30.0,>=1.29.100->boto3->bertviz) (2.9.0.post0)\nRequirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /opt/conda/lib/python3.10/site-packages (from packaging>=20.0->transformers>=2.0->bertviz) (3.1.1)\nRequirement already satisfied: MarkupSafe>=2.0 in /opt/conda/lib/python3.10/site-packages (from jinja2->torch>=1.0->bertviz) (2.1.3)\nRequirement already satisfied: mpmath<1.4,>=1.1.0 in /opt/conda/lib/python3.10/site-packages (from sympy->torch>=1.0->bertviz) (1.3.0)\nRequirement already satisfied: six>=1.5 in /opt/conda/lib/python3.10/site-packages (from python-dateutil<3.0.0,>=2.1->botocore<1.30.0,>=1.29.100->boto3->bertviz) (1.16.0)\nDownloading bertviz-1.4.0-py3-none-any.whl (157 kB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m157.6/157.6 kB\u001b[0m \u001b[31m1.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0mm\n\u001b[?25hDownloading botocore-1.29.165-py3-none-any.whl (11.0 MB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m11.0/11.0 MB\u001b[0m \u001b[31m51.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m0:01\u001b[0m\n\u001b[?25hInstalling collected packages: botocore, bertviz\n  Attempting uninstall: botocore\n    Found existing installation: botocore 1.34.131\n    Uninstalling botocore-1.34.131:\n      Successfully uninstalled botocore-1.34.131\n\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\naiobotocore 2.13.1 requires aiohttp<4.0.0,>=3.9.2, but you have aiohttp 3.9.1 which is incompatible.\naiobotocore 2.13.1 requires botocore<1.34.132,>=1.34.70, but you have botocore 1.29.165 which is incompatible.\u001b[0m\u001b[31m\n\u001b[0mSuccessfully installed bertviz-1.4.0 botocore-1.29.165\nRequirement already satisfied: umap-learn in /opt/conda/lib/python3.10/site-packages (0.5.6)\nRequirement already satisfied: numpy>=1.17 in /opt/conda/lib/python3.10/site-packages (from umap-learn) (1.26.4)\nRequirement already satisfied: scipy>=1.3.1 in /opt/conda/lib/python3.10/site-packages (from umap-learn) (1.11.4)\nRequirement already satisfied: scikit-learn>=0.22 in /opt/conda/lib/python3.10/site-packages (from umap-learn) (1.2.2)\nRequirement already satisfied: numba>=0.51.2 in /opt/conda/lib/python3.10/site-packages (from umap-learn) (0.58.1)\nRequirement already satisfied: pynndescent>=0.5 in /opt/conda/lib/python3.10/site-packages (from umap-learn) (0.5.13)\nRequirement already satisfied: tqdm in /opt/conda/lib/python3.10/site-packages (from umap-learn) (4.66.4)\nRequirement already satisfied: llvmlite<0.42,>=0.41.0dev0 in /opt/conda/lib/python3.10/site-packages (from numba>=0.51.2->umap-learn) (0.41.1)\nRequirement already satisfied: joblib>=0.11 in /opt/conda/lib/python3.10/site-packages (from pynndescent>=0.5->umap-learn) (1.4.2)\nRequirement already satisfied: threadpoolctl>=2.0.0 in /opt/conda/lib/python3.10/site-packages (from scikit-learn>=0.22->umap-learn) (3.2.0)\nRequirement already satisfied: sentencepiece in /opt/conda/lib/python3.10/site-packages (0.2.0)\nRequirement already satisfied: urllib3 in /opt/conda/lib/python3.10/site-packages (1.26.18)\nCollecting urllib3\n  Downloading urllib3-2.2.2-py3-none-any.whl.metadata (6.4 kB)\nDownloading urllib3-2.2.2-py3-none-any.whl (121 kB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m121.4/121.4 kB\u001b[0m \u001b[31m1.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m \u001b[36m0:00:01\u001b[0m0m\n\u001b[?25hInstalling collected packages: urllib3\n  Attempting uninstall: urllib3\n    Found existing installation: urllib3 1.26.18\n    Uninstalling urllib3-1.26.18:\n      Successfully uninstalled urllib3-1.26.18\n\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\ntensorflow-decision-forests 1.8.1 requires wurlitzer, which is not installed.\nbotocore 1.29.165 requires urllib3<1.27,>=1.25.4, but you have urllib3 2.2.2 which is incompatible.\ndistributed 2024.5.1 requires dask==2024.5.1, but you have dask 2024.7.0 which is incompatible.\nkfp 2.5.0 requires google-cloud-storage<3,>=2.2.1, but you have google-cloud-storage 1.44.0 which is incompatible.\nkfp 2.5.0 requires urllib3<2.0.0, but you have urllib3 2.2.2 which is incompatible.\nrapids-dask-dependency 24.6.0a0 requires dask==2024.5.1, but you have dask 2024.7.0 which is incompatible.\ntensorflow 2.15.0 requires keras<2.16,>=2.15.0, but you have keras 3.4.1 which is incompatible.\nydata-profiling 4.6.4 requires numpy<1.26,>=1.16.0, but you have numpy 1.26.4 which is incompatible.\u001b[0m\u001b[31m\n\u001b[0mSuccessfully installed urllib3-2.1.0\nCollecting py7zr\n  Downloading py7zr-0.21.1-py3-none-any.whl.metadata (17 kB)\nRequirement already satisfied: texttable in /opt/conda/lib/python3.10/site-packages (from py7zr) (1.7.0)\nCollecting pycryptodomex>=3.16.0 (from py7zr)\n  Downloading pycryptodomex-3.20.0-cp35-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.4 kB)\nCollecting pyzstd>=0.15.9 (from py7zr)\n  Downloading pyzstd-0.16.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.4 kB)\nCollecting pyppmd<1.2.0,>=1.1.0 (from py7zr)\n  Downloading pyppmd-1.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.7 kB)\nCollecting pybcj<1.1.0,>=1.0.0 (from py7zr)\n  Downloading pybcj-1.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.0 kB)\nCollecting multivolumefile>=0.2.3 (from py7zr)\n  Downloading multivolumefile-0.2.3-py3-none-any.whl.metadata (6.3 kB)\nCollecting inflate64<1.1.0,>=1.0.0 (from py7zr)\n  Downloading inflate64-1.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.0 kB)\nRequirement already satisfied: brotli>=1.1.0 in /opt/conda/lib/python3.10/site-packages (from py7zr) (1.1.0)\nRequirement already satisfied: psutil in /opt/conda/lib/python3.10/site-packages (from py7zr) (5.9.3)\nDownloading py7zr-0.21.1-py3-none-any.whl (67 kB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m67.8/67.8 kB\u001b[0m \u001b[31m1.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0meta \u001b[36m0:00:01\u001b[0m\n\u001b[?25hDownloading inflate64-1.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (93 kB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m93.1/93.1 kB\u001b[0m \u001b[31m3.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hDownloading multivolumefile-0.2.3-py3-none-any.whl (17 kB)\nDownloading pybcj-1.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (49 kB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.7/49.7 kB\u001b[0m \u001b[31m2.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hDownloading pycryptodomex-3.20.0-cp35-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.1 MB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.1/2.1 MB\u001b[0m \u001b[31m14.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n\u001b[?25hDownloading pyppmd-1.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (138 kB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m138.9/138.9 kB\u001b[0m \u001b[31m3.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n\u001b[?25hDownloading pyzstd-0.16.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (413 kB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m413.8/413.8 kB\u001b[0m \u001b[31m27.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hInstalling collected packages: pyzstd, pyppmd, pycryptodomex, pybcj, multivolumefile, inflate64, py7zr\nSuccessfully installed inflate64-1.0.0 multivolumefile-0.2.3 py7zr-0.21.1 pybcj-1.0.2 pycryptodomex-3.20.0 pyppmd-1.1.0 pyzstd-0.16.0\nFound existing installation: botocore 1.29.165\nUninstalling botocore-1.29.165:\n  Successfully uninstalled botocore-1.29.165\nCollecting botocore\n  Downloading botocore-1.34.145-py3-none-any.whl.metadata (5.7 kB)\nRequirement already satisfied: jmespath<2.0.0,>=0.7.1 in /opt/conda/lib/python3.10/site-packages (from botocore) (1.0.1)\nRequirement already satisfied: python-dateutil<3.0.0,>=2.1 in /opt/conda/lib/python3.10/site-packages (from botocore) (2.9.0.post0)\nRequirement already satisfied: urllib3!=2.2.0,<3,>=1.25.4 in /opt/conda/lib/python3.10/site-packages (from botocore) (2.1.0)\nRequirement already satisfied: six>=1.5 in /opt/conda/lib/python3.10/site-packages (from python-dateutil<3.0.0,>=2.1->botocore) (1.16.0)\nDownloading botocore-1.34.145-py3-none-any.whl (12.4 MB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.4/12.4 MB\u001b[0m \u001b[31m61.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m0:01\u001b[0m\n\u001b[?25hInstalling collected packages: botocore\n\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\naiobotocore 2.13.1 requires aiohttp<4.0.0,>=3.9.2, but you have aiohttp 3.9.1 which is incompatible.\naiobotocore 2.13.1 requires botocore<1.34.132,>=1.34.70, but you have botocore 1.34.145 which is incompatible.\nboto3 1.26.100 requires botocore<1.30.0,>=1.29.100, but you have botocore 1.34.145 which is incompatible.\u001b[0m\u001b[31m\n\u001b[0mSuccessfully installed botocore-1.34.145\n","output_type":"stream"}]},{"cell_type":"markdown","source":"# Import Transformers, Dataset","metadata":{}},{"cell_type":"code","source":"from datasets import load_dataset\nfrom transformers import pipeline\n\nfrom transformers import AutoModelForSeq2SeqLM, AutoTokenizer\nimport torch","metadata":{"execution":{"iopub.status.busy":"2024-07-20T03:47:34.631285Z","iopub.execute_input":"2024-07-20T03:47:34.631859Z","iopub.status.idle":"2024-07-20T03:47:53.500410Z","shell.execute_reply.started":"2024-07-20T03:47:34.631820Z","shell.execute_reply":"2024-07-20T03:47:53.499569Z"},"trusted":true},"execution_count":2,"outputs":[{"name":"stderr","text":"2024-07-20 03:47:42.643508: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n2024-07-20 03:47:42.643643: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n2024-07-20 03:47:42.773001: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n","output_type":"stream"}]},{"cell_type":"markdown","source":"# Load Samsum Dataset","metadata":{}},{"cell_type":"code","source":"samsum_df = load_dataset('samsum')\nsamsum_df","metadata":{"execution":{"iopub.status.busy":"2024-07-20T03:47:53.501564Z","iopub.execute_input":"2024-07-20T03:47:53.502197Z","iopub.status.idle":"2024-07-20T03:48:01.738605Z","shell.execute_reply.started":"2024-07-20T03:47:53.502162Z","shell.execute_reply":"2024-07-20T03:48:01.737606Z"},"trusted":true},"execution_count":3,"outputs":[{"output_type":"display_data","data":{"text/plain":"Downloading builder script:   0%|          | 0.00/3.36k [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"84c1b3168aae4be3a008b446edf9d2e6"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Downloading readme:   0%|          | 0.00/7.04k [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"0466fa4cd3cc4804aa01e0e9ba8b9fc0"}},"metadata":{}},{"output_type":"stream","name":"stdin","text":"The repository for samsum contains custom code which must be executed to correctly load the dataset. You can inspect the repository content at https://hf.co/datasets/samsum.\nYou can avoid this prompt in future by passing the argument `trust_remote_code=True`.\n\nDo you wish to run the custom code? [y/N]  y\n"},{"output_type":"display_data","data":{"text/plain":"Downloading data:   0%|          | 0.00/2.94M [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"346e206ed9e641dc8f6810ddbdbafa06"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Generating train split:   0%|          | 0/14732 [00:00<?, ? examples/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"e9e3ba10f1b644319e2b245be4727689"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Generating test split:   0%|          | 0/819 [00:00<?, ? examples/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"e6b002bdb86743adbe627fdcdc18c3f8"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Generating validation split:   0%|          | 0/818 [00:00<?, ? examples/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"457de5bbb5554e7187a1570d281c411e"}},"metadata":{}},{"execution_count":3,"output_type":"execute_result","data":{"text/plain":"DatasetDict({\n    train: Dataset({\n        features: ['id', 'dialogue', 'summary'],\n        num_rows: 14732\n    })\n    test: Dataset({\n        features: ['id', 'dialogue', 'summary'],\n        num_rows: 819\n    })\n    validation: Dataset({\n        features: ['id', 'dialogue', 'summary'],\n        num_rows: 818\n    })\n})"},"metadata":{}}]},{"cell_type":"code","source":"samsum_df['train']['dialogue'][100]","metadata":{"execution":{"iopub.status.busy":"2024-07-20T03:48:17.256997Z","iopub.execute_input":"2024-07-20T03:48:17.257409Z","iopub.status.idle":"2024-07-20T03:48:17.294729Z","shell.execute_reply.started":"2024-07-20T03:48:17.257380Z","shell.execute_reply":"2024-07-20T03:48:17.293716Z"},"trusted":true},"execution_count":5,"outputs":[{"execution_count":5,"output_type":"execute_result","data":{"text/plain":"\"Gabby: How is you? Settling into the new house OK?\\r\\nSandra: Good. The kids and the rest of the menagerie are doing fine. The dogs absolutely love the new garden. Plenty of room to dig and run around.\\r\\nGabby: What about the hubby?\\r\\nSandra: Well, apart from being his usual grumpy self I guess he's doing OK.\\r\\nGabby: :-D yeah sounds about right for Jim.\\r\\nSandra: He's a man of few words. No surprises there. Give him a backyard shed and that's the last you'll see of him for months.\\r\\nGabby: LOL that describes most men I know.\\r\\nSandra: Ain't that the truth! \\r\\nGabby: Sure is. :-) My one might as well move into the garage. Always tinkering and building something in there.\\r\\nSandra: Ever wondered what he's doing in there?\\r\\nGabby: All the time. But he keeps the place locked.\\r\\nSandra: Prolly building a portable teleporter or something. ;-)\\r\\nGabby: Or a time machine... LOL\\r\\nSandra: Or a new greatly improved Rabbit :-P\\r\\nGabby: I wish... Lmfao!\""},"metadata":{}}]},{"cell_type":"code","source":"samsum_df['train']['summary'][100]","metadata":{"execution":{"iopub.status.busy":"2024-07-20T03:48:28.533245Z","iopub.execute_input":"2024-07-20T03:48:28.533633Z","iopub.status.idle":"2024-07-20T03:48:28.564101Z","shell.execute_reply.started":"2024-07-20T03:48:28.533604Z","shell.execute_reply":"2024-07-20T03:48:28.562715Z"},"trusted":true},"execution_count":6,"outputs":[{"execution_count":6,"output_type":"execute_result","data":{"text/plain":"'Sandra is setting into the new house; her family is happy with it. Then Sandra and Gabby discuss the nature of their men and laugh about their habit of spending time in the garage or a shed.'"},"metadata":{}}]},{"cell_type":"code","source":"samsum_df['validation']['dialogue'][30]","metadata":{"execution":{"iopub.status.busy":"2024-07-20T03:48:55.082936Z","iopub.execute_input":"2024-07-20T03:48:55.083364Z","iopub.status.idle":"2024-07-20T03:48:55.094064Z","shell.execute_reply.started":"2024-07-20T03:48:55.083331Z","shell.execute_reply":"2024-07-20T03:48:55.092849Z"},"trusted":true},"execution_count":8,"outputs":[{"execution_count":8,"output_type":"execute_result","data":{"text/plain":"\"Toby: Hi, when are you leaving?\\r\\nTheo: on Friday.\\r\\nToby: Soon!\\r\\nTheo: soonish\\r\\nToby: Where are you going? decided?\\r\\nTheo: I was thinking about the Italian Alpes\\r\\nToby: where exactly? \\r\\nTheo: close to Torino I guess\\r\\nToby: Do you have a free seat in the car?\\r\\nTheo: I think we still do\\r\\nToby: So maybe I would join you, just for the weekend.\\r\\nTheo: That would be very nice!\\r\\nToby: And I would come back by train on Monday.\\r\\nTheo: Is it worth it, the journey?\\r\\nToby: I haven't beed skiing for years, I really miss it\\r\\nTheo: ok, but we want to depart before 7am, will you manage?\\r\\nToby: if you could pick my up, or at least come to my neighbourhood...\\r\\nTheo: That's not a problem!\\r\\nToby: Great!\\r\\nTheo: I'll write you later about all the details.\""},"metadata":{}}]},{"cell_type":"code","source":"samsum_df['validation']['summary'][30]","metadata":{"execution":{"iopub.status.busy":"2024-07-20T03:49:04.537104Z","iopub.execute_input":"2024-07-20T03:49:04.537865Z","iopub.status.idle":"2024-07-20T03:49:04.545741Z","shell.execute_reply.started":"2024-07-20T03:49:04.537821Z","shell.execute_reply":"2024-07-20T03:49:04.544567Z"},"trusted":true},"execution_count":9,"outputs":[{"execution_count":9,"output_type":"execute_result","data":{"text/plain":"\"Theo's going to stay near Torino in the region of Italian Alpes. Toby wants to join the trip. Theo agrees and will pick Toby up on Friday at 7 am.\""},"metadata":{}}]},{"cell_type":"markdown","source":"# Text Cleaning","metadata":{}},{"cell_type":"code","source":"import re\n\ndef clean_text(text):\n    # Remove Byte Order Marks (BOM)\n    text = text.replace('\\ufeff', '')\n    # Replace \\n with a space\n    text = text.replace('\\n', ' ')\n    # Replace \\r with a space\n    text = text.replace('\\r', ' ')\n    # Remove backslashes\n    text = text.replace('\\\\', '')\n    # Remove Non-ASCII characters\n    text = re.sub(r'[^\\x00-\\x7F]+', '', text)\n    # Remove Non-printable characters\n    text = re.sub(r'[\\x00-\\x1F\\x7F-\\x9F]', '', text)\n    # Normalize whitespace\n    text = ' '.join(text.split())\n    return text","metadata":{"execution":{"iopub.status.busy":"2024-07-20T03:54:15.172835Z","iopub.execute_input":"2024-07-20T03:54:15.173525Z","iopub.status.idle":"2024-07-20T03:54:15.179639Z","shell.execute_reply.started":"2024-07-20T03:54:15.173490Z","shell.execute_reply":"2024-07-20T03:54:15.178538Z"},"trusted":true},"execution_count":14,"outputs":[]},{"cell_type":"code","source":"# Clean text for train dataset\nsamsum_df['train'] = samsum_df['train'].map(lambda x: {'dialogue': clean_text(x['dialogue'])})\n\n# Clean text for validation dataset\nsamsum_df['validation'] = samsum_df['validation'].map(lambda x: {'dialogue': clean_text(x['dialogue'])})\n\n# Clean text for test dataset\nsamsum_df['test'] = samsum_df['test'].map(lambda x: {'dialogue': clean_text(x['dialogue'])})\n","metadata":{"execution":{"iopub.status.busy":"2024-07-20T03:54:15.646933Z","iopub.execute_input":"2024-07-20T03:54:15.648076Z","iopub.status.idle":"2024-07-20T03:54:17.708694Z","shell.execute_reply.started":"2024-07-20T03:54:15.648042Z","shell.execute_reply":"2024-07-20T03:54:17.707640Z"},"trusted":true},"execution_count":15,"outputs":[{"output_type":"display_data","data":{"text/plain":"Map:   0%|          | 0/14732 [00:00<?, ? examples/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"5cd0c24327724a01a31bcf224b9b3155"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Map:   0%|          | 0/818 [00:00<?, ? examples/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"197994820a0b466a8106935aef2e50c0"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Map:   0%|          | 0/819 [00:00<?, ? examples/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"f50db451e94e4f0881514e61d4500d4b"}},"metadata":{}}]},{"cell_type":"code","source":"samsum_df['validation']['dialogue'][30]","metadata":{"execution":{"iopub.status.busy":"2024-07-20T03:54:28.322558Z","iopub.execute_input":"2024-07-20T03:54:28.322945Z","iopub.status.idle":"2024-07-20T03:54:28.331745Z","shell.execute_reply.started":"2024-07-20T03:54:28.322913Z","shell.execute_reply":"2024-07-20T03:54:28.330592Z"},"trusted":true},"execution_count":16,"outputs":[{"execution_count":16,"output_type":"execute_result","data":{"text/plain":"\"Toby: Hi, when are you leaving? Theo: on Friday. Toby: Soon! Theo: soonish Toby: Where are you going? decided? Theo: I was thinking about the Italian Alpes Toby: where exactly? Theo: close to Torino I guess Toby: Do you have a free seat in the car? Theo: I think we still do Toby: So maybe I would join you, just for the weekend. Theo: That would be very nice! Toby: And I would come back by train on Monday. Theo: Is it worth it, the journey? Toby: I haven't beed skiing for years, I really miss it Theo: ok, but we want to depart before 7am, will you manage? Toby: if you could pick my up, or at least come to my neighbourhood... Theo: That's not a problem! Toby: Great! Theo: I'll write you later about all the details.\""},"metadata":{}}]},{"cell_type":"markdown","source":"# Load Model","metadata":{}},{"cell_type":"code","source":"device = 'gpu'\nmodel_ckpt = 'facebook/bart-large-cnn'\ntokenizer = AutoTokenizer.from_pretrained(model_ckpt)\nmodel = AutoModelForSeq2SeqLM.from_pretrained(model_ckpt)","metadata":{"execution":{"iopub.status.busy":"2024-07-20T03:54:34.452624Z","iopub.execute_input":"2024-07-20T03:54:34.453492Z","iopub.status.idle":"2024-07-20T03:54:45.915251Z","shell.execute_reply.started":"2024-07-20T03:54:34.453454Z","shell.execute_reply":"2024-07-20T03:54:45.914418Z"},"trusted":true},"execution_count":17,"outputs":[{"output_type":"display_data","data":{"text/plain":"config.json:   0%|          | 0.00/1.58k [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"94b95a170c4e41aa931a844f89dad2f0"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"c45f7a89e7174cfda7265d880ffccfbb"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"3a4939f81d5242f794d1c49f5ab6e945"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"c703afd9d7974273b6dfc2b9e08e2762"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"28328e05dc4d47439fb77993d0986877"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"e6f39cb199b245b5a40f147a4ee1dda0"}},"metadata":{}}]},{"cell_type":"code","source":"dialogue_len = [len(x['dialogue'].split()) for x in samsum_df['train']]\nsummary_len = [len(x['summary'].split()) for x in samsum_df['train']]","metadata":{"execution":{"iopub.status.busy":"2024-07-20T03:54:45.916881Z","iopub.execute_input":"2024-07-20T03:54:45.917222Z","iopub.status.idle":"2024-07-20T03:54:47.570702Z","shell.execute_reply.started":"2024-07-20T03:54:45.917195Z","shell.execute_reply":"2024-07-20T03:54:47.569594Z"},"trusted":true},"execution_count":18,"outputs":[]},{"cell_type":"code","source":"import pandas as pd\n\ndata = pd.DataFrame([dialogue_len, summary_len]).T\ndata.columns = ['Dialogue Length', 'Summary Length']\n\ndata.hist(figsize=(15,5))","metadata":{"execution":{"iopub.status.busy":"2024-07-20T03:54:47.572756Z","iopub.execute_input":"2024-07-20T03:54:47.573190Z","iopub.status.idle":"2024-07-20T03:54:48.620404Z","shell.execute_reply.started":"2024-07-20T03:54:47.573152Z","shell.execute_reply":"2024-07-20T03:54:48.619120Z"},"trusted":true},"execution_count":19,"outputs":[{"execution_count":19,"output_type":"execute_result","data":{"text/plain":"array([[<Axes: title={'center': 'Dialogue Length'}>,\n        <Axes: title={'center': 'Summary Length'}>]], dtype=object)"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"<Figure size 1500x500 with 2 Axes>","image/png":""},"metadata":{}}]},{"cell_type":"markdown","source":"# Make Features","metadata":{}},{"cell_type":"code","source":"def get_feature(batch):\n  encodings = tokenizer(batch['dialogue'], text_target=batch['summary'],\n                        max_length=1024, truncation=True)\n\n  encodings = {'input_ids': encodings['input_ids'],\n               'attention_mask': encodings['attention_mask'],\n               'labels': encodings['labels']}\n\n  return encodings","metadata":{"execution":{"iopub.status.busy":"2024-07-20T03:54:49.642045Z","iopub.execute_input":"2024-07-20T03:54:49.642900Z","iopub.status.idle":"2024-07-20T03:54:49.652230Z","shell.execute_reply.started":"2024-07-20T03:54:49.642868Z","shell.execute_reply":"2024-07-20T03:54:49.651222Z"},"trusted":true},"execution_count":20,"outputs":[]},{"cell_type":"code","source":"samsum_pt = samsum_df.map(get_feature, batched=True)","metadata":{"execution":{"iopub.status.busy":"2024-07-20T03:54:51.122633Z","iopub.execute_input":"2024-07-20T03:54:51.123029Z","iopub.status.idle":"2024-07-20T03:55:00.526192Z","shell.execute_reply.started":"2024-07-20T03:54:51.122997Z","shell.execute_reply":"2024-07-20T03:55:00.525216Z"},"trusted":true},"execution_count":21,"outputs":[{"output_type":"display_data","data":{"text/plain":"Map:   0%|          | 0/14732 [00:00<?, ? examples/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"38cf3c8a1b2e496fb8c14a6aa416aeb5"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Map:   0%|          | 0/819 [00:00<?, ? examples/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"531378555ea54d4387b16acc942289e3"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Map:   0%|          | 0/818 [00:00<?, ? examples/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"6653e68ca50549c38564e3039a44533e"}},"metadata":{}}]},{"cell_type":"code","source":"columns = ['input_ids', 'labels', 'attention_mask']\nsamsum_pt.set_format(type='torch', columns=columns)","metadata":{"execution":{"iopub.status.busy":"2024-07-20T03:55:00.528080Z","iopub.execute_input":"2024-07-20T03:55:00.528461Z","iopub.status.idle":"2024-07-20T03:55:00.537879Z","shell.execute_reply.started":"2024-07-20T03:55:00.528432Z","shell.execute_reply":"2024-07-20T03:55:00.536893Z"},"trusted":true},"execution_count":22,"outputs":[]},{"cell_type":"code","source":"samsum_pt","metadata":{"execution":{"iopub.status.busy":"2024-07-20T03:55:00.539382Z","iopub.execute_input":"2024-07-20T03:55:00.539777Z","iopub.status.idle":"2024-07-20T03:55:00.594865Z","shell.execute_reply.started":"2024-07-20T03:55:00.539741Z","shell.execute_reply":"2024-07-20T03:55:00.593650Z"},"trusted":true},"execution_count":23,"outputs":[{"execution_count":23,"output_type":"execute_result","data":{"text/plain":"DatasetDict({\n    train: Dataset({\n        features: ['id', 'dialogue', 'summary', 'input_ids', 'attention_mask', 'labels'],\n        num_rows: 14732\n    })\n    test: Dataset({\n        features: ['id', 'dialogue', 'summary', 'input_ids', 'attention_mask', 'labels'],\n        num_rows: 819\n    })\n    validation: Dataset({\n        features: ['id', 'dialogue', 'summary', 'input_ids', 'attention_mask', 'labels'],\n        num_rows: 818\n    })\n})"},"metadata":{}}]},{"cell_type":"code","source":"from transformers import DataCollatorForSeq2Seq, TrainingArguments, Trainer, EarlyStoppingCallback\nfrom transformers import DataCollatorForSeq2Seq\ndata_collator = DataCollatorForSeq2Seq(tokenizer, model=model)","metadata":{"execution":{"iopub.status.busy":"2024-07-20T03:55:04.012389Z","iopub.execute_input":"2024-07-20T03:55:04.012777Z","iopub.status.idle":"2024-07-20T03:55:04.546776Z","shell.execute_reply.started":"2024-07-20T03:55:04.012749Z","shell.execute_reply":"2024-07-20T03:55:04.545667Z"},"trusted":true},"execution_count":24,"outputs":[]},{"cell_type":"markdown","source":"# Model Training","metadata":{}},{"cell_type":"code","source":"from transformers import TrainingArguments, Trainer\n\ntraining_args = TrainingArguments(\n    output_dir = 'bart_samsum_5',\n    num_train_epochs=2,\n    warmup_steps = 500,\n    per_device_train_batch_size=4,\n    per_device_eval_batch_size=4,\n    weight_decay = 0.01,\n    logging_steps = 10,\n    evaluation_strategy = 'steps',\n    eval_steps=50,\n    save_steps=1e6,\n    gradient_accumulation_steps=16,\n    load_best_model_at_end = True\n)\n\nearly_stopping_callback = EarlyStoppingCallback(early_stopping_patience=3)  # Adjust patience as needed\n\n\ntrainer = Trainer(model=model, args=training_args, tokenizer=tokenizer, data_collator=data_collator,\n                  train_dataset = samsum_pt['train'], eval_dataset = samsum_pt['validation'],callbacks=[early_stopping_callback])","metadata":{"execution":{"iopub.status.busy":"2024-07-20T03:55:12.577962Z","iopub.execute_input":"2024-07-20T03:55:12.578710Z","iopub.status.idle":"2024-07-20T03:55:13.947211Z","shell.execute_reply.started":"2024-07-20T03:55:12.578676Z","shell.execute_reply":"2024-07-20T03:55:13.946390Z"},"trusted":true},"execution_count":25,"outputs":[{"name":"stderr","text":"/opt/conda/lib/python3.10/site-packages/transformers/training_args.py:1494: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n  warnings.warn(\n","output_type":"stream"}]},{"cell_type":"code","source":"trainer.train()","metadata":{"execution":{"iopub.status.busy":"2024-07-20T03:56:03.037558Z","iopub.execute_input":"2024-07-20T03:56:03.037935Z","iopub.status.idle":"2024-07-20T04:50:14.203662Z","shell.execute_reply.started":"2024-07-20T03:56:03.037907Z","shell.execute_reply":"2024-07-20T04:50:14.202602Z"},"trusted":true},"execution_count":26,"outputs":[{"name":"stderr","text":"\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m The `run_name` is currently set to the same value as `TrainingArguments.output_dir`. If this was not intended, please specify a different run name by setting the `TrainingArguments.run_name` parameter.\n\u001b[34m\u001b[1mwandb\u001b[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)\n\u001b[34m\u001b[1mwandb\u001b[0m: You can find your API key in your browser here: https://wandb.ai/authorize\n\u001b[34m\u001b[1mwandb\u001b[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:","output_type":"stream"},{"output_type":"stream","name":"stdin","text":"  ········································\n"},{"name":"stderr","text":"\u001b[34m\u001b[1mwandb\u001b[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":"wandb version 0.17.5 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":"Tracking run with wandb version 0.17.4"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":"Run data is saved locally in <code>/kaggle/working/wandb/run-20240720_035607-g91p99nz</code>"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":"Syncing run <strong><a href='https://wandb.ai/markish9017-Lok%20Jagruti%20University/huggingface/runs/g91p99nz' target=\"_blank\">bart_samsum_5</a></strong> to <a href='https://wandb.ai/markish9017-Lok%20Jagruti%20University/huggingface' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":" View project at <a href='https://wandb.ai/markish9017-Lok%20Jagruti%20University/huggingface' target=\"_blank\">https://wandb.ai/markish9017-Lok%20Jagruti%20University/huggingface</a>"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":" View run at <a href='https://wandb.ai/markish9017-Lok%20Jagruti%20University/huggingface/runs/g91p99nz' target=\"_blank\">https://wandb.ai/markish9017-Lok%20Jagruti%20University/huggingface/runs/g91p99nz</a>"},"metadata":{}},{"name":"stderr","text":"/opt/conda/lib/python3.10/site-packages/transformers/data/data_collator.py:656: UserWarning: Creating a tensor from a list of numpy.ndarrays is extremely slow. Please consider converting the list to a single numpy.ndarray with numpy.array() before converting to a tensor. (Triggered internally at /usr/local/src/pytorch/torch/csrc/utils/tensor_new.cpp:261.)\n  batch[\"labels\"] = torch.tensor(batch[\"labels\"], dtype=torch.int64)\n/opt/conda/lib/python3.10/site-packages/torch/nn/parallel/_functions.py:68: UserWarning: Was asked to gather along dimension 0, but all input tensors were scalars; will instead unsqueeze and return a vector.\n  warnings.warn('Was asked to gather along dimension 0, but all '\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":"\n    <div>\n      \n      <progress value='230' max='230' style='width:300px; height:20px; vertical-align: middle;'></progress>\n      [230/230 53:36, Epoch 1/2]\n    </div>\n    <table border=\"1\" class=\"dataframe\">\n  <thead>\n <tr style=\"text-align: left;\">\n      <th>Step</th>\n      <th>Training Loss</th>\n      <th>Validation Loss</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <td>50</td>\n      <td>1.526700</td>\n      <td>1.510433</td>\n    </tr>\n    <tr>\n      <td>100</td>\n      <td>1.390300</td>\n      <td>1.422089</td>\n    </tr>\n    <tr>\n      <td>150</td>\n      <td>1.287000</td>\n      <td>1.389398</td>\n    </tr>\n    <tr>\n      <td>200</td>\n      <td>1.276400</td>\n      <td>1.352196</td>\n    </tr>\n  </tbody>\n</table><p>"},"metadata":{}},{"name":"stderr","text":"Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.\nNon-default generation parameters: {'max_length': 142, 'min_length': 56, 'early_stopping': True, 'num_beams': 4, 'length_penalty': 2.0, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}\n","output_type":"stream"},{"execution_count":26,"output_type":"execute_result","data":{"text/plain":"TrainOutput(global_step=230, training_loss=1.4225341631018598, metrics={'train_runtime': 3250.5316, 'train_samples_per_second': 9.064, 'train_steps_per_second': 0.071, 'total_flos': 1.856077686295757e+16, 'train_loss': 1.4225341631018598, 'epoch': 1.997828447339848})"},"metadata":{}}]},{"cell_type":"markdown","source":"# Saving the Model","metadata":{}},{"cell_type":"code","source":"trainer.save_model('bart_samsum_model_5')\ntokenizer.save_pretrained(\"bart_samsum_model_5\")","metadata":{"execution":{"iopub.status.busy":"2024-07-20T05:07:32.484298Z","iopub.execute_input":"2024-07-20T05:07:32.485479Z","iopub.status.idle":"2024-07-20T05:07:34.686369Z","shell.execute_reply.started":"2024-07-20T05:07:32.485435Z","shell.execute_reply":"2024-07-20T05:07:34.685264Z"},"trusted":true},"execution_count":37,"outputs":[{"name":"stderr","text":"Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.\nNon-default generation parameters: {'max_length': 142, 'min_length': 56, 'early_stopping': True, 'num_beams': 4, 'length_penalty': 2.0, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}\n","output_type":"stream"},{"execution_count":37,"output_type":"execute_result","data":{"text/plain":"('bart_samsum_model_5/tokenizer_config.json',\n 'bart_samsum_model_5/special_tokens_map.json',\n 'bart_samsum_model_5/vocab.json',\n 'bart_samsum_model_5/merges.txt',\n 'bart_samsum_model_5/added_tokens.json',\n 'bart_samsum_model_5/tokenizer.json')"},"metadata":{}}]},{"cell_type":"code","source":"!pip install rouge-score\n!pip install datasets","metadata":{"execution":{"iopub.status.busy":"2024-07-20T04:54:24.229870Z","iopub.execute_input":"2024-07-20T04:54:24.230526Z","iopub.status.idle":"2024-07-20T04:54:54.172136Z","shell.execute_reply.started":"2024-07-20T04:54:24.230485Z","shell.execute_reply":"2024-07-20T04:54:54.170917Z"},"trusted":true},"execution_count":28,"outputs":[{"name":"stderr","text":"/opt/conda/lib/python3.10/pty.py:89: RuntimeWarning: os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.\n  pid, fd = os.forkpty()\nhuggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n","output_type":"stream"},{"name":"stdout","text":"Collecting rouge-score\n  Downloading rouge_score-0.1.2.tar.gz (17 kB)\n  Preparing metadata (setup.py) ... \u001b[?25ldone\n\u001b[?25hRequirement already satisfied: absl-py in /opt/conda/lib/python3.10/site-packages (from rouge-score) (1.4.0)\nRequirement already satisfied: nltk in /opt/conda/lib/python3.10/site-packages (from rouge-score) (3.2.4)\nRequirement already satisfied: numpy in /opt/conda/lib/python3.10/site-packages (from rouge-score) (1.26.4)\nRequirement already satisfied: six>=1.14.0 in /opt/conda/lib/python3.10/site-packages (from rouge-score) (1.16.0)\nBuilding wheels for collected packages: rouge-score\n  Building wheel for rouge-score (setup.py) ... \u001b[?25ldone\n\u001b[?25h  Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24934 sha256=a69ddf1663ec4e38a560e1d4907f1493314633798fbf3f86251cce2f24a5d281\n  Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4\nSuccessfully built rouge-score\nInstalling collected packages: rouge-score\nSuccessfully installed rouge-score-0.1.2\n","output_type":"stream"},{"name":"stderr","text":"huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n","output_type":"stream"},{"name":"stdout","text":"Requirement already satisfied: datasets in /opt/conda/lib/python3.10/site-packages (2.20.0)\nRequirement already satisfied: filelock in /opt/conda/lib/python3.10/site-packages (from datasets) (3.13.1)\nRequirement already satisfied: numpy>=1.17 in /opt/conda/lib/python3.10/site-packages (from datasets) (1.26.4)\nRequirement already satisfied: pyarrow>=15.0.0 in /opt/conda/lib/python3.10/site-packages (from datasets) (16.1.0)\nRequirement already satisfied: pyarrow-hotfix in /opt/conda/lib/python3.10/site-packages (from datasets) (0.6)\nRequirement already satisfied: dill<0.3.9,>=0.3.0 in /opt/conda/lib/python3.10/site-packages (from datasets) (0.3.8)\nRequirement already satisfied: pandas in /opt/conda/lib/python3.10/site-packages (from datasets) (2.2.2)\nRequirement already satisfied: requests>=2.32.2 in /opt/conda/lib/python3.10/site-packages (from datasets) (2.32.3)\nRequirement already satisfied: tqdm>=4.66.3 in /opt/conda/lib/python3.10/site-packages (from datasets) (4.66.4)\nRequirement already satisfied: xxhash in /opt/conda/lib/python3.10/site-packages (from datasets) (3.4.1)\nRequirement already satisfied: multiprocess in /opt/conda/lib/python3.10/site-packages (from datasets) (0.70.16)\nRequirement already satisfied: fsspec<=2024.5.0,>=2023.1.0 in /opt/conda/lib/python3.10/site-packages (from fsspec[http]<=2024.5.0,>=2023.1.0->datasets) (2024.5.0)\nRequirement already satisfied: aiohttp in /opt/conda/lib/python3.10/site-packages (from datasets) (3.9.1)\nRequirement already satisfied: huggingface-hub>=0.21.2 in /opt/conda/lib/python3.10/site-packages (from datasets) (0.23.4)\nRequirement already satisfied: packaging in /opt/conda/lib/python3.10/site-packages (from datasets) (21.3)\nRequirement already satisfied: pyyaml>=5.1 in /opt/conda/lib/python3.10/site-packages (from datasets) (6.0.1)\nRequirement already satisfied: attrs>=17.3.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (23.2.0)\nRequirement already satisfied: multidict<7.0,>=4.5 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (6.0.4)\nRequirement already satisfied: yarl<2.0,>=1.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (1.9.3)\nRequirement already satisfied: frozenlist>=1.1.1 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (1.4.1)\nRequirement already satisfied: aiosignal>=1.1.2 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (1.3.1)\nRequirement already satisfied: async-timeout<5.0,>=4.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (4.0.3)\nRequirement already satisfied: typing-extensions>=3.7.4.3 in /opt/conda/lib/python3.10/site-packages (from huggingface-hub>=0.21.2->datasets) (4.9.0)\nRequirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /opt/conda/lib/python3.10/site-packages (from packaging->datasets) (3.1.1)\nRequirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/lib/python3.10/site-packages (from requests>=2.32.2->datasets) (3.3.2)\nRequirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests>=2.32.2->datasets) (3.6)\nRequirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests>=2.32.2->datasets) (2.1.0)\nRequirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests>=2.32.2->datasets) (2024.7.4)\nRequirement already satisfied: python-dateutil>=2.8.2 in /opt/conda/lib/python3.10/site-packages (from pandas->datasets) (2.9.0.post0)\nRequirement already satisfied: pytz>=2020.1 in /opt/conda/lib/python3.10/site-packages (from pandas->datasets) (2023.3.post1)\nRequirement already satisfied: tzdata>=2022.7 in /opt/conda/lib/python3.10/site-packages (from pandas->datasets) (2023.4)\nRequirement already satisfied: six>=1.5 in /opt/conda/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.16.0)\n","output_type":"stream"}]},{"cell_type":"markdown","source":"# Testing on Custom Text","metadata":{}},{"cell_type":"code","source":"pipe = pipeline('summarization', model='bart_samsum_model_5')\ngen_kwargs = {'length_penalty': 0.8, 'num_beams': 8, \"min_length\": 30}\n\ncustom_dialogue=\"\"\"\nLaxmi Kant: what work you planning to give Tom?\nJuli: i was hoping to send him on a business trip first.\nLaxmi Kant: cool. is there any suitable work for him?\nJuli: he did excellent in last quarter. i will assign new project, once he is back.\n\"\"\"\nprint(pipe(custom_dialogue, **gen_kwargs))","metadata":{"execution":{"iopub.status.busy":"2024-07-20T05:07:41.074458Z","iopub.execute_input":"2024-07-20T05:07:41.074845Z","iopub.status.idle":"2024-07-20T05:07:49.387129Z","shell.execute_reply.started":"2024-07-20T05:07:41.074815Z","shell.execute_reply":"2024-07-20T05:07:49.385789Z"},"trusted":true},"execution_count":38,"outputs":[{"name":"stderr","text":"Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.\nYour max_length is set to 142, but your input_length is only 72. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=36)\n","output_type":"stream"},{"name":"stdout","text":"[{'summary_text': 'Juli wants to send Tom on a business trip first. He will assign a new project to Tom once he is back. '}]\n","output_type":"stream"}]},{"cell_type":"markdown","source":"# Testing on Test dataset","metadata":{}},{"cell_type":"code","source":"from datasets import load_metric, load_dataset\nfrom transformers import pipeline\n\n# Load ROUGE metric\nrouge = load_metric('rouge')\n\n# Load samsum dataset\n# samsum = load_dataset('samsum')\n\n# Initialize summarization pipeline\ndevice = 0 if torch.cuda.is_available() else -1\npipe = pipeline('summarization', model='/kaggle/working/bart_samsum_model_5', device=device)\n\n# Function to generate summaries\ndef generate_summary(text, gen_kwargs):\n    summary = pipe(text, **gen_kwargs)\n    return summary[0]['summary_text']\n\n# Lists to store references and generated summaries\nreferences = samsum_df['test']['summary']\ngenerated_summaries = []\n\n# Generate summaries for your dataset\nfor dialogue in samsum_df['test']['dialogue']:\n    input_length = len(dialogue.split())\n    max_length = max(10, min(142, input_length // 2))\n    generated_summary = generate_summary(dialogue, gen_kwargs={'min_length': 30, 'max_length': max_length, 'num_beams': 4})\n    generated_summaries.append(generated_summary)\n\n# Compute ROUGE scores\nresults = rouge.compute(predictions=generated_summaries, references=references)\n\n# Print the results\nfor key in results.keys():\n    print(f\"{key}: {results[key].mid}\")","metadata":{"execution":{"iopub.status.busy":"2024-07-20T04:55:46.985309Z","iopub.execute_input":"2024-07-20T04:55:46.985689Z","iopub.status.idle":"2024-07-20T05:01:53.179173Z","shell.execute_reply.started":"2024-07-20T04:55:46.985658Z","shell.execute_reply":"2024-07-20T05:01:53.178116Z"},"trusted":true},"execution_count":31,"outputs":[{"name":"stderr","text":"/tmp/ipykernel_34/2165442090.py:5: FutureWarning: load_metric is deprecated and will be removed in the next major version of datasets. Use 'evaluate.load' instead, from the new library 🤗 Evaluate: https://huggingface.co/docs/evaluate\n  rouge = load_metric('rouge')\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"Downloading builder script:   0%|          | 0.00/2.17k [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"5ca0e26289d5444e80cbc5d4fd403b50"}},"metadata":{}},{"output_type":"stream","name":"stdin","text":"The repository for rouge contains custom code which must be executed to correctly load the dataset. You can inspect the repository content at https://hf.co/datasets/rouge.\nYou can avoid this prompt in future by passing the argument `trust_remote_code=True`.\n\nDo you wish to run the custom code? [y/N]  y\n"},{"name":"stderr","text":"You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\nYour min_length=30 must be inferior than your max_length=28.\n/opt/conda/lib/python3.10/site-packages/transformers/generation/utils.py:1273: UserWarning: Unfeasible length constraints: `min_length` (30) is larger than the maximum possible length (28). Generation will stop at the defined maximum length. You should decrease the minimum length and/or increase the maximum length.\n  warnings.warn(\nYour min_length=30 must be inferior than your max_length=12.\n/opt/conda/lib/python3.10/site-packages/transformers/generation/utils.py:1273: UserWarning: Unfeasible length constraints: `min_length` (30) is larger than the maximum possible length (12). Generation will stop at the defined maximum length. You should decrease the minimum length and/or increase the maximum length.\n  warnings.warn(\nYour min_length=30 must be inferior than your max_length=22.\n/opt/conda/lib/python3.10/site-packages/transformers/generation/utils.py:1273: UserWarning: Unfeasible length constraints: `min_length` (30) is larger than the maximum possible length (22). Generation will stop at the defined maximum length. You should decrease the minimum length and/or increase the maximum length.\n  warnings.warn(\nYour min_length=30 must be inferior than your max_length=19.\n/opt/conda/lib/python3.10/site-packages/transformers/generation/utils.py:1273: UserWarning: Unfeasible length constraints: `min_length` (30) is larger than the maximum possible length (19). Generation will stop at the defined maximum length. You should decrease the minimum length and/or increase the maximum length.\n  warnings.warn(\nYour min_length=30 must be inferior than your max_length=23.\n/opt/conda/lib/python3.10/site-packages/transformers/generation/utils.py:1273: UserWarning: Unfeasible length constraints: `min_length` (30) is larger than the maximum possible length (23). Generation will stop at the defined maximum length. You should decrease the minimum length and/or increase the maximum length.\n  warnings.warn(\nYour min_length=30 must be inferior than your max_length=12.\nYour min_length=30 must be inferior than your max_length=12.\nYour min_length=30 must be inferior than your max_length=16.\n/opt/conda/lib/python3.10/site-packages/transformers/generation/utils.py:1273: UserWarning: Unfeasible length constraints: `min_length` (30) is larger than the maximum possible length (16). Generation will stop at the defined maximum length. You should decrease the minimum length and/or increase the maximum length.\n  warnings.warn(\nYour min_length=30 must be inferior than your max_length=10.\n/opt/conda/lib/python3.10/site-packages/transformers/generation/utils.py:1273: UserWarning: Unfeasible length constraints: `min_length` (30) is larger than the maximum possible length (10). Generation will stop at the defined maximum length. You should decrease the minimum length and/or increase the maximum length.\n  warnings.warn(\nYour min_length=30 must be inferior than your max_length=21.\n/opt/conda/lib/python3.10/site-packages/transformers/generation/utils.py:1273: UserWarning: Unfeasible length constraints: `min_length` (30) is larger than the maximum possible length (21). Generation will stop at the defined maximum length. You should decrease the minimum length and/or increase the maximum length.\n  warnings.warn(\nYour min_length=30 must be inferior than your max_length=21.\nYour min_length=30 must be inferior than your max_length=27.\n/opt/conda/lib/python3.10/site-packages/transformers/generation/utils.py:1273: UserWarning: Unfeasible length constraints: `min_length` (30) is larger than the maximum possible length (27). Generation will stop at the defined maximum length. You should decrease the minimum length and/or increase the maximum length.\n  warnings.warn(\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=21.\nYour min_length=30 must be inferior than your max_length=24.\n/opt/conda/lib/python3.10/site-packages/transformers/generation/utils.py:1273: UserWarning: Unfeasible length constraints: `min_length` (30) is larger than the maximum possible length (24). Generation will stop at the defined maximum length. You should decrease the minimum length and/or increase the maximum length.\n  warnings.warn(\nYour min_length=30 must be inferior than your max_length=24.\nYour min_length=30 must be inferior than your max_length=17.\n/opt/conda/lib/python3.10/site-packages/transformers/generation/utils.py:1273: UserWarning: Unfeasible length constraints: `min_length` (30) is larger than the maximum possible length (17). Generation will stop at the defined maximum length. You should decrease the minimum length and/or increase the maximum length.\n  warnings.warn(\nYour min_length=30 must be inferior than your max_length=26.\n/opt/conda/lib/python3.10/site-packages/transformers/generation/utils.py:1273: UserWarning: Unfeasible length constraints: `min_length` (30) is larger than the maximum possible length (26). Generation will stop at the defined maximum length. You should decrease the minimum length and/or increase the maximum length.\n  warnings.warn(\nYour min_length=30 must be inferior than your max_length=25.\n/opt/conda/lib/python3.10/site-packages/transformers/generation/utils.py:1273: UserWarning: Unfeasible length constraints: `min_length` (30) is larger than the maximum possible length (25). Generation will stop at the defined maximum length. You should decrease the minimum length and/or increase the maximum length.\n  warnings.warn(\nYour min_length=30 must be inferior than your max_length=23.\nYour min_length=30 must be inferior than your max_length=28.\nYour min_length=30 must be inferior than your max_length=16.\nYour min_length=30 must be inferior than your max_length=23.\nYour min_length=30 must be inferior than your max_length=11.\n/opt/conda/lib/python3.10/site-packages/transformers/generation/utils.py:1273: UserWarning: Unfeasible length constraints: `min_length` (30) is larger than the maximum possible length (11). Generation will stop at the defined maximum length. You should decrease the minimum length and/or increase the maximum length.\n  warnings.warn(\nYour min_length=30 must be inferior than your max_length=12.\nYour min_length=30 must be inferior than your max_length=28.\nYour min_length=30 must be inferior than your max_length=20.\n/opt/conda/lib/python3.10/site-packages/transformers/generation/utils.py:1273: UserWarning: Unfeasible length constraints: `min_length` (30) is larger than the maximum possible length (20). Generation will stop at the defined maximum length. You should decrease the minimum length and/or increase the maximum length.\n  warnings.warn(\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=26.\nYour min_length=30 must be inferior than your max_length=18.\n/opt/conda/lib/python3.10/site-packages/transformers/generation/utils.py:1273: UserWarning: Unfeasible length constraints: `min_length` (30) is larger than the maximum possible length (18). Generation will stop at the defined maximum length. You should decrease the minimum length and/or increase the maximum length.\n  warnings.warn(\nYour min_length=30 must be inferior than your max_length=24.\nYour min_length=30 must be inferior than your max_length=11.\nYour min_length=30 must be inferior than your max_length=19.\nYour min_length=30 must be inferior than your max_length=25.\nYour min_length=30 must be inferior than your max_length=12.\nYour min_length=30 must be inferior than your max_length=12.\nYour min_length=30 must be inferior than your max_length=18.\nYour min_length=30 must be inferior than your max_length=14.\n/opt/conda/lib/python3.10/site-packages/transformers/generation/utils.py:1273: UserWarning: Unfeasible length constraints: `min_length` (30) is larger than the maximum possible length (14). Generation will stop at the defined maximum length. You should decrease the minimum length and/or increase the maximum length.\n  warnings.warn(\nYour min_length=30 must be inferior than your max_length=23.\nYour min_length=30 must be inferior than your max_length=22.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=11.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=17.\nYour min_length=30 must be inferior than your max_length=16.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=11.\nYour min_length=30 must be inferior than your max_length=19.\nYour min_length=30 must be inferior than your max_length=23.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=21.\nYour min_length=30 must be inferior than your max_length=22.\nYour min_length=30 must be inferior than your max_length=20.\nYour min_length=30 must be inferior than your max_length=23.\nYour min_length=30 must be inferior than your max_length=13.\n/opt/conda/lib/python3.10/site-packages/transformers/generation/utils.py:1273: UserWarning: Unfeasible length constraints: `min_length` (30) is larger than the maximum possible length (13). Generation will stop at the defined maximum length. You should decrease the minimum length and/or increase the maximum length.\n  warnings.warn(\nYour min_length=30 must be inferior than your max_length=25.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=11.\nYour min_length=30 must be inferior than your max_length=15.\n/opt/conda/lib/python3.10/site-packages/transformers/generation/utils.py:1273: UserWarning: Unfeasible length constraints: `min_length` (30) is larger than the maximum possible length (15). Generation will stop at the defined maximum length. You should decrease the minimum length and/or increase the maximum length.\n  warnings.warn(\nYour min_length=30 must be inferior than your max_length=21.\nYour min_length=30 must be inferior than your max_length=29.\n/opt/conda/lib/python3.10/site-packages/transformers/generation/utils.py:1273: UserWarning: Unfeasible length constraints: `min_length` (30) is larger than the maximum possible length (29). Generation will stop at the defined maximum length. You should decrease the minimum length and/or increase the maximum length.\n  warnings.warn(\nYour min_length=30 must be inferior than your max_length=12.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=21.\nYour min_length=30 must be inferior than your max_length=26.\nYour min_length=30 must be inferior than your max_length=21.\nYour min_length=30 must be inferior than your max_length=11.\nYour min_length=30 must be inferior than your max_length=25.\nYour min_length=30 must be inferior than your max_length=19.\nYour min_length=30 must be inferior than your max_length=25.\nYour min_length=30 must be inferior than your max_length=16.\nYour min_length=30 must be inferior than your max_length=13.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=24.\nYour min_length=30 must be inferior than your max_length=24.\nYour min_length=30 must be inferior than your max_length=27.\nYour min_length=30 must be inferior than your max_length=14.\nYour min_length=30 must be inferior than your max_length=27.\nYour min_length=30 must be inferior than your max_length=28.\nYour min_length=30 must be inferior than your max_length=25.\nYour min_length=30 must be inferior than your max_length=17.\nYour min_length=30 must be inferior than your max_length=29.\nYour min_length=30 must be inferior than your max_length=21.\nYour min_length=30 must be inferior than your max_length=29.\nYour min_length=30 must be inferior than your max_length=15.\nYour min_length=30 must be inferior than your max_length=25.\nYour min_length=30 must be inferior than your max_length=27.\nYour min_length=30 must be inferior than your max_length=16.\nYour min_length=30 must be inferior than your max_length=16.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=27.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=15.\nYour min_length=30 must be inferior than your max_length=25.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=21.\nYour min_length=30 must be inferior than your max_length=17.\nYour min_length=30 must be inferior than your max_length=26.\nYour min_length=30 must be inferior than your max_length=24.\nYour min_length=30 must be inferior than your max_length=25.\nYour min_length=30 must be inferior than your max_length=26.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=24.\nYour min_length=30 must be inferior than your max_length=18.\nYour min_length=30 must be inferior than your max_length=25.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=21.\nYour min_length=30 must be inferior than your max_length=18.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=16.\nYour min_length=30 must be inferior than your max_length=19.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=18.\nYour min_length=30 must be inferior than your max_length=13.\nYour min_length=30 must be inferior than your max_length=26.\nYour min_length=30 must be inferior than your max_length=28.\nYour min_length=30 must be inferior than your max_length=14.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=13.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=19.\nYour min_length=30 must be inferior than your max_length=15.\nYour min_length=30 must be inferior than your max_length=13.\nYour min_length=30 must be inferior than your max_length=29.\nYour min_length=30 must be inferior than your max_length=13.\nYour min_length=30 must be inferior than your max_length=26.\nYour min_length=30 must be inferior than your max_length=29.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=27.\nYour min_length=30 must be inferior than your max_length=11.\nYour min_length=30 must be inferior than your max_length=17.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=12.\nYour min_length=30 must be inferior than your max_length=19.\nYour min_length=30 must be inferior than your max_length=16.\nYour min_length=30 must be inferior than your max_length=18.\nYour min_length=30 must be inferior than your max_length=12.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=13.\nYour min_length=30 must be inferior than your max_length=22.\nYour min_length=30 must be inferior than your max_length=19.\nYour min_length=30 must be inferior than your max_length=19.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=25.\nYour min_length=30 must be inferior than your max_length=17.\nYour min_length=30 must be inferior than your max_length=22.\nYour min_length=30 must be inferior than your max_length=19.\nYour min_length=30 must be inferior than your max_length=26.\nYour min_length=30 must be inferior than your max_length=14.\nYour min_length=30 must be inferior than your max_length=15.\nYour min_length=30 must be inferior than your max_length=25.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=17.\nYour min_length=30 must be inferior than your max_length=19.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=15.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=29.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=23.\nYour min_length=30 must be inferior than your max_length=20.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=22.\nYour min_length=30 must be inferior than your max_length=20.\nYour min_length=30 must be inferior than your max_length=29.\nYour min_length=30 must be inferior than your max_length=23.\nYour min_length=30 must be inferior than your max_length=25.\nYour min_length=30 must be inferior than your max_length=18.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=14.\nYour min_length=30 must be inferior than your max_length=27.\nYour min_length=30 must be inferior than your max_length=27.\nYour min_length=30 must be inferior than your max_length=14.\nYour min_length=30 must be inferior than your max_length=12.\nYour min_length=30 must be inferior than your max_length=17.\nYour min_length=30 must be inferior than your max_length=26.\nYour min_length=30 must be inferior than your max_length=24.\nYour min_length=30 must be inferior than your max_length=19.\nYour min_length=30 must be inferior than your max_length=25.\nYour min_length=30 must be inferior than your max_length=27.\nYour min_length=30 must be inferior than your max_length=15.\nYour min_length=30 must be inferior than your max_length=26.\nYour min_length=30 must be inferior than your max_length=21.\nYour min_length=30 must be inferior than your max_length=23.\nYour min_length=30 must be inferior than your max_length=28.\nYour min_length=30 must be inferior than your max_length=19.\nYour min_length=30 must be inferior than your max_length=17.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=12.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=18.\nYour min_length=30 must be inferior than your max_length=19.\nYour min_length=30 must be inferior than your max_length=20.\nYour min_length=30 must be inferior than your max_length=22.\nYour min_length=30 must be inferior than your max_length=26.\nYour min_length=30 must be inferior than your max_length=22.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=19.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=18.\nYour min_length=30 must be inferior than your max_length=28.\nYour min_length=30 must be inferior than your max_length=11.\nYour min_length=30 must be inferior than your max_length=29.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=18.\nYour min_length=30 must be inferior than your max_length=27.\nYour min_length=30 must be inferior than your max_length=12.\nYour min_length=30 must be inferior than your max_length=13.\nYour min_length=30 must be inferior than your max_length=23.\nYour min_length=30 must be inferior than your max_length=12.\nYour min_length=30 must be inferior than your max_length=22.\nYour min_length=30 must be inferior than your max_length=13.\nYour min_length=30 must be inferior than your max_length=22.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=22.\nYour min_length=30 must be inferior than your max_length=27.\nYour min_length=30 must be inferior than your max_length=25.\nYour min_length=30 must be inferior than your max_length=19.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=17.\nYour min_length=30 must be inferior than your max_length=26.\nYour min_length=30 must be inferior than your max_length=14.\nYour min_length=30 must be inferior than your max_length=13.\nYour min_length=30 must be inferior than your max_length=26.\nYour min_length=30 must be inferior than your max_length=16.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=18.\nYour min_length=30 must be inferior than your max_length=12.\nYour min_length=30 must be inferior than your max_length=12.\nYour min_length=30 must be inferior than your max_length=15.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=22.\nYour min_length=30 must be inferior than your max_length=22.\nYour min_length=30 must be inferior than your max_length=15.\nYour min_length=30 must be inferior than your max_length=23.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=11.\nYour min_length=30 must be inferior than your max_length=25.\nYour min_length=30 must be inferior than your max_length=20.\nYour min_length=30 must be inferior than your max_length=16.\nYour min_length=30 must be inferior than your max_length=18.\nYour min_length=30 must be inferior than your max_length=25.\nYour min_length=30 must be inferior than your max_length=11.\nYour min_length=30 must be inferior than your max_length=22.\nYour min_length=30 must be inferior than your max_length=20.\nYour min_length=30 must be inferior than your max_length=26.\nYour min_length=30 must be inferior than your max_length=11.\nYour min_length=30 must be inferior than your max_length=17.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=27.\nYour min_length=30 must be inferior than your max_length=26.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=24.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=14.\nYour min_length=30 must be inferior than your max_length=29.\nYour min_length=30 must be inferior than your max_length=15.\nYour min_length=30 must be inferior than your max_length=16.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=20.\nYour min_length=30 must be inferior than your max_length=14.\nYour min_length=30 must be inferior than your max_length=25.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=13.\nYour min_length=30 must be inferior than your max_length=21.\nYour min_length=30 must be inferior than your max_length=19.\nYour min_length=30 must be inferior than your max_length=19.\nYour min_length=30 must be inferior than your max_length=15.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=19.\nYour min_length=30 must be inferior than your max_length=14.\nYour min_length=30 must be inferior than your max_length=19.\nYour min_length=30 must be inferior than your max_length=16.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=24.\nYour min_length=30 must be inferior than your max_length=28.\nYour min_length=30 must be inferior than your max_length=20.\nYour min_length=30 must be inferior than your max_length=27.\nYour min_length=30 must be inferior than your max_length=26.\nYour min_length=30 must be inferior than your max_length=13.\nYour min_length=30 must be inferior than your max_length=21.\nYour min_length=30 must be inferior than your max_length=18.\nYour min_length=30 must be inferior than your max_length=24.\nYour min_length=30 must be inferior than your max_length=24.\nYour min_length=30 must be inferior than your max_length=22.\nYour min_length=30 must be inferior than your max_length=29.\nYour min_length=30 must be inferior than your max_length=29.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=26.\nYour min_length=30 must be inferior than your max_length=27.\nYour min_length=30 must be inferior than your max_length=21.\nYour min_length=30 must be inferior than your max_length=18.\nYour min_length=30 must be inferior than your max_length=13.\nYour min_length=30 must be inferior than your max_length=17.\nYour min_length=30 must be inferior than your max_length=28.\nYour min_length=30 must be inferior than your max_length=23.\nYour min_length=30 must be inferior than your max_length=28.\nYour min_length=30 must be inferior than your max_length=28.\nYour min_length=30 must be inferior than your max_length=27.\nYour min_length=30 must be inferior than your max_length=21.\nYour min_length=30 must be inferior than your max_length=20.\nYour min_length=30 must be inferior than your max_length=17.\nYour min_length=30 must be inferior than your max_length=24.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=21.\nYour min_length=30 must be inferior than your max_length=22.\nYour min_length=30 must be inferior than your max_length=16.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=15.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=20.\nYour min_length=30 must be inferior than your max_length=16.\nYour min_length=30 must be inferior than your max_length=15.\nYour min_length=30 must be inferior than your max_length=29.\nYour min_length=30 must be inferior than your max_length=16.\nYour min_length=30 must be inferior than your max_length=16.\nYour min_length=30 must be inferior than your max_length=21.\nYour min_length=30 must be inferior than your max_length=11.\nYour min_length=30 must be inferior than your max_length=11.\nYour min_length=30 must be inferior than your max_length=16.\nYour min_length=30 must be inferior than your max_length=13.\nYour min_length=30 must be inferior than your max_length=10.\nYour min_length=30 must be inferior than your max_length=14.\n","output_type":"stream"},{"name":"stdout","text":"rouge1: Score(precision=0.5240761726639174, recall=0.5164609406411964, fmeasure=0.48813699709755287)\nrouge2: Score(precision=0.2750003463132431, recall=0.26328277298441694, fmeasure=0.2498681871552253)\nrougeL: Score(precision=0.4338563417029981, recall=0.42507261340158575, fmeasure=0.40241707690768413)\nrougeLsum: Score(precision=0.4330077769824946, recall=0.42511422355384887, fmeasure=0.4021251370419605)\n","output_type":"stream"}]},{"cell_type":"code","source":"import pandas as pd\n\n# Assuming you have generated summaries in generated_summaries list and references list is already defined\n# Create a DataFrame to store the results\ndf_results = pd.DataFrame({\n    'reference': references,\n    'generated_summary': generated_summaries\n})\n\n# Save the results to a CSV file\ndf_results.to_csv('summaries_results.csv', index=False)\n\n# Optionally, save ROUGE scores to another file\nrouge_scores = {key: results[key].mid for key in results.keys()}\ndf_rouge = pd.DataFrame(rouge_scores.items(), columns=['metric', 'score'])\ndf_rouge.to_csv('rouge_scores.csv', index=False)","metadata":{"execution":{"iopub.status.busy":"2024-07-20T05:02:42.499367Z","iopub.execute_input":"2024-07-20T05:02:42.500177Z","iopub.status.idle":"2024-07-20T05:02:42.521901Z","shell.execute_reply.started":"2024-07-20T05:02:42.500142Z","shell.execute_reply":"2024-07-20T05:02:42.520888Z"},"trusted":true},"execution_count":33,"outputs":[]},{"cell_type":"code","source":"df_results","metadata":{"execution":{"iopub.status.busy":"2024-07-20T05:02:48.580605Z","iopub.execute_input":"2024-07-20T05:02:48.581357Z","iopub.status.idle":"2024-07-20T05:02:48.600133Z","shell.execute_reply.started":"2024-07-20T05:02:48.581324Z","shell.execute_reply":"2024-07-20T05:02:48.599142Z"},"trusted":true},"execution_count":34,"outputs":[{"execution_count":34,"output_type":"execute_result","data":{"text/plain":"                                             reference  \\\n0    Hannah needs Betty's number but Amanda doesn't...   \n1    Eric and Rob are going to watch a stand-up on ...   \n2    Lenny can't decide which trousers to buy. Bob ...   \n3    Emma will be home soon and she will let Will k...   \n4    Jane is in Warsaw. Ollie and Jane has a party....   \n..                                                 ...   \n814  Benjamin didn't come to see a basketball game ...   \n815      The audition starts at 7.30 P.M. in Antena 3.   \n816                    Marta sent a file accidentally,   \n817  There was a meet-and-greet with James Charles ...   \n818  Rachel sends a list of Top 50 films of 2018. J...   \n\n                                     generated_summary  \n0    Hannah doesn't have Betty's number. Amanda ask...  \n1    Rob will watch some stand-up comedy by Russian...  \n2    Lenny will buy the first or the third pair of ...  \n3    Emma will be home soon. She will tell Will wha...  \n4    Jane is back from Morocco. Ollie and Jane will...  \n..                                                 ...  \n814  Benjamin was unable to attend Friday night's b...  \n815                               Jamilla reminds Kiki  \n816        Marta accidentally sent Weronika and Agnies  \n817  There was a meet and greet with James Charles ...  \n818  Janice has watched almost all the Top 50 Best ...  \n\n[819 rows x 2 columns]","text/html":"<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>reference</th>\n      <th>generated_summary</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>Hannah needs Betty's number but Amanda doesn't...</td>\n      <td>Hannah doesn't have Betty's number. Amanda ask...</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>Eric and Rob are going to watch a stand-up on ...</td>\n      <td>Rob will watch some stand-up comedy by Russian...</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>Lenny can't decide which trousers to buy. Bob ...</td>\n      <td>Lenny will buy the first or the third pair of ...</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>Emma will be home soon and she will let Will k...</td>\n      <td>Emma will be home soon. She will tell Will wha...</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>Jane is in Warsaw. Ollie and Jane has a party....</td>\n      <td>Jane is back from Morocco. Ollie and Jane will...</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>814</th>\n      <td>Benjamin didn't come to see a basketball game ...</td>\n      <td>Benjamin was unable to attend Friday night's b...</td>\n    </tr>\n    <tr>\n      <th>815</th>\n      <td>The audition starts at 7.30 P.M. in Antena 3.</td>\n      <td>Jamilla reminds Kiki</td>\n    </tr>\n    <tr>\n      <th>816</th>\n      <td>Marta sent a file accidentally,</td>\n      <td>Marta accidentally sent Weronika and Agnies</td>\n    </tr>\n    <tr>\n      <th>817</th>\n      <td>There was a meet-and-greet with James Charles ...</td>\n      <td>There was a meet and greet with James Charles ...</td>\n    </tr>\n    <tr>\n      <th>818</th>\n      <td>Rachel sends a list of Top 50 films of 2018. J...</td>\n      <td>Janice has watched almost all the Top 50 Best ...</td>\n    </tr>\n  </tbody>\n</table>\n<p>819 rows × 2 columns</p>\n</div>"},"metadata":{}}]},{"cell_type":"code","source":"df_rouge","metadata":{"execution":{"iopub.status.busy":"2024-07-20T05:02:58.224837Z","iopub.execute_input":"2024-07-20T05:02:58.225244Z","iopub.status.idle":"2024-07-20T05:02:58.240278Z","shell.execute_reply.started":"2024-07-20T05:02:58.225213Z","shell.execute_reply":"2024-07-20T05:02:58.238961Z"},"trusted":true},"execution_count":35,"outputs":[{"execution_count":35,"output_type":"execute_result","data":{"text/plain":"      metric                                              score\n0     rouge1  (0.5240761726639174, 0.5164609406411964, 0.488...\n1     rouge2  (0.2750003463132431, 0.26328277298441694, 0.24...\n2     rougeL  (0.4338563417029981, 0.42507261340158575, 0.40...\n3  rougeLsum  (0.4330077769824946, 0.42511422355384887, 0.40...","text/html":"<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>metric</th>\n      <th>score</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>rouge1</td>\n      <td>(0.5240761726639174, 0.5164609406411964, 0.488...</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>rouge2</td>\n      <td>(0.2750003463132431, 0.26328277298441694, 0.24...</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>rougeL</td>\n      <td>(0.4338563417029981, 0.42507261340158575, 0.40...</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>rougeLsum</td>\n      <td>(0.4330077769824946, 0.42511422355384887, 0.40...</td>\n    </tr>\n  </tbody>\n</table>\n</div>"},"metadata":{}}]},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]}]}
\ No newline at end of file