diff --git "a/dps_challenge_notebook.ipynb" "b/dps_challenge_notebook.ipynb"
new file mode 100644--- /dev/null
+++ "b/dps_challenge_notebook.ipynb"
@@ -0,0 +1,3058 @@
+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "D4Yal6HOyyBt"
+      },
+      "source": [
+        "# Importing Libraries & loading data"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 84,
+      "metadata": {
+        "id": "Sesct3fTzQVW"
+      },
+      "outputs": [],
+      "source": [
+        "import pandas as pd\n",
+        "import matplotlib.pyplot as plt\n",
+        "from sklearn.preprocessing import LabelEncoder, StandardScaler, OneHotEncoder\n",
+        "import xgboost as xgb\n",
+        "from sklearn.model_selection import train_test_split, GridSearchCV\n",
+        "import numpy as np\n",
+        "from sklearn.metrics import mean_squared_error\n",
+        "from xgboost import XGBRegressor\n",
+        "import calendar\n",
+        "import pickle"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "h5D2aP28yyBw"
+      },
+      "source": [
+        "# Checking the data for null values"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 85,
+      "metadata": {
+        "id": "WGqXaM-XzQVX"
+      },
+      "outputs": [],
+      "source": [
+        "# Location of the monthly traffic-accident CSV (Colab's /content folder);\n",
+        "# change this single constant when running the notebook elsewhere.\n",
+        "DATA_PATH = \"/content/monatszahlen2412_verkehrsunfaelle_06_12_24.csv\"\n",
+        "\n",
+        "# Load the full table; later cells narrow it to the columns and years of interest.\n",
+        "parent_df = pd.read_csv(DATA_PATH)\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 86,
+      "metadata": {
+        "id": "zBBpsXqszQVY",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "a61d9595-6efe-4111-9192-529e54dd7fe3"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "<class 'pandas.core.frame.DataFrame'>\n",
+            "RangeIndex: 2254 entries, 0 to 2253\n",
+            "Data columns (total 9 columns):\n",
+            " #   Column                          Non-Null Count  Dtype  \n",
+            "---  ------                          --------------  -----  \n",
+            " 0   MONATSZAHL                      2254 non-null   object \n",
+            " 1   AUSPRAEGUNG                     2254 non-null   object \n",
+            " 2   JAHR                            2254 non-null   int64  \n",
+            " 3   MONAT                           2254 non-null   object \n",
+            " 4   WERT                            2086 non-null   float64\n",
+            " 5   VORJAHRESWERT                   2086 non-null   float64\n",
+            " 6   VERAEND_VORMONAT_PROZENT        1924 non-null   float64\n",
+            " 7   VERAEND_VORJAHRESMONAT_PROZENT  2001 non-null   float64\n",
+            " 8   ZWOELF_MONATE_MITTELWERT        1932 non-null   float64\n",
+            "dtypes: float64(5), int64(1), object(3)\n",
+            "memory usage: 158.6+ KB\n",
+            "None\n",
+            "MONATSZAHL                          0\n",
+            "AUSPRAEGUNG                         0\n",
+            "JAHR                                0\n",
+            "MONAT                               0\n",
+            "WERT                              168\n",
+            "VORJAHRESWERT                     168\n",
+            "VERAEND_VORMONAT_PROZENT          330\n",
+            "VERAEND_VORJAHRESMONAT_PROZENT    253\n",
+            "ZWOELF_MONATE_MITTELWERT          322\n",
+            "dtype: int64\n"
+          ]
+        }
+      ],
+      "source": [
+        "# DataFrame.info() writes its summary to stdout itself and returns None,\n",
+        "# so it is called directly; wrapping it in print() adds a stray \"None\" line.\n",
+        "parent_df.info()\n",
+        "\n",
+        "# Number of missing values in each column.\n",
+        "print(parent_df.isna().sum())\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# Keeping only the first 5 columns, the ones the instructions mark as important\n",
+        "\n",
+        "The first 5 columns are the important ones:\n",
+        "\n",
+        "- Category (`MONATSZAHL`)\n",
+        "- Accident type (`AUSPRAEGUNG`; `insgesamt` means the total over all subcategories)\n",
+        "- Year (`JAHR`)\n",
+        "- Month (`MONAT`)\n",
+        "- Value (`WERT`)\n"
+      ],
+      "metadata": {
+        "id": "3beiNb3QAS_M"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Restrict the frame to the five columns used for the rest of the analysis.\n",
+        "columns_of_interest = ['MONATSZAHL', 'AUSPRAEGUNG', 'JAHR', 'MONAT', 'WERT']\n",
+        "parent_df = parent_df[columns_of_interest]\n",
+        "\n",
+        "# List the distinct values of every column except WERT (the last entry),\n",
+        "# using one loop instead of a copy-pasted print per column.\n",
+        "for column in columns_of_interest[:-1]:\n",
+        "    print(f\"Unique values of {column}: {parent_df[column].unique()}\")\n"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "xTZE4w5qASTn",
+        "outputId": "60696fe6-ee69-4ca6-b947-5d491b91a1b7"
+      },
+      "execution_count": 87,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Unqiue values of MONATSZAHL: ['Alkoholunfälle' 'Fluchtunfälle' 'Verkehrsunfälle']\n",
+            "Unqiue values of AUSPRAEGUNG: ['insgesamt' 'Verletzte und Getötete' 'mit Personenschäden']\n",
+            "Unqiue valus of JAHR: [2024 2023 2022 2021 2020 2019 2018 2017 2016 2015 2014 2013 2012 2011\n",
+            " 2010 2009 2008 2007 2006 2005 2004 2003 2002 2001 2000]\n",
+            "Unqiue valus of MONAT: ['202401' '202402' '202403' '202404' '202405' '202406' '202407' '202408'\n",
+            " '202409' '202410' '202411' '202412' '202301' '202302' '202303' '202304'\n",
+            " '202305' '202306' '202307' '202308' '202309' '202310' '202311' '202312'\n",
+            " 'Summe' '202201' '202202' '202203' '202204' '202205' '202206' '202207'\n",
+            " '202208' '202209' '202210' '202211' '202212' '202101' '202102' '202103'\n",
+            " '202104' '202105' '202106' '202107' '202108' '202109' '202110' '202111'\n",
+            " '202112' '202001' '202002' '202003' '202004' '202005' '202006' '202007'\n",
+            " '202008' '202009' '202010' '202011' '202012' '201901' '201902' '201903'\n",
+            " '201904' '201905' '201906' '201907' '201908' '201909' '201910' '201911'\n",
+            " '201912' '201801' '201802' '201803' '201804' '201805' '201806' '201807'\n",
+            " '201808' '201809' '201810' '201811' '201812' '201701' '201702' '201703'\n",
+            " '201704' '201705' '201706' '201707' '201708' '201709' '201710' '201711'\n",
+            " '201712' '201601' '201602' '201603' '201604' '201605' '201606' '201607'\n",
+            " '201608' '201609' '201610' '201611' '201612' '201501' '201502' '201503'\n",
+            " '201504' '201505' '201506' '201507' '201508' '201509' '201510' '201511'\n",
+            " '201512' '201401' '201402' '201403' '201404' '201405' '201406' '201407'\n",
+            " '201408' '201409' '201410' '201411' '201412' '201301' '201302' '201303'\n",
+            " '201304' '201305' '201306' '201307' '201308' '201309' '201310' '201311'\n",
+            " '201312' '201201' '201202' '201203' '201204' '201205' '201206' '201207'\n",
+            " '201208' '201209' '201210' '201211' '201212' '201101' '201102' '201103'\n",
+            " '201104' '201105' '201106' '201107' '201108' '201109' '201110' '201111'\n",
+            " '201112' '201001' '201002' '201003' '201004' '201005' '201006' '201007'\n",
+            " '201008' '201009' '201010' '201011' '201012' '200901' '200902' '200903'\n",
+            " '200904' '200905' '200906' '200907' '200908' '200909' '200910' '200911'\n",
+            " '200912' '200801' '200802' '200803' '200804' '200805' '200806' '200807'\n",
+            " '200808' '200809' '200810' '200811' '200812' '200701' '200702' '200703'\n",
+            " '200704' '200705' '200706' '200707' '200708' '200709' '200710' '200711'\n",
+            " '200712' '200601' '200602' '200603' '200604' '200605' '200606' '200607'\n",
+            " '200608' '200609' '200610' '200611' '200612' '200501' '200502' '200503'\n",
+            " '200504' '200505' '200506' '200507' '200508' '200509' '200510' '200511'\n",
+            " '200512' '200401' '200402' '200403' '200404' '200405' '200406' '200407'\n",
+            " '200408' '200409' '200410' '200411' '200412' '200301' '200302' '200303'\n",
+            " '200304' '200305' '200306' '200307' '200308' '200309' '200310' '200311'\n",
+            " '200312' '200201' '200202' '200203' '200204' '200205' '200206' '200207'\n",
+            " '200208' '200209' '200210' '200211' '200212' '200101' '200102' '200103'\n",
+            " '200104' '200105' '200106' '200107' '200108' '200109' '200110' '200111'\n",
+            " '200112' '200001' '200002' '200003' '200004' '200005' '200006' '200007'\n",
+            " '200008' '200009' '200010' '200011' '200012']\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# Dropping the rows from 2020 onwards (only the years 2000 to 2019 are kept)"
+      ],
+      "metadata": {
+        "id": "-6-k9bkqAeCn"
+      }
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 88,
+      "metadata": {
+        "id": "r5SG6rjMzQVY",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "9a45ba3d-234c-49ca-e722-bb97db5348d3"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Unqiue values of JAHR: [2019 2018 2017 2016 2015 2014 2013 2012 2011 2010 2009 2008 2007 2006\n",
+            " 2005 2004 2003 2002 2001 2000]\n"
+          ]
+        }
+      ],
+      "source": [
+        "# Keep only the records dated before 2020.\n",
+        "# NOTE(review): `< 2020` removes the year 2020 itself as well; use `<= 2020`\n",
+        "# if only the records after 2020 are meant to be excluded - confirm the intent.\n",
+        "parent_df = parent_df[parent_df['JAHR'] < 2020]\n",
+        "\n",
+        "# Sanity check: the remaining years should stop at 2019.\n",
+        "print(f\"Unique values of JAHR: {parent_df['JAHR'].unique()}\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "parent_df.head(2)"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 112
+        },
+        "id": "jWNecV_uAkkq",
+        "outputId": "501a5e7a-6f21-4fc2-8137-d562e0b6bd5d"
+      },
+      "execution_count": 89,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "        MONATSZAHL AUSPRAEGUNG  JAHR   MONAT   WERT\n",
+              "63  Alkoholunfälle   insgesamt  2019   Summe  434.0\n",
+              "64  Alkoholunfälle   insgesamt  2019  201901   22.0"
+            ],
+            "text/html": [
+              "\n",
+              "  <div id=\"df-4364bf7f-47ec-45c0-a3dc-4dc3eba953ee\" class=\"colab-df-container\">\n",
+              "    <div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>MONATSZAHL</th>\n",
+              "      <th>AUSPRAEGUNG</th>\n",
+              "      <th>JAHR</th>\n",
+              "      <th>MONAT</th>\n",
+              "      <th>WERT</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>63</th>\n",
+              "      <td>Alkoholunfälle</td>\n",
+              "      <td>insgesamt</td>\n",
+              "      <td>2019</td>\n",
+              "      <td>Summe</td>\n",
+              "      <td>434.0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>64</th>\n",
+              "      <td>Alkoholunfälle</td>\n",
+              "      <td>insgesamt</td>\n",
+              "      <td>2019</td>\n",
+              "      <td>201901</td>\n",
+              "      <td>22.0</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>\n",
+              "    <div class=\"colab-df-buttons\">\n",
+              "\n",
+              "  <div class=\"colab-df-container\">\n",
+              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-4364bf7f-47ec-45c0-a3dc-4dc3eba953ee')\"\n",
+              "            title=\"Convert this dataframe to an interactive table.\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
+              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
+              "  </svg>\n",
+              "    </button>\n",
+              "\n",
+              "  <style>\n",
+              "    .colab-df-container {\n",
+              "      display:flex;\n",
+              "      gap: 12px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert {\n",
+              "      background-color: #E8F0FE;\n",
+              "      border: none;\n",
+              "      border-radius: 50%;\n",
+              "      cursor: pointer;\n",
+              "      display: none;\n",
+              "      fill: #1967D2;\n",
+              "      height: 32px;\n",
+              "      padding: 0 0 0 0;\n",
+              "      width: 32px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert:hover {\n",
+              "      background-color: #E2EBFA;\n",
+              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "      fill: #174EA6;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-buttons div {\n",
+              "      margin-bottom: 4px;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert {\n",
+              "      background-color: #3B4455;\n",
+              "      fill: #D2E3FC;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert:hover {\n",
+              "      background-color: #434B5C;\n",
+              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "      fill: #FFFFFF;\n",
+              "    }\n",
+              "  </style>\n",
+              "\n",
+              "    <script>\n",
+              "      const buttonEl =\n",
+              "        document.querySelector('#df-4364bf7f-47ec-45c0-a3dc-4dc3eba953ee button.colab-df-convert');\n",
+              "      buttonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "\n",
+              "      async function convertToInteractive(key) {\n",
+              "        const element = document.querySelector('#df-4364bf7f-47ec-45c0-a3dc-4dc3eba953ee');\n",
+              "        const dataTable =\n",
+              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
+              "                                                    [key], {});\n",
+              "        if (!dataTable) return;\n",
+              "\n",
+              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
+              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
+              "          + ' to learn more about interactive tables.';\n",
+              "        element.innerHTML = '';\n",
+              "        dataTable['output_type'] = 'display_data';\n",
+              "        await google.colab.output.renderOutput(dataTable, element);\n",
+              "        const docLink = document.createElement('div');\n",
+              "        docLink.innerHTML = docLinkHtml;\n",
+              "        element.appendChild(docLink);\n",
+              "      }\n",
+              "    </script>\n",
+              "  </div>\n",
+              "\n",
+              "\n",
+              "<div id=\"df-183ee9d6-0baf-4107-ad56-bb33e02dc441\">\n",
+              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-183ee9d6-0baf-4107-ad56-bb33e02dc441')\"\n",
+              "            title=\"Suggest charts\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "     width=\"24px\">\n",
+              "    <g>\n",
+              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
+              "    </g>\n",
+              "</svg>\n",
+              "  </button>\n",
+              "\n",
+              "<style>\n",
+              "  .colab-df-quickchart {\n",
+              "      --bg-color: #E8F0FE;\n",
+              "      --fill-color: #1967D2;\n",
+              "      --hover-bg-color: #E2EBFA;\n",
+              "      --hover-fill-color: #174EA6;\n",
+              "      --disabled-fill-color: #AAA;\n",
+              "      --disabled-bg-color: #DDD;\n",
+              "  }\n",
+              "\n",
+              "  [theme=dark] .colab-df-quickchart {\n",
+              "      --bg-color: #3B4455;\n",
+              "      --fill-color: #D2E3FC;\n",
+              "      --hover-bg-color: #434B5C;\n",
+              "      --hover-fill-color: #FFFFFF;\n",
+              "      --disabled-bg-color: #3B4455;\n",
+              "      --disabled-fill-color: #666;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart {\n",
+              "    background-color: var(--bg-color);\n",
+              "    border: none;\n",
+              "    border-radius: 50%;\n",
+              "    cursor: pointer;\n",
+              "    display: none;\n",
+              "    fill: var(--fill-color);\n",
+              "    height: 32px;\n",
+              "    padding: 0;\n",
+              "    width: 32px;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart:hover {\n",
+              "    background-color: var(--hover-bg-color);\n",
+              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "    fill: var(--button-hover-fill-color);\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart-complete:disabled,\n",
+              "  .colab-df-quickchart-complete:disabled:hover {\n",
+              "    background-color: var(--disabled-bg-color);\n",
+              "    fill: var(--disabled-fill-color);\n",
+              "    box-shadow: none;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-spinner {\n",
+              "    border: 2px solid var(--fill-color);\n",
+              "    border-color: transparent;\n",
+              "    border-bottom-color: var(--fill-color);\n",
+              "    animation:\n",
+              "      spin 1s steps(1) infinite;\n",
+              "  }\n",
+              "\n",
+              "  @keyframes spin {\n",
+              "    0% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "      border-left-color: var(--fill-color);\n",
+              "    }\n",
+              "    20% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    30% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    40% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    60% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    80% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "    90% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "  }\n",
+              "</style>\n",
+              "\n",
+              "  <script>\n",
+              "    async function quickchart(key) {\n",
+              "      const quickchartButtonEl =\n",
+              "        document.querySelector('#' + key + ' button');\n",
+              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
+              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
+              "      try {\n",
+              "        const charts = await google.colab.kernel.invokeFunction(\n",
+              "            'suggestCharts', [key], {});\n",
+              "      } catch (error) {\n",
+              "        console.error('Error during call to suggestCharts:', error);\n",
+              "      }\n",
+              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
+              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
+              "    }\n",
+              "    (() => {\n",
+              "      let quickchartButtonEl =\n",
+              "        document.querySelector('#df-183ee9d6-0baf-4107-ad56-bb33e02dc441 button');\n",
+              "      quickchartButtonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "    })();\n",
+              "  </script>\n",
+              "</div>\n",
+              "\n",
+              "    </div>\n",
+              "  </div>\n"
+            ],
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "type": "dataframe",
+              "variable_name": "parent_df",
+              "summary": "{\n  \"name\": \"parent_df\",\n  \"rows\": 1813,\n  \"fields\": [\n    {\n      \"column\": \"MONATSZAHL\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 3,\n        \"samples\": [\n          \"Alkoholunf\\u00e4lle\",\n          \"Fluchtunf\\u00e4lle\",\n          \"Verkehrsunf\\u00e4lle\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"AUSPRAEGUNG\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 3,\n        \"samples\": [\n          \"insgesamt\",\n          \"Verletzte und Get\\u00f6tete\",\n          \"mit Personensch\\u00e4den\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"JAHR\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 5,\n        \"min\": 2000,\n        \"max\": 2019,\n        \"num_unique_values\": 20,\n        \"samples\": [\n          2019,\n          2002,\n          2004\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"MONAT\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 241,\n        \"samples\": [\n          \"201812\",\n          \"201906\",\n          \"200106\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"WERT\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 4565.788307713133,\n        \"min\": 0.0,\n        \"max\": 46988.0,\n        \"num_unique_values\": 903,\n        \"samples\": [\n          74.0,\n          4074.0,\n          951.0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"
+            }
+          },
+          "metadata": {},
+          "execution_count": 89
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Renumber the rows from 0 now that the year filter has left gaps in the index;\n",
+        "# the old labels are discarded rather than kept as a column.\n",
+        "parent_df = parent_df.reset_index(drop=True)\n",
+        "\n",
+        "# Leave the frame as the last expression so the notebook renders it.\n",
+        "parent_df"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 424
+        },
+        "id": "vYRSRdsCAmRY",
+        "outputId": "7810682a-213e-4df6-e32a-15a3fecc1b67"
+      },
+      "execution_count": 90,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "           MONATSZAHL             AUSPRAEGUNG  JAHR   MONAT   WERT\n",
+              "0      Alkoholunfälle               insgesamt  2019   Summe  434.0\n",
+              "1      Alkoholunfälle               insgesamt  2019  201901   22.0\n",
+              "2      Alkoholunfälle               insgesamt  2019  201902   28.0\n",
+              "3      Alkoholunfälle               insgesamt  2019  201903   34.0\n",
+              "4      Alkoholunfälle               insgesamt  2019  201904   36.0\n",
+              "...               ...                     ...   ...     ...    ...\n",
+              "1808  Verkehrsunfälle  Verletzte und Getötete  2000  200008  647.0\n",
+              "1809  Verkehrsunfälle  Verletzte und Getötete  2000  200009  675.0\n",
+              "1810  Verkehrsunfälle  Verletzte und Getötete  2000  200010  615.0\n",
+              "1811  Verkehrsunfälle  Verletzte und Getötete  2000  200011  578.0\n",
+              "1812  Verkehrsunfälle  Verletzte und Getötete  2000  200012  515.0\n",
+              "\n",
+              "[1813 rows x 5 columns]"
+            ],
+            "text/html": [
+              "\n",
+              "  <div id=\"df-f4c63708-65b2-4964-be28-38e1b014e155\" class=\"colab-df-container\">\n",
+              "    <div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>MONATSZAHL</th>\n",
+              "      <th>AUSPRAEGUNG</th>\n",
+              "      <th>JAHR</th>\n",
+              "      <th>MONAT</th>\n",
+              "      <th>WERT</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>Alkoholunfälle</td>\n",
+              "      <td>insgesamt</td>\n",
+              "      <td>2019</td>\n",
+              "      <td>Summe</td>\n",
+              "      <td>434.0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>Alkoholunfälle</td>\n",
+              "      <td>insgesamt</td>\n",
+              "      <td>2019</td>\n",
+              "      <td>201901</td>\n",
+              "      <td>22.0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2</th>\n",
+              "      <td>Alkoholunfälle</td>\n",
+              "      <td>insgesamt</td>\n",
+              "      <td>2019</td>\n",
+              "      <td>201902</td>\n",
+              "      <td>28.0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>Alkoholunfälle</td>\n",
+              "      <td>insgesamt</td>\n",
+              "      <td>2019</td>\n",
+              "      <td>201903</td>\n",
+              "      <td>34.0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>Alkoholunfälle</td>\n",
+              "      <td>insgesamt</td>\n",
+              "      <td>2019</td>\n",
+              "      <td>201904</td>\n",
+              "      <td>36.0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>...</th>\n",
+              "      <td>...</td>\n",
+              "      <td>...</td>\n",
+              "      <td>...</td>\n",
+              "      <td>...</td>\n",
+              "      <td>...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1808</th>\n",
+              "      <td>Verkehrsunfälle</td>\n",
+              "      <td>Verletzte und Getötete</td>\n",
+              "      <td>2000</td>\n",
+              "      <td>200008</td>\n",
+              "      <td>647.0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1809</th>\n",
+              "      <td>Verkehrsunfälle</td>\n",
+              "      <td>Verletzte und Getötete</td>\n",
+              "      <td>2000</td>\n",
+              "      <td>200009</td>\n",
+              "      <td>675.0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1810</th>\n",
+              "      <td>Verkehrsunfälle</td>\n",
+              "      <td>Verletzte und Getötete</td>\n",
+              "      <td>2000</td>\n",
+              "      <td>200010</td>\n",
+              "      <td>615.0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1811</th>\n",
+              "      <td>Verkehrsunfälle</td>\n",
+              "      <td>Verletzte und Getötete</td>\n",
+              "      <td>2000</td>\n",
+              "      <td>200011</td>\n",
+              "      <td>578.0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1812</th>\n",
+              "      <td>Verkehrsunfälle</td>\n",
+              "      <td>Verletzte und Getötete</td>\n",
+              "      <td>2000</td>\n",
+              "      <td>200012</td>\n",
+              "      <td>515.0</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "<p>1813 rows × 5 columns</p>\n",
+              "</div>\n",
+              "    <div class=\"colab-df-buttons\">\n",
+              "\n",
+              "  <div class=\"colab-df-container\">\n",
+              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-f4c63708-65b2-4964-be28-38e1b014e155')\"\n",
+              "            title=\"Convert this dataframe to an interactive table.\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
+              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
+              "  </svg>\n",
+              "    </button>\n",
+              "\n",
+              "  <style>\n",
+              "    .colab-df-container {\n",
+              "      display:flex;\n",
+              "      gap: 12px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert {\n",
+              "      background-color: #E8F0FE;\n",
+              "      border: none;\n",
+              "      border-radius: 50%;\n",
+              "      cursor: pointer;\n",
+              "      display: none;\n",
+              "      fill: #1967D2;\n",
+              "      height: 32px;\n",
+              "      padding: 0 0 0 0;\n",
+              "      width: 32px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert:hover {\n",
+              "      background-color: #E2EBFA;\n",
+              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "      fill: #174EA6;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-buttons div {\n",
+              "      margin-bottom: 4px;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert {\n",
+              "      background-color: #3B4455;\n",
+              "      fill: #D2E3FC;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert:hover {\n",
+              "      background-color: #434B5C;\n",
+              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "      fill: #FFFFFF;\n",
+              "    }\n",
+              "  </style>\n",
+              "\n",
+              "    <script>\n",
+              "      const buttonEl =\n",
+              "        document.querySelector('#df-f4c63708-65b2-4964-be28-38e1b014e155 button.colab-df-convert');\n",
+              "      buttonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "\n",
+              "      async function convertToInteractive(key) {\n",
+              "        const element = document.querySelector('#df-f4c63708-65b2-4964-be28-38e1b014e155');\n",
+              "        const dataTable =\n",
+              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
+              "                                                    [key], {});\n",
+              "        if (!dataTable) return;\n",
+              "\n",
+              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
+              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
+              "          + ' to learn more about interactive tables.';\n",
+              "        element.innerHTML = '';\n",
+              "        dataTable['output_type'] = 'display_data';\n",
+              "        await google.colab.output.renderOutput(dataTable, element);\n",
+              "        const docLink = document.createElement('div');\n",
+              "        docLink.innerHTML = docLinkHtml;\n",
+              "        element.appendChild(docLink);\n",
+              "      }\n",
+              "    </script>\n",
+              "  </div>\n",
+              "\n",
+              "\n",
+              "<div id=\"df-9da17884-2834-4ff9-a004-d6a5438b9382\">\n",
+              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-9da17884-2834-4ff9-a004-d6a5438b9382')\"\n",
+              "            title=\"Suggest charts\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "     width=\"24px\">\n",
+              "    <g>\n",
+              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
+              "    </g>\n",
+              "</svg>\n",
+              "  </button>\n",
+              "\n",
+              "<style>\n",
+              "  .colab-df-quickchart {\n",
+              "      --bg-color: #E8F0FE;\n",
+              "      --fill-color: #1967D2;\n",
+              "      --hover-bg-color: #E2EBFA;\n",
+              "      --hover-fill-color: #174EA6;\n",
+              "      --disabled-fill-color: #AAA;\n",
+              "      --disabled-bg-color: #DDD;\n",
+              "  }\n",
+              "\n",
+              "  [theme=dark] .colab-df-quickchart {\n",
+              "      --bg-color: #3B4455;\n",
+              "      --fill-color: #D2E3FC;\n",
+              "      --hover-bg-color: #434B5C;\n",
+              "      --hover-fill-color: #FFFFFF;\n",
+              "      --disabled-bg-color: #3B4455;\n",
+              "      --disabled-fill-color: #666;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart {\n",
+              "    background-color: var(--bg-color);\n",
+              "    border: none;\n",
+              "    border-radius: 50%;\n",
+              "    cursor: pointer;\n",
+              "    display: none;\n",
+              "    fill: var(--fill-color);\n",
+              "    height: 32px;\n",
+              "    padding: 0;\n",
+              "    width: 32px;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart:hover {\n",
+              "    background-color: var(--hover-bg-color);\n",
+              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "    fill: var(--button-hover-fill-color);\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart-complete:disabled,\n",
+              "  .colab-df-quickchart-complete:disabled:hover {\n",
+              "    background-color: var(--disabled-bg-color);\n",
+              "    fill: var(--disabled-fill-color);\n",
+              "    box-shadow: none;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-spinner {\n",
+              "    border: 2px solid var(--fill-color);\n",
+              "    border-color: transparent;\n",
+              "    border-bottom-color: var(--fill-color);\n",
+              "    animation:\n",
+              "      spin 1s steps(1) infinite;\n",
+              "  }\n",
+              "\n",
+              "  @keyframes spin {\n",
+              "    0% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "      border-left-color: var(--fill-color);\n",
+              "    }\n",
+              "    20% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    30% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    40% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    60% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    80% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "    90% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "  }\n",
+              "</style>\n",
+              "\n",
+              "  <script>\n",
+              "    async function quickchart(key) {\n",
+              "      const quickchartButtonEl =\n",
+              "        document.querySelector('#' + key + ' button');\n",
+              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
+              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
+              "      try {\n",
+              "        const charts = await google.colab.kernel.invokeFunction(\n",
+              "            'suggestCharts', [key], {});\n",
+              "      } catch (error) {\n",
+              "        console.error('Error during call to suggestCharts:', error);\n",
+              "      }\n",
+              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
+              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
+              "    }\n",
+              "    (() => {\n",
+              "      let quickchartButtonEl =\n",
+              "        document.querySelector('#df-9da17884-2834-4ff9-a004-d6a5438b9382 button');\n",
+              "      quickchartButtonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "    })();\n",
+              "  </script>\n",
+              "</div>\n",
+              "\n",
+              "  <div id=\"id_f0cfdb3b-da69-4f4b-9857-f9dafea63c32\">\n",
+              "    <style>\n",
+              "      .colab-df-generate {\n",
+              "        background-color: #E8F0FE;\n",
+              "        border: none;\n",
+              "        border-radius: 50%;\n",
+              "        cursor: pointer;\n",
+              "        display: none;\n",
+              "        fill: #1967D2;\n",
+              "        height: 32px;\n",
+              "        padding: 0 0 0 0;\n",
+              "        width: 32px;\n",
+              "      }\n",
+              "\n",
+              "      .colab-df-generate:hover {\n",
+              "        background-color: #E2EBFA;\n",
+              "        box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "        fill: #174EA6;\n",
+              "      }\n",
+              "\n",
+              "      [theme=dark] .colab-df-generate {\n",
+              "        background-color: #3B4455;\n",
+              "        fill: #D2E3FC;\n",
+              "      }\n",
+              "\n",
+              "      [theme=dark] .colab-df-generate:hover {\n",
+              "        background-color: #434B5C;\n",
+              "        box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "        filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "        fill: #FFFFFF;\n",
+              "      }\n",
+              "    </style>\n",
+              "    <button class=\"colab-df-generate\" onclick=\"generateWithVariable('parent_df')\"\n",
+              "            title=\"Generate code using this dataframe.\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "       width=\"24px\">\n",
+              "    <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n",
+              "  </svg>\n",
+              "    </button>\n",
+              "    <script>\n",
+              "      (() => {\n",
+              "      const buttonEl =\n",
+              "        document.querySelector('#id_f0cfdb3b-da69-4f4b-9857-f9dafea63c32 button.colab-df-generate');\n",
+              "      buttonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "\n",
+              "      buttonEl.onclick = () => {\n",
+              "        google.colab.notebook.generateWithVariable('parent_df');\n",
+              "      }\n",
+              "      })();\n",
+              "    </script>\n",
+              "  </div>\n",
+              "\n",
+              "    </div>\n",
+              "  </div>\n"
+            ],
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "type": "dataframe",
+              "variable_name": "parent_df",
+              "summary": "{\n  \"name\": \"parent_df\",\n  \"rows\": 1813,\n  \"fields\": [\n    {\n      \"column\": \"MONATSZAHL\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 3,\n        \"samples\": [\n          \"Alkoholunf\\u00e4lle\",\n          \"Fluchtunf\\u00e4lle\",\n          \"Verkehrsunf\\u00e4lle\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"AUSPRAEGUNG\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 3,\n        \"samples\": [\n          \"insgesamt\",\n          \"Verletzte und Get\\u00f6tete\",\n          \"mit Personensch\\u00e4den\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"JAHR\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 5,\n        \"min\": 2000,\n        \"max\": 2019,\n        \"num_unique_values\": 20,\n        \"samples\": [\n          2019,\n          2002,\n          2004\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"MONAT\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 241,\n        \"samples\": [\n          \"201812\",\n          \"201906\",\n          \"200106\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"WERT\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 4565.788307713133,\n        \"min\": 0.0,\n        \"max\": 46988.0,\n        \"num_unique_values\": 903,\n        \"samples\": [\n          74.0,\n          4074.0,\n          951.0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"
+            }
+          },
+          "metadata": {},
+          "execution_count": 90
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "mixK8LmRyyBx"
+      },
+      "source": [
+        "# Since this is a regression task, removing outliers from the dataset can help. Values that lie more than 3 standard deviations from the column mean are excluded, which gives a tighter distribution"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 91,
+      "metadata": {
+        "id": "jJVSlcgGzQVZ"
+      },
+      "outputs": [],
+      "source": [
+        "# Screen every numeric column of the raw frame in turn. A row survives only while\n",
+        "# its value lies strictly within 3 standard deviations of the column mean.\n",
+        "columns = parent_df.select_dtypes(include=[np.number]).columns\n",
+        "\n",
+        "df = parent_df.copy()\n",
+        "\n",
+        "for col in columns:\n",
+        "    # Mean and std are taken from the frame as already filtered by earlier columns.\n",
+        "    col_mean, col_std = df[col].mean(), df[col].std()\n",
+        "    z_scores = ((df[col] - col_mean) / col_std).abs()\n",
+        "    df = df.loc[z_scores < 3]"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 92,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "hYPXBmXfzQVa",
+        "outputId": "7686e12f-d8eb-48c2-91ae-2060e2d0bef0"
+      },
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "array(['Summe', '201901', '201902', '201903', '201904', '201905',\n",
+              "       '201906', '201907', '201908', '201909', '201910', '201911',\n",
+              "       '201912', '201801', '201802', '201803', '201804', '201805',\n",
+              "       '201806', '201807', '201808', '201809', '201810', '201811',\n",
+              "       '201812', '201701', '201702', '201703', '201704', '201705',\n",
+              "       '201706', '201707', '201708', '201709', '201710', '201711',\n",
+              "       '201712', '201601', '201602', '201603', '201604', '201605',\n",
+              "       '201606', '201607', '201608', '201609', '201610', '201611',\n",
+              "       '201612', '201501', '201502', '201503', '201504', '201505',\n",
+              "       '201506', '201507', '201508', '201509', '201510', '201511',\n",
+              "       '201512', '201401', '201402', '201403', '201404', '201405',\n",
+              "       '201406', '201407', '201408', '201409', '201410', '201411',\n",
+              "       '201412', '201301', '201302', '201303', '201304', '201305',\n",
+              "       '201306', '201307', '201308', '201309', '201310', '201311',\n",
+              "       '201312', '201201', '201202', '201203', '201204', '201205',\n",
+              "       '201206', '201207', '201208', '201209', '201210', '201211',\n",
+              "       '201212', '201101', '201102', '201103', '201104', '201105',\n",
+              "       '201106', '201107', '201108', '201109', '201110', '201111',\n",
+              "       '201112', '201001', '201002', '201003', '201004', '201005',\n",
+              "       '201006', '201007', '201008', '201009', '201010', '201011',\n",
+              "       '201012', '200901', '200902', '200903', '200904', '200905',\n",
+              "       '200906', '200907', '200908', '200909', '200910', '200911',\n",
+              "       '200912', '200801', '200802', '200803', '200804', '200805',\n",
+              "       '200806', '200807', '200808', '200809', '200810', '200811',\n",
+              "       '200812', '200701', '200702', '200703', '200704', '200705',\n",
+              "       '200706', '200707', '200708', '200709', '200710', '200711',\n",
+              "       '200712', '200601', '200602', '200603', '200604', '200605',\n",
+              "       '200606', '200607', '200608', '200609', '200610', '200611',\n",
+              "       '200612', '200501', '200502', '200503', '200504', '200505',\n",
+              "       '200506', '200507', '200508', '200509', '200510', '200511',\n",
+              "       '200512', '200401', '200402', '200403', '200404', '200405',\n",
+              "       '200406', '200407', '200408', '200409', '200410', '200411',\n",
+              "       '200412', '200301', '200302', '200303', '200304', '200305',\n",
+              "       '200306', '200307', '200308', '200309', '200310', '200311',\n",
+              "       '200312', '200201', '200202', '200203', '200204', '200205',\n",
+              "       '200206', '200207', '200208', '200209', '200210', '200211',\n",
+              "       '200212', '200101', '200102', '200103', '200104', '200105',\n",
+              "       '200106', '200107', '200108', '200109', '200110', '200111',\n",
+              "       '200112', '200001', '200002', '200003', '200004', '200005',\n",
+              "       '200006', '200007', '200008', '200009', '200010', '200011',\n",
+              "       '200012'], dtype=object)"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 92
+        }
+      ],
+      "source": [
+        "# List the distinct MONAT labels left after the outlier filter.\n",
+        "df['MONAT'].unique()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# Observing how the values (i.e. 'WERT') are distributed across the different columns"
+      ],
+      "metadata": {
+        "id": "nN0BPCJfDrz-"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# MONATSZAHL is categorical, so draw one bar per category (row count)\n",
+        "# instead of binning the string labels into a 30-bin histogram.\n",
+        "fig, ax = plt.subplots()\n",
+        "df['MONATSZAHL'].value_counts().plot.bar(ax=ax, alpha=0.7)\n",
+        "ax.set_title('distribution of accident categories')\n",
+        "ax.set_xlabel('MONATSZAHL')\n",
+        "ax.set_ylabel('number of rows')\n",
+        "plt.show()"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 452
+        },
+        "id": "myFIxEpkDtT5",
+        "outputId": "aa5259a4-20b1-48b4-d9a0-75b26c930d45"
+      },
+      "execution_count": 93,
+      "outputs": [
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "<Figure size 640x480 with 1 Axes>"
+            ],
+            "image/png": "\n"
+          },
+          "metadata": {}
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# AUSPRAEGUNG is categorical as well: one labelled bar per category (row count).\n",
+        "fig, ax = plt.subplots()\n",
+        "df['AUSPRAEGUNG'].value_counts().plot.bar(ax=ax, alpha=0.7)\n",
+        "ax.set_title('distribution of AUSPRAEGUNG categories')\n",
+        "ax.set_xlabel('AUSPRAEGUNG')\n",
+        "ax.set_ylabel('number of rows')\n",
+        "plt.show()"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 430
+        },
+        "id": "qI8yKHJgDw6p",
+        "outputId": "c364e179-1c6e-49fe-cb1c-3a7e4addc341"
+      },
+      "execution_count": 94,
+      "outputs": [
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "<Figure size 640x480 with 1 Axes>"
+            ],
+            "image/png": "\n"
+          },
+          "metadata": {}
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# I observed that the values are distributed almost uniformly for the JAHR column, but for the MONAT column it is a different case"
+      ],
+      "metadata": {
+        "id": "KDJVmnwkD3RV"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Count the remaining rows per year (JAHR).\n",
+        "df['JAHR'].value_counts()"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 743
+        },
+        "id": "vbmMkR5pD2i6",
+        "outputId": "65e87d35-7952-44aa-80e2-67568eecb6cf"
+      },
+      "execution_count": 95,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "JAHR\n",
+              "2019    90\n",
+              "2018    90\n",
+              "2001    90\n",
+              "2002    90\n",
+              "2003    90\n",
+              "2004    90\n",
+              "2005    90\n",
+              "2006    90\n",
+              "2007    90\n",
+              "2008    90\n",
+              "2009    90\n",
+              "2010    90\n",
+              "2011    90\n",
+              "2012    90\n",
+              "2013    90\n",
+              "2014    90\n",
+              "2015    90\n",
+              "2016    90\n",
+              "2017    90\n",
+              "2000    84\n",
+              "Name: count, dtype: int64"
+            ],
+            "text/html": [
+              "<div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>count</th>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>JAHR</th>\n",
+              "      <th></th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>2019</th>\n",
+              "      <td>90</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2018</th>\n",
+              "      <td>90</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2001</th>\n",
+              "      <td>90</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2002</th>\n",
+              "      <td>90</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2003</th>\n",
+              "      <td>90</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2004</th>\n",
+              "      <td>90</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2005</th>\n",
+              "      <td>90</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2006</th>\n",
+              "      <td>90</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2007</th>\n",
+              "      <td>90</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2008</th>\n",
+              "      <td>90</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2009</th>\n",
+              "      <td>90</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2010</th>\n",
+              "      <td>90</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2011</th>\n",
+              "      <td>90</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2012</th>\n",
+              "      <td>90</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2013</th>\n",
+              "      <td>90</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2014</th>\n",
+              "      <td>90</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2015</th>\n",
+              "      <td>90</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2016</th>\n",
+              "      <td>90</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2017</th>\n",
+              "      <td>90</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2000</th>\n",
+              "      <td>84</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div><br><label><b>dtype:</b> int64</label>"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 95
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Count the rows per MONAT label; the 'Summe' label is counted alongside the YYYYMM codes.\n",
+        "df['MONAT'].value_counts()"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 490
+        },
+        "id": "cJyIFAQ1D6iU",
+        "outputId": "c443171e-8f63-4b30-e756-b773706c23fb"
+      },
+      "execution_count": 96,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "MONAT\n",
+              "Summe     114\n",
+              "200901      7\n",
+              "200709      7\n",
+              "200710      7\n",
+              "200711      7\n",
+              "         ... \n",
+              "201201      7\n",
+              "201202      7\n",
+              "201203      7\n",
+              "201204      7\n",
+              "200012      7\n",
+              "Name: count, Length: 241, dtype: int64"
+            ],
+            "text/html": [
+              "<div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>count</th>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>MONAT</th>\n",
+              "      <th></th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>Summe</th>\n",
+              "      <td>114</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>200901</th>\n",
+              "      <td>7</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>200709</th>\n",
+              "      <td>7</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>200710</th>\n",
+              "      <td>7</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>200711</th>\n",
+              "      <td>7</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>...</th>\n",
+              "      <td>...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>201201</th>\n",
+              "      <td>7</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>201202</th>\n",
+              "      <td>7</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>201203</th>\n",
+              "      <td>7</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>201204</th>\n",
+              "      <td>7</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>200012</th>\n",
+              "      <td>7</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "<p>241 rows × 1 columns</p>\n",
+              "</div><br><label><b>dtype:</b> int64</label>"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 96
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# MONAT contains combined year+month values such as '200703'. Since the year is already available in the 'JAHR' column, we can probably drop it here. I'll slice the string to extract just the month"
+      ],
+      "metadata": {
+        "id": "1kjnie9KDgJw"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "def convert_date(data, column_name='MONAT', special_value='Summe'):\n",
+        "    \"\"\"Return a copy of ``data`` with YYYYMM codes in ``column_name`` replaced by month names.\n",
+        "\n",
+        "    Parameters\n",
+        "    ----------\n",
+        "    data : pandas.DataFrame\n",
+        "        Frame holding the column to convert; it is copied, not modified.\n",
+        "    column_name : str\n",
+        "        Column whose values look like '200703' (year digits followed by a two-digit month).\n",
+        "    special_value : str\n",
+        "        Marker value that is passed through unchanged (default 'Summe').\n",
+        "\n",
+        "    Returns\n",
+        "    -------\n",
+        "    pandas.DataFrame\n",
+        "        Copy of ``data`` with English month names in ``column_name``.\n",
+        "\n",
+        "    Raises\n",
+        "    ------\n",
+        "    KeyError\n",
+        "        If a value is neither ``special_value``, a month name, nor a code ending in '01'-'12'.\n",
+        "    \"\"\"\n",
+        "    month_mapping = {\n",
+        "          '01': 'January',\n",
+        "          '02': 'February',\n",
+        "          '03': 'March',\n",
+        "          '04': 'April',\n",
+        "          '05': 'May',\n",
+        "          '06': 'June',\n",
+        "          '07': 'July',\n",
+        "          '08': 'August',\n",
+        "          '09': 'September',\n",
+        "          '10': 'October',\n",
+        "          '11': 'November',\n",
+        "          '12': 'December'\n",
+        "    }\n",
+        "    month_names = set(month_mapping.values())\n",
+        "\n",
+        "    def to_month_name(value):\n",
+        "        # Already-converted names pass through, so re-running the cell does not raise.\n",
+        "        if value == special_value or value in month_names:\n",
+        "            return value\n",
+        "        # str() also accepts integer codes such as 200703; the month is everything after index 4.\n",
+        "        return month_mapping[str(value)[4:]]\n",
+        "\n",
+        "    data_copy = data.copy()\n",
+        "    data_copy[column_name] = data_copy[column_name].apply(to_month_name)\n",
+        "\n",
+        "    return data_copy\n",
+        "\n",
+        "df = convert_date(df, 'MONAT')"
+      ],
+      "metadata": {
+        "id": "Jvv6y70NzDYB"
+      },
+      "execution_count": 97,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "df['MONAT'].unique()"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "1zsJV0d5D96Y",
+        "outputId": "c63ae632-16de-4eb3-ee2d-2a1c7ac4ecf6"
+      },
+      "execution_count": 98,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "array(['Summe', 'January', 'February', 'March', 'April', 'May', 'June',\n",
+              "       'July', 'August', 'September', 'October', 'November', 'December'],\n",
+              "      dtype=object)"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 98
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "df['MONAT'].value_counts()"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 523
+        },
+        "id": "EEWdkVDQBFG4",
+        "outputId": "5c965b3c-fa8e-4e60-df43-dab476a49a11"
+      },
+      "execution_count": 99,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "MONAT\n",
+              "January      140\n",
+              "February     140\n",
+              "March        140\n",
+              "April        140\n",
+              "May          140\n",
+              "June         140\n",
+              "July         140\n",
+              "August       140\n",
+              "September    140\n",
+              "October      140\n",
+              "November     140\n",
+              "December     140\n",
+              "Summe        114\n",
+              "Name: count, dtype: int64"
+            ],
+            "text/html": [
+              "<div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>count</th>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>MONAT</th>\n",
+              "      <th></th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>January</th>\n",
+              "      <td>140</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>February</th>\n",
+              "      <td>140</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>March</th>\n",
+              "      <td>140</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>April</th>\n",
+              "      <td>140</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>May</th>\n",
+              "      <td>140</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>June</th>\n",
+              "      <td>140</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>July</th>\n",
+              "      <td>140</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>August</th>\n",
+              "      <td>140</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>September</th>\n",
+              "      <td>140</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>October</th>\n",
+              "      <td>140</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>November</th>\n",
+              "      <td>140</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>December</th>\n",
+              "      <td>140</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>Summe</th>\n",
+              "      <td>114</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div><br><label><b>dtype:</b> int64</label>"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 99
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# The distribution now seems better for the MONAT column"
+      ],
+      "metadata": {
+        "id": "PMO4-WmDEBUx"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# MONAT is categorical, so draw one bar per category instead of a\n",
+        "# histogram whose 13 equal-width bins do not line up with the labels.\n",
+        "month_counts = df['MONAT'].value_counts(sort=False)\n",
+        "\n",
+        "plt.figure(figsize=(8,6))\n",
+        "plt.bar(month_counts.index, month_counts.values, edgecolor='black')\n",
+        "plt.xticks(rotation=45)\n",
+        "plt.xlabel('MONAT')\n",
+        "plt.ylabel('Number of rows')\n",
+        "plt.title('Rows per MONAT value')\n",
+        "plt.show()"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 522
+        },
+        "id": "cg7JP1E-EC0M",
+        "outputId": "463de13f-cedb-4718-d20d-fb2d49fb8535"
+      },
+      "execution_count": 100,
+      "outputs": [
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "<Figure size 800x600 with 1 Axes>"
+            ],
+            "image/png": "\n"
+          },
+          "metadata": {}
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "df.head(5)"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 206
+        },
+        "id": "3dqIKmKhEFoF",
+        "outputId": "899a11a4-b688-4f51-b006-2ed00d10b51d"
+      },
+      "execution_count": 101,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "       MONATSZAHL AUSPRAEGUNG  JAHR     MONAT   WERT\n",
+              "0  Alkoholunfälle   insgesamt  2019     Summe  434.0\n",
+              "1  Alkoholunfälle   insgesamt  2019   January   22.0\n",
+              "2  Alkoholunfälle   insgesamt  2019  February   28.0\n",
+              "3  Alkoholunfälle   insgesamt  2019     March   34.0\n",
+              "4  Alkoholunfälle   insgesamt  2019     April   36.0"
+            ],
+            "text/html": [
+              "\n",
+              "  <div id=\"df-79124c7c-989a-48ea-b94c-9326fead693d\" class=\"colab-df-container\">\n",
+              "    <div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>MONATSZAHL</th>\n",
+              "      <th>AUSPRAEGUNG</th>\n",
+              "      <th>JAHR</th>\n",
+              "      <th>MONAT</th>\n",
+              "      <th>WERT</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>Alkoholunfälle</td>\n",
+              "      <td>insgesamt</td>\n",
+              "      <td>2019</td>\n",
+              "      <td>Summe</td>\n",
+              "      <td>434.0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>Alkoholunfälle</td>\n",
+              "      <td>insgesamt</td>\n",
+              "      <td>2019</td>\n",
+              "      <td>January</td>\n",
+              "      <td>22.0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2</th>\n",
+              "      <td>Alkoholunfälle</td>\n",
+              "      <td>insgesamt</td>\n",
+              "      <td>2019</td>\n",
+              "      <td>February</td>\n",
+              "      <td>28.0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>Alkoholunfälle</td>\n",
+              "      <td>insgesamt</td>\n",
+              "      <td>2019</td>\n",
+              "      <td>March</td>\n",
+              "      <td>34.0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>Alkoholunfälle</td>\n",
+              "      <td>insgesamt</td>\n",
+              "      <td>2019</td>\n",
+              "      <td>April</td>\n",
+              "      <td>36.0</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>\n",
+              "    <div class=\"colab-df-buttons\">\n",
+              "\n",
+              "  <div class=\"colab-df-container\">\n",
+              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-79124c7c-989a-48ea-b94c-9326fead693d')\"\n",
+              "            title=\"Convert this dataframe to an interactive table.\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
+              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
+              "  </svg>\n",
+              "    </button>\n",
+              "\n",
+              "  <style>\n",
+              "    .colab-df-container {\n",
+              "      display:flex;\n",
+              "      gap: 12px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert {\n",
+              "      background-color: #E8F0FE;\n",
+              "      border: none;\n",
+              "      border-radius: 50%;\n",
+              "      cursor: pointer;\n",
+              "      display: none;\n",
+              "      fill: #1967D2;\n",
+              "      height: 32px;\n",
+              "      padding: 0 0 0 0;\n",
+              "      width: 32px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert:hover {\n",
+              "      background-color: #E2EBFA;\n",
+              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "      fill: #174EA6;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-buttons div {\n",
+              "      margin-bottom: 4px;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert {\n",
+              "      background-color: #3B4455;\n",
+              "      fill: #D2E3FC;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert:hover {\n",
+              "      background-color: #434B5C;\n",
+              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "      fill: #FFFFFF;\n",
+              "    }\n",
+              "  </style>\n",
+              "\n",
+              "    <script>\n",
+              "      const buttonEl =\n",
+              "        document.querySelector('#df-79124c7c-989a-48ea-b94c-9326fead693d button.colab-df-convert');\n",
+              "      buttonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "\n",
+              "      async function convertToInteractive(key) {\n",
+              "        const element = document.querySelector('#df-79124c7c-989a-48ea-b94c-9326fead693d');\n",
+              "        const dataTable =\n",
+              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
+              "                                                    [key], {});\n",
+              "        if (!dataTable) return;\n",
+              "\n",
+              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
+              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
+              "          + ' to learn more about interactive tables.';\n",
+              "        element.innerHTML = '';\n",
+              "        dataTable['output_type'] = 'display_data';\n",
+              "        await google.colab.output.renderOutput(dataTable, element);\n",
+              "        const docLink = document.createElement('div');\n",
+              "        docLink.innerHTML = docLinkHtml;\n",
+              "        element.appendChild(docLink);\n",
+              "      }\n",
+              "    </script>\n",
+              "  </div>\n",
+              "\n",
+              "\n",
+              "<div id=\"df-a0263b96-b1fc-4cfb-9a1c-8f441d9cdcd4\">\n",
+              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-a0263b96-b1fc-4cfb-9a1c-8f441d9cdcd4')\"\n",
+              "            title=\"Suggest charts\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "     width=\"24px\">\n",
+              "    <g>\n",
+              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
+              "    </g>\n",
+              "</svg>\n",
+              "  </button>\n",
+              "\n",
+              "<style>\n",
+              "  .colab-df-quickchart {\n",
+              "      --bg-color: #E8F0FE;\n",
+              "      --fill-color: #1967D2;\n",
+              "      --hover-bg-color: #E2EBFA;\n",
+              "      --hover-fill-color: #174EA6;\n",
+              "      --disabled-fill-color: #AAA;\n",
+              "      --disabled-bg-color: #DDD;\n",
+              "  }\n",
+              "\n",
+              "  [theme=dark] .colab-df-quickchart {\n",
+              "      --bg-color: #3B4455;\n",
+              "      --fill-color: #D2E3FC;\n",
+              "      --hover-bg-color: #434B5C;\n",
+              "      --hover-fill-color: #FFFFFF;\n",
+              "      --disabled-bg-color: #3B4455;\n",
+              "      --disabled-fill-color: #666;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart {\n",
+              "    background-color: var(--bg-color);\n",
+              "    border: none;\n",
+              "    border-radius: 50%;\n",
+              "    cursor: pointer;\n",
+              "    display: none;\n",
+              "    fill: var(--fill-color);\n",
+              "    height: 32px;\n",
+              "    padding: 0;\n",
+              "    width: 32px;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart:hover {\n",
+              "    background-color: var(--hover-bg-color);\n",
+              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "    fill: var(--button-hover-fill-color);\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart-complete:disabled,\n",
+              "  .colab-df-quickchart-complete:disabled:hover {\n",
+              "    background-color: var(--disabled-bg-color);\n",
+              "    fill: var(--disabled-fill-color);\n",
+              "    box-shadow: none;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-spinner {\n",
+              "    border: 2px solid var(--fill-color);\n",
+              "    border-color: transparent;\n",
+              "    border-bottom-color: var(--fill-color);\n",
+              "    animation:\n",
+              "      spin 1s steps(1) infinite;\n",
+              "  }\n",
+              "\n",
+              "  @keyframes spin {\n",
+              "    0% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "      border-left-color: var(--fill-color);\n",
+              "    }\n",
+              "    20% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    30% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    40% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    60% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    80% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "    90% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "  }\n",
+              "</style>\n",
+              "\n",
+              "  <script>\n",
+              "    async function quickchart(key) {\n",
+              "      const quickchartButtonEl =\n",
+              "        document.querySelector('#' + key + ' button');\n",
+              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
+              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
+              "      try {\n",
+              "        const charts = await google.colab.kernel.invokeFunction(\n",
+              "            'suggestCharts', [key], {});\n",
+              "      } catch (error) {\n",
+              "        console.error('Error during call to suggestCharts:', error);\n",
+              "      }\n",
+              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
+              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
+              "    }\n",
+              "    (() => {\n",
+              "      let quickchartButtonEl =\n",
+              "        document.querySelector('#df-a0263b96-b1fc-4cfb-9a1c-8f441d9cdcd4 button');\n",
+              "      quickchartButtonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "    })();\n",
+              "  </script>\n",
+              "</div>\n",
+              "\n",
+              "    </div>\n",
+              "  </div>\n"
+            ],
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "type": "dataframe",
+              "variable_name": "df",
+              "summary": "{\n  \"name\": \"df\",\n  \"rows\": 1794,\n  \"fields\": [\n    {\n      \"column\": \"MONATSZAHL\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 3,\n        \"samples\": [\n          \"Alkoholunf\\u00e4lle\",\n          \"Fluchtunf\\u00e4lle\",\n          \"Verkehrsunf\\u00e4lle\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"AUSPRAEGUNG\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 3,\n        \"samples\": [\n          \"insgesamt\",\n          \"Verletzte und Get\\u00f6tete\",\n          \"mit Personensch\\u00e4den\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"JAHR\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 5,\n        \"min\": 2000,\n        \"max\": 2019,\n        \"num_unique_values\": 20,\n        \"samples\": [\n          2019,\n          2002,\n          2004\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"MONAT\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 13,\n        \"samples\": [\n          \"November\",\n          \"September\",\n          \"Summe\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"WERT\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 1674.8731821587605,\n        \"min\": 0.0,\n        \"max\": 11773.0,\n        \"num_unique_values\": 884,\n        \"samples\": [\n          56.0,\n          4209.0,\n          284.0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"
+            }
+          },
+          "metadata": {},
+          "execution_count": 101
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "dNRwBaKlyyBz"
+      },
+      "source": [
+        "# I was unsure whether to use a label encoder or a one-hot encoder, but since the model performed better with one-hot encoding, I decided to go with it"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "columns_to_encode = df.columns[0:4]"
+      ],
+      "metadata": {
+        "id": "tjvcKt7JcjUm"
+      },
+      "execution_count": 102,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "columns_to_encode"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "P7ctV8D1dIOe",
+        "outputId": "86ace85a-f8d0-4ea2-d82f-16a00d2152cb"
+      },
+      "execution_count": 105,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "Index(['MONATSZAHL', 'AUSPRAEGUNG', 'JAHR', 'MONAT'], dtype='object')"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 105
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "def _one_hot(df, columns=None):\n",
+        "    \"\"\"One-hot encode columns of `df` and return the result with the fitted encoder.\n",
+        "\n",
+        "    Args:\n",
+        "        df: Input DataFrame; it is not modified.\n",
+        "        columns: Labels of the columns to encode. When omitted, the first\n",
+        "            four columns of `df` are encoded.\n",
+        "\n",
+        "    Returns:\n",
+        "        A `(final_df, encoder)` tuple: `final_df` holds the columns that\n",
+        "        were not encoded followed by the encoded indicator columns, and\n",
+        "        `encoder` is the fitted OneHotEncoder.\n",
+        "    \"\"\"\n",
+        "    one_hot_columns = df.columns[0:4] if columns is None else list(columns)\n",
+        "\n",
+        "    encoder = OneHotEncoder(sparse_output=False)\n",
+        "    encoded_values = encoder.fit_transform(df[one_hot_columns])\n",
+        "\n",
+        "    encoded_df = pd.DataFrame(\n",
+        "        encoded_values,\n",
+        "        columns=encoder.get_feature_names_out(one_hot_columns),\n",
+        "        # Reuse the original index so the concat below aligns row by row.\n",
+        "        index=df.index\n",
+        "    )\n",
+        "\n",
+        "    # drop() and concat() return new objects, so `df` stays untouched\n",
+        "    # without an explicit copy.\n",
+        "    final_df = pd.concat([\n",
+        "        df.drop(columns=one_hot_columns),\n",
+        "        encoded_df\n",
+        "    ], axis=1)\n",
+        "\n",
+        "    return final_df, encoder\n",
+        "\n",
+        "final_df, encoder = _one_hot(df)\n"
+      ],
+      "metadata": {
+        "id": "h3rWO-QWY-Tn"
+      },
+      "execution_count": 114,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 110,
+      "metadata": {
+        "id": "poFSghx08_ig",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 1000
+        },
+        "outputId": "99da1f84-2ea1-4ff7-d458-0c77009c8c98"
+      },
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "WERT                                  0\n",
+              "MONATSZAHL_Alkoholunfälle             0\n",
+              "MONATSZAHL_Fluchtunfälle              0\n",
+              "MONATSZAHL_Verkehrsunfälle            0\n",
+              "AUSPRAEGUNG_Verletzte und Getötete    0\n",
+              "AUSPRAEGUNG_insgesamt                 0\n",
+              "AUSPRAEGUNG_mit Personenschäden       0\n",
+              "JAHR_2000                             0\n",
+              "JAHR_2001                             0\n",
+              "JAHR_2002                             0\n",
+              "JAHR_2003                             0\n",
+              "JAHR_2004                             0\n",
+              "JAHR_2005                             0\n",
+              "JAHR_2006                             0\n",
+              "JAHR_2007                             0\n",
+              "JAHR_2008                             0\n",
+              "JAHR_2009                             0\n",
+              "JAHR_2010                             0\n",
+              "JAHR_2011                             0\n",
+              "JAHR_2012                             0\n",
+              "JAHR_2013                             0\n",
+              "JAHR_2014                             0\n",
+              "JAHR_2015                             0\n",
+              "JAHR_2016                             0\n",
+              "JAHR_2017                             0\n",
+              "JAHR_2018                             0\n",
+              "JAHR_2019                             0\n",
+              "MONAT_April                           0\n",
+              "MONAT_August                          0\n",
+              "MONAT_December                        0\n",
+              "MONAT_February                        0\n",
+              "MONAT_January                         0\n",
+              "MONAT_July                            0\n",
+              "MONAT_June                            0\n",
+              "MONAT_March                           0\n",
+              "MONAT_May                             0\n",
+              "MONAT_November                        0\n",
+              "MONAT_October                         0\n",
+              "MONAT_September                       0\n",
+              "MONAT_Summe                           0\n",
+              "dtype: int64"
+            ],
+            "text/html": [
+              "<div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>0</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>WERT</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>MONATSZAHL_Alkoholunfälle</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>MONATSZAHL_Fluchtunfälle</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>MONATSZAHL_Verkehrsunfälle</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>AUSPRAEGUNG_Verletzte und Getötete</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>AUSPRAEGUNG_insgesamt</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>AUSPRAEGUNG_mit Personenschäden</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>JAHR_2000</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>JAHR_2001</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>JAHR_2002</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>JAHR_2003</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>JAHR_2004</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>JAHR_2005</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>JAHR_2006</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>JAHR_2007</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>JAHR_2008</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>JAHR_2009</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>JAHR_2010</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>JAHR_2011</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>JAHR_2012</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>JAHR_2013</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>JAHR_2014</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>JAHR_2015</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>JAHR_2016</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>JAHR_2017</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>JAHR_2018</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>JAHR_2019</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>MONAT_April</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>MONAT_August</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>MONAT_December</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>MONAT_February</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>MONAT_January</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>MONAT_July</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>MONAT_June</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>MONAT_March</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>MONAT_May</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>MONAT_November</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>MONAT_October</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>MONAT_September</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>MONAT_Summe</th>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div><br><label><b>dtype:</b> int64</label>"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 110
+        }
+      ],
+      "source": [
+        "final_df.isna().sum()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "d2i8SDsSyyBz"
+      },
+      "source": [
+        "# 3. Finally, training the model and downloading it as a .pkl file to use in the API"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "x = final_df.drop(columns=['WERT'])\n",
+        "y = final_df['WERT']\n",
+        "\n",
+        "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=10)\n"
+      ],
+      "metadata": {
+        "id": "H5GezDyXB_In"
+      },
+      "execution_count": 77,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 78,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 309
+        },
+        "id": "EVn2xhzhzQVa",
+        "outputId": "5fbe46ae-20a7-439d-8f15-e396494dda1a"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Fitting 2 folds for each of 243 candidates, totalling 486 fits\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "/usr/local/lib/python3.10/dist-packages/xgboost/core.py:158: UserWarning: [15:56:52] WARNING: /workspace/src/common/error_msg.cc:27: The tree method `gpu_hist` is deprecated since 2.0.0. To use GPU training, set the `device` parameter to CUDA instead.\n",
+            "\n",
+            "    E.g. tree_method = \"hist\", device = \"cuda\"\n",
+            "\n",
+            "  warnings.warn(smsg, UserWarning)\n",
+            "/usr/local/lib/python3.10/dist-packages/xgboost/core.py:158: UserWarning: [15:56:52] WARNING: /workspace/src/learner.cc:740: \n",
+            "Parameters: { \"predictor\" } are not used.\n",
+            "\n",
+            "  warnings.warn(smsg, UserWarning)\n"
+          ]
+        },
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "GridSearchCV(cv=2,\n",
+              "             estimator=XGBRegressor(base_score=None, booster=None,\n",
+              "                                    callbacks=None, colsample_bylevel=None,\n",
+              "                                    colsample_bynode=None,\n",
+              "                                    colsample_bytree=None, device=None,\n",
+              "                                    early_stopping_rounds=None,\n",
+              "                                    enable_categorical=False, eval_metric=None,\n",
+              "                                    feature_types=None, gamma=None,\n",
+              "                                    grow_policy=None, importance_type=None,\n",
+              "                                    interaction_constraints=None,\n",
+              "                                    learning_rate=None, m...\n",
+              "                                    min_child_weight=None, missing=nan,\n",
+              "                                    monotone_constraints=None,\n",
+              "                                    multi_strategy=None, n_estimators=None,\n",
+              "                                    n_jobs=None, num_parallel_tree=None,\n",
+              "                                    predictor='gpu_predictor', ...),\n",
+              "             n_jobs=-1,\n",
+              "             param_grid={'colsample_bytree': [0.6, 0.7, 0.8],\n",
+              "                         'learning_rate': [0.01, 0.05, 0.1],\n",
+              "                         'max_depth': [3, 5, 7],\n",
+              "                         'n_estimators': [100, 200, 300],\n",
+              "                         'subsample': [0.7, 0.8, 0.9]},\n",
+              "             scoring='neg_mean_squared_error', verbose=2)"
+            ],
+            "text/html": [
+              "<style>#sk-container-id-2 {\n",
+              "  /* Definition of color scheme common for light and dark mode */\n",
+              "  --sklearn-color-text: black;\n",
+              "  --sklearn-color-line: gray;\n",
+              "  /* Definition of color scheme for unfitted estimators */\n",
+              "  --sklearn-color-unfitted-level-0: #fff5e6;\n",
+              "  --sklearn-color-unfitted-level-1: #f6e4d2;\n",
+              "  --sklearn-color-unfitted-level-2: #ffe0b3;\n",
+              "  --sklearn-color-unfitted-level-3: chocolate;\n",
+              "  /* Definition of color scheme for fitted estimators */\n",
+              "  --sklearn-color-fitted-level-0: #f0f8ff;\n",
+              "  --sklearn-color-fitted-level-1: #d4ebff;\n",
+              "  --sklearn-color-fitted-level-2: #b3dbfd;\n",
+              "  --sklearn-color-fitted-level-3: cornflowerblue;\n",
+              "\n",
+              "  /* Specific color for light theme */\n",
+              "  --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
+              "  --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
+              "  --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
+              "  --sklearn-color-icon: #696969;\n",
+              "\n",
+              "  @media (prefers-color-scheme: dark) {\n",
+              "    /* Redefinition of color scheme for dark theme */\n",
+              "    --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
+              "    --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
+              "    --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
+              "    --sklearn-color-icon: #878787;\n",
+              "  }\n",
+              "}\n",
+              "\n",
+              "#sk-container-id-2 {\n",
+              "  color: var(--sklearn-color-text);\n",
+              "}\n",
+              "\n",
+              "#sk-container-id-2 pre {\n",
+              "  padding: 0;\n",
+              "}\n",
+              "\n",
+              "#sk-container-id-2 input.sk-hidden--visually {\n",
+              "  border: 0;\n",
+              "  clip: rect(1px 1px 1px 1px);\n",
+              "  clip: rect(1px, 1px, 1px, 1px);\n",
+              "  height: 1px;\n",
+              "  margin: -1px;\n",
+              "  overflow: hidden;\n",
+              "  padding: 0;\n",
+              "  position: absolute;\n",
+              "  width: 1px;\n",
+              "}\n",
+              "\n",
+              "#sk-container-id-2 div.sk-dashed-wrapped {\n",
+              "  border: 1px dashed var(--sklearn-color-line);\n",
+              "  margin: 0 0.4em 0.5em 0.4em;\n",
+              "  box-sizing: border-box;\n",
+              "  padding-bottom: 0.4em;\n",
+              "  background-color: var(--sklearn-color-background);\n",
+              "}\n",
+              "\n",
+              "#sk-container-id-2 div.sk-container {\n",
+              "  /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
+              "     but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
+              "     so we also need the `!important` here to be able to override the\n",
+              "     default hidden behavior on the sphinx rendered scikit-learn.org.\n",
+              "     See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
+              "  display: inline-block !important;\n",
+              "  position: relative;\n",
+              "}\n",
+              "\n",
+              "#sk-container-id-2 div.sk-text-repr-fallback {\n",
+              "  display: none;\n",
+              "}\n",
+              "\n",
+              "div.sk-parallel-item,\n",
+              "div.sk-serial,\n",
+              "div.sk-item {\n",
+              "  /* draw centered vertical line to link estimators */\n",
+              "  background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
+              "  background-size: 2px 100%;\n",
+              "  background-repeat: no-repeat;\n",
+              "  background-position: center center;\n",
+              "}\n",
+              "\n",
+              "/* Parallel-specific style estimator block */\n",
+              "\n",
+              "#sk-container-id-2 div.sk-parallel-item::after {\n",
+              "  content: \"\";\n",
+              "  width: 100%;\n",
+              "  border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
+              "  flex-grow: 1;\n",
+              "}\n",
+              "\n",
+              "#sk-container-id-2 div.sk-parallel {\n",
+              "  display: flex;\n",
+              "  align-items: stretch;\n",
+              "  justify-content: center;\n",
+              "  background-color: var(--sklearn-color-background);\n",
+              "  position: relative;\n",
+              "}\n",
+              "\n",
+              "#sk-container-id-2 div.sk-parallel-item {\n",
+              "  display: flex;\n",
+              "  flex-direction: column;\n",
+              "}\n",
+              "\n",
+              "#sk-container-id-2 div.sk-parallel-item:first-child::after {\n",
+              "  align-self: flex-end;\n",
+              "  width: 50%;\n",
+              "}\n",
+              "\n",
+              "#sk-container-id-2 div.sk-parallel-item:last-child::after {\n",
+              "  align-self: flex-start;\n",
+              "  width: 50%;\n",
+              "}\n",
+              "\n",
+              "#sk-container-id-2 div.sk-parallel-item:only-child::after {\n",
+              "  width: 0;\n",
+              "}\n",
+              "\n",
+              "/* Serial-specific style estimator block */\n",
+              "\n",
+              "#sk-container-id-2 div.sk-serial {\n",
+              "  display: flex;\n",
+              "  flex-direction: column;\n",
+              "  align-items: center;\n",
+              "  background-color: var(--sklearn-color-background);\n",
+              "  padding-right: 1em;\n",
+              "  padding-left: 1em;\n",
+              "}\n",
+              "\n",
+              "\n",
+              "/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
+              "clickable and can be expanded/collapsed.\n",
+              "- Pipeline and ColumnTransformer use this feature and define the default style\n",
+              "- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
+              "*/\n",
+              "\n",
+              "/* Pipeline and ColumnTransformer style (default) */\n",
+              "\n",
+              "#sk-container-id-2 div.sk-toggleable {\n",
+              "  /* Default theme specific background. It is overwritten whether we have a\n",
+              "  specific estimator or a Pipeline/ColumnTransformer */\n",
+              "  background-color: var(--sklearn-color-background);\n",
+              "}\n",
+              "\n",
+              "/* Toggleable label */\n",
+              "#sk-container-id-2 label.sk-toggleable__label {\n",
+              "  cursor: pointer;\n",
+              "  display: block;\n",
+              "  width: 100%;\n",
+              "  margin-bottom: 0;\n",
+              "  padding: 0.5em;\n",
+              "  box-sizing: border-box;\n",
+              "  text-align: center;\n",
+              "}\n",
+              "\n",
+              "#sk-container-id-2 label.sk-toggleable__label-arrow:before {\n",
+              "  /* Arrow on the left of the label */\n",
+              "  content: \"▸\";\n",
+              "  float: left;\n",
+              "  margin-right: 0.25em;\n",
+              "  color: var(--sklearn-color-icon);\n",
+              "}\n",
+              "\n",
+              "#sk-container-id-2 label.sk-toggleable__label-arrow:hover:before {\n",
+              "  color: var(--sklearn-color-text);\n",
+              "}\n",
+              "\n",
+              "/* Toggleable content - dropdown */\n",
+              "\n",
+              "#sk-container-id-2 div.sk-toggleable__content {\n",
+              "  max-height: 0;\n",
+              "  max-width: 0;\n",
+              "  overflow: hidden;\n",
+              "  text-align: left;\n",
+              "  /* unfitted */\n",
+              "  background-color: var(--sklearn-color-unfitted-level-0);\n",
+              "}\n",
+              "\n",
+              "#sk-container-id-2 div.sk-toggleable__content.fitted {\n",
+              "  /* fitted */\n",
+              "  background-color: var(--sklearn-color-fitted-level-0);\n",
+              "}\n",
+              "\n",
+              "#sk-container-id-2 div.sk-toggleable__content pre {\n",
+              "  margin: 0.2em;\n",
+              "  border-radius: 0.25em;\n",
+              "  color: var(--sklearn-color-text);\n",
+              "  /* unfitted */\n",
+              "  background-color: var(--sklearn-color-unfitted-level-0);\n",
+              "}\n",
+              "\n",
+              "#sk-container-id-2 div.sk-toggleable__content.fitted pre {\n",
+              "  /* unfitted */\n",
+              "  background-color: var(--sklearn-color-fitted-level-0);\n",
+              "}\n",
+              "\n",
+              "#sk-container-id-2 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
+              "  /* Expand drop-down */\n",
+              "  max-height: 200px;\n",
+              "  max-width: 100%;\n",
+              "  overflow: auto;\n",
+              "}\n",
+              "\n",
+              "#sk-container-id-2 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
+              "  content: \"▾\";\n",
+              "}\n",
+              "\n",
+              "/* Pipeline/ColumnTransformer-specific style */\n",
+              "\n",
+              "#sk-container-id-2 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
+              "  color: var(--sklearn-color-text);\n",
+              "  background-color: var(--sklearn-color-unfitted-level-2);\n",
+              "}\n",
+              "\n",
+              "#sk-container-id-2 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
+              "  background-color: var(--sklearn-color-fitted-level-2);\n",
+              "}\n",
+              "\n",
+              "/* Estimator-specific style */\n",
+              "\n",
+              "/* Colorize estimator box */\n",
+              "#sk-container-id-2 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
+              "  /* unfitted */\n",
+              "  background-color: var(--sklearn-color-unfitted-level-2);\n",
+              "}\n",
+              "\n",
+              "#sk-container-id-2 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
+              "  /* fitted */\n",
+              "  background-color: var(--sklearn-color-fitted-level-2);\n",
+              "}\n",
+              "\n",
+              "#sk-container-id-2 div.sk-label label.sk-toggleable__label,\n",
+              "#sk-container-id-2 div.sk-label label {\n",
+              "  /* The background is the default theme color */\n",
+              "  color: var(--sklearn-color-text-on-default-background);\n",
+              "}\n",
+              "\n",
+              "/* On hover, darken the color of the background */\n",
+              "#sk-container-id-2 div.sk-label:hover label.sk-toggleable__label {\n",
+              "  color: var(--sklearn-color-text);\n",
+              "  background-color: var(--sklearn-color-unfitted-level-2);\n",
+              "}\n",
+              "\n",
+              "/* Label box, darken color on hover, fitted */\n",
+              "#sk-container-id-2 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
+              "  color: var(--sklearn-color-text);\n",
+              "  background-color: var(--sklearn-color-fitted-level-2);\n",
+              "}\n",
+              "\n",
+              "/* Estimator label */\n",
+              "\n",
+              "#sk-container-id-2 div.sk-label label {\n",
+              "  font-family: monospace;\n",
+              "  font-weight: bold;\n",
+              "  display: inline-block;\n",
+              "  line-height: 1.2em;\n",
+              "}\n",
+              "\n",
+              "#sk-container-id-2 div.sk-label-container {\n",
+              "  text-align: center;\n",
+              "}\n",
+              "\n",
+              "/* Estimator-specific */\n",
+              "#sk-container-id-2 div.sk-estimator {\n",
+              "  font-family: monospace;\n",
+              "  border: 1px dotted var(--sklearn-color-border-box);\n",
+              "  border-radius: 0.25em;\n",
+              "  box-sizing: border-box;\n",
+              "  margin-bottom: 0.5em;\n",
+              "  /* unfitted */\n",
+              "  background-color: var(--sklearn-color-unfitted-level-0);\n",
+              "}\n",
+              "\n",
+              "#sk-container-id-2 div.sk-estimator.fitted {\n",
+              "  /* fitted */\n",
+              "  background-color: var(--sklearn-color-fitted-level-0);\n",
+              "}\n",
+              "\n",
+              "/* on hover */\n",
+              "#sk-container-id-2 div.sk-estimator:hover {\n",
+              "  /* unfitted */\n",
+              "  background-color: var(--sklearn-color-unfitted-level-2);\n",
+              "}\n",
+              "\n",
+              "#sk-container-id-2 div.sk-estimator.fitted:hover {\n",
+              "  /* fitted */\n",
+              "  background-color: var(--sklearn-color-fitted-level-2);\n",
+              "}\n",
+              "\n",
+              "/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
+              "\n",
+              "/* Common style for \"i\" and \"?\" */\n",
+              "\n",
+              ".sk-estimator-doc-link,\n",
+              "a:link.sk-estimator-doc-link,\n",
+              "a:visited.sk-estimator-doc-link {\n",
+              "  float: right;\n",
+              "  font-size: smaller;\n",
+              "  line-height: 1em;\n",
+              "  font-family: monospace;\n",
+              "  background-color: var(--sklearn-color-background);\n",
+              "  border-radius: 1em;\n",
+              "  height: 1em;\n",
+              "  width: 1em;\n",
+              "  text-decoration: none !important;\n",
+              "  margin-left: 1ex;\n",
+              "  /* unfitted */\n",
+              "  border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
+              "  color: var(--sklearn-color-unfitted-level-1);\n",
+              "}\n",
+              "\n",
+              ".sk-estimator-doc-link.fitted,\n",
+              "a:link.sk-estimator-doc-link.fitted,\n",
+              "a:visited.sk-estimator-doc-link.fitted {\n",
+              "  /* fitted */\n",
+              "  border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
+              "  color: var(--sklearn-color-fitted-level-1);\n",
+              "}\n",
+              "\n",
+              "/* On hover */\n",
+              "div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
+              ".sk-estimator-doc-link:hover,\n",
+              "div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
+              ".sk-estimator-doc-link:hover {\n",
+              "  /* unfitted */\n",
+              "  background-color: var(--sklearn-color-unfitted-level-3);\n",
+              "  color: var(--sklearn-color-background);\n",
+              "  text-decoration: none;\n",
+              "}\n",
+              "\n",
+              "div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
+              ".sk-estimator-doc-link.fitted:hover,\n",
+              "div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
+              ".sk-estimator-doc-link.fitted:hover {\n",
+              "  /* fitted */\n",
+              "  background-color: var(--sklearn-color-fitted-level-3);\n",
+              "  color: var(--sklearn-color-background);\n",
+              "  text-decoration: none;\n",
+              "}\n",
+              "\n",
+              "/* Span, style for the box shown on hovering the info icon */\n",
+              ".sk-estimator-doc-link span {\n",
+              "  display: none;\n",
+              "  z-index: 9999;\n",
+              "  position: relative;\n",
+              "  font-weight: normal;\n",
+              "  right: .2ex;\n",
+              "  padding: .5ex;\n",
+              "  margin: .5ex;\n",
+              "  width: min-content;\n",
+              "  min-width: 20ex;\n",
+              "  max-width: 50ex;\n",
+              "  color: var(--sklearn-color-text);\n",
+              "  box-shadow: 2pt 2pt 4pt #999;\n",
+              "  /* unfitted */\n",
+              "  background: var(--sklearn-color-unfitted-level-0);\n",
+              "  border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
+              "}\n",
+              "\n",
+              ".sk-estimator-doc-link.fitted span {\n",
+              "  /* fitted */\n",
+              "  background: var(--sklearn-color-fitted-level-0);\n",
+              "  border: var(--sklearn-color-fitted-level-3);\n",
+              "}\n",
+              "\n",
+              ".sk-estimator-doc-link:hover span {\n",
+              "  display: block;\n",
+              "}\n",
+              "\n",
+              "/* \"?\"-specific style due to the `<a>` HTML tag */\n",
+              "\n",
+              "#sk-container-id-2 a.estimator_doc_link {\n",
+              "  float: right;\n",
+              "  font-size: 1rem;\n",
+              "  line-height: 1em;\n",
+              "  font-family: monospace;\n",
+              "  background-color: var(--sklearn-color-background);\n",
+              "  border-radius: 1rem;\n",
+              "  height: 1rem;\n",
+              "  width: 1rem;\n",
+              "  text-decoration: none;\n",
+              "  /* unfitted */\n",
+              "  color: var(--sklearn-color-unfitted-level-1);\n",
+              "  border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
+              "}\n",
+              "\n",
+              "#sk-container-id-2 a.estimator_doc_link.fitted {\n",
+              "  /* fitted */\n",
+              "  border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
+              "  color: var(--sklearn-color-fitted-level-1);\n",
+              "}\n",
+              "\n",
+              "/* On hover */\n",
+              "#sk-container-id-2 a.estimator_doc_link:hover {\n",
+              "  /* unfitted */\n",
+              "  background-color: var(--sklearn-color-unfitted-level-3);\n",
+              "  color: var(--sklearn-color-background);\n",
+              "  text-decoration: none;\n",
+              "}\n",
+              "\n",
+              "#sk-container-id-2 a.estimator_doc_link.fitted:hover {\n",
+              "  /* fitted */\n",
+              "  background-color: var(--sklearn-color-fitted-level-3);\n",
+              "}\n",
+              "</style><div id=\"sk-container-id-2\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>GridSearchCV(cv=2,\n",
+              "             estimator=XGBRegressor(base_score=None, booster=None,\n",
+              "                                    callbacks=None, colsample_bylevel=None,\n",
+              "                                    colsample_bynode=None,\n",
+              "                                    colsample_bytree=None, device=None,\n",
+              "                                    early_stopping_rounds=None,\n",
+              "                                    enable_categorical=False, eval_metric=None,\n",
+              "                                    feature_types=None, gamma=None,\n",
+              "                                    grow_policy=None, importance_type=None,\n",
+              "                                    interaction_constraints=None,\n",
+              "                                    learning_rate=None, m...\n",
+              "                                    min_child_weight=None, missing=nan,\n",
+              "                                    monotone_constraints=None,\n",
+              "                                    multi_strategy=None, n_estimators=None,\n",
+              "                                    n_jobs=None, num_parallel_tree=None,\n",
+              "                                    predictor=&#x27;gpu_predictor&#x27;, ...),\n",
+              "             n_jobs=-1,\n",
+              "             param_grid={&#x27;colsample_bytree&#x27;: [0.6, 0.7, 0.8],\n",
+              "                         &#x27;learning_rate&#x27;: [0.01, 0.05, 0.1],\n",
+              "                         &#x27;max_depth&#x27;: [3, 5, 7],\n",
+              "                         &#x27;n_estimators&#x27;: [100, 200, 300],\n",
+              "                         &#x27;subsample&#x27;: [0.7, 0.8, 0.9]},\n",
+              "             scoring=&#x27;neg_mean_squared_error&#x27;, verbose=2)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" ><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">&nbsp;&nbsp;GridSearchCV<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.5/modules/generated/sklearn.model_selection.GridSearchCV.html\">?<span>Documentation for GridSearchCV</span></a><span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></label><div class=\"sk-toggleable__content fitted\"><pre>GridSearchCV(cv=2,\n",
+              "             estimator=XGBRegressor(base_score=None, booster=None,\n",
+              "                                    callbacks=None, colsample_bylevel=None,\n",
+              "                                    colsample_bynode=None,\n",
+              "                                    colsample_bytree=None, device=None,\n",
+              "                                    early_stopping_rounds=None,\n",
+              "                                    enable_categorical=False, eval_metric=None,\n",
+              "                                    feature_types=None, gamma=None,\n",
+              "                                    grow_policy=None, importance_type=None,\n",
+              "                                    interaction_constraints=None,\n",
+              "                                    learning_rate=None, m...\n",
+              "                                    min_child_weight=None, missing=nan,\n",
+              "                                    monotone_constraints=None,\n",
+              "                                    multi_strategy=None, n_estimators=None,\n",
+              "                                    n_jobs=None, num_parallel_tree=None,\n",
+              "                                    predictor=&#x27;gpu_predictor&#x27;, ...),\n",
+              "             n_jobs=-1,\n",
+              "             param_grid={&#x27;colsample_bytree&#x27;: [0.6, 0.7, 0.8],\n",
+              "                         &#x27;learning_rate&#x27;: [0.01, 0.05, 0.1],\n",
+              "                         &#x27;max_depth&#x27;: [3, 5, 7],\n",
+              "                         &#x27;n_estimators&#x27;: [100, 200, 300],\n",
+              "                         &#x27;subsample&#x27;: [0.7, 0.8, 0.9]},\n",
+              "             scoring=&#x27;neg_mean_squared_error&#x27;, verbose=2)</pre></div> </div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-3\" type=\"checkbox\" ><label for=\"sk-estimator-id-3\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">best_estimator_: XGBRegressor</label><div class=\"sk-toggleable__content fitted\"><pre>XGBRegressor(base_score=None, booster=None, callbacks=None,\n",
+              "             colsample_bylevel=None, colsample_bynode=None,\n",
+              "             colsample_bytree=0.8, device=None, early_stopping_rounds=None,\n",
+              "             enable_categorical=False, eval_metric=None, feature_types=None,\n",
+              "             gamma=None, grow_policy=None, importance_type=None,\n",
+              "             interaction_constraints=None, learning_rate=0.1, max_bin=None,\n",
+              "             max_cat_threshold=None, max_cat_to_onehot=None,\n",
+              "             max_delta_step=None, max_depth=5, max_leaves=None,\n",
+              "             min_child_weight=None, missing=nan, monotone_constraints=None,\n",
+              "             multi_strategy=None, n_estimators=300, n_jobs=None,\n",
+              "             num_parallel_tree=None, predictor=&#x27;gpu_predictor&#x27;, ...)</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-4\" type=\"checkbox\" ><label for=\"sk-estimator-id-4\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">XGBRegressor</label><div class=\"sk-toggleable__content fitted\"><pre>XGBRegressor(base_score=None, booster=None, callbacks=None,\n",
+              "             colsample_bylevel=None, colsample_bynode=None,\n",
+              "             colsample_bytree=0.8, device=None, early_stopping_rounds=None,\n",
+              "             enable_categorical=False, eval_metric=None, feature_types=None,\n",
+              "             gamma=None, grow_policy=None, importance_type=None,\n",
+              "             interaction_constraints=None, learning_rate=0.1, max_bin=None,\n",
+              "             max_cat_threshold=None, max_cat_to_onehot=None,\n",
+              "             max_delta_step=None, max_depth=5, max_leaves=None,\n",
+              "             min_child_weight=None, missing=nan, monotone_constraints=None,\n",
+              "             multi_strategy=None, n_estimators=300, n_jobs=None,\n",
+              "             num_parallel_tree=None, predictor=&#x27;gpu_predictor&#x27;, ...)</pre></div> </div></div></div></div></div></div></div></div></div>"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 78
+        }
+      ],
+      "source": [
+        "# Base estimator for the hyper-parameter search.\n",
+        "# It is deliberately NOT named `xgb`: the import cell binds that name to the\n",
+        "# xgboost module (`import xgboost as xgb`), and rebinding it here would make\n",
+        "# any cell that uses the module fail when re-run after this one.\n",
+        "xgb_regressor = XGBRegressor(\n",
+        "    # XGBoost >= 2.0: `tree_method='gpu_hist'` and `predictor='gpu_predictor'`\n",
+        "    # are deprecated; GPU training is selected with `device='cuda'` instead.\n",
+        "    tree_method='hist',\n",
+        "    device='cuda',\n",
+        "    verbosity=2\n",
+        ")\n",
+        "\n",
+        "# Search space: 5 hyper-parameters x 3 values each = 243 combinations.\n",
+        "params = {\n",
+        "    'n_estimators': [100, 200, 300],\n",
+        "    'learning_rate': [0.01, 0.05, 0.1],\n",
+        "    'max_depth': [3, 5, 7],\n",
+        "    'subsample': [0.7, 0.8, 0.9],\n",
+        "    'colsample_bytree': [0.6, 0.7, 0.8]\n",
+        "}\n",
+        "\n",
+        "# Exhaustive search with 2-fold cross-validation, ranked by negative MSE\n",
+        "# (scikit-learn maximises the score, hence the sign flip).\n",
+        "grid_search = GridSearchCV(\n",
+        "    estimator=xgb_regressor,\n",
+        "    param_grid=params,\n",
+        "    cv=2,\n",
+        "    scoring='neg_mean_squared_error',\n",
+        "    verbose=2,\n",
+        "    n_jobs=-1\n",
+        ")\n",
+        "\n",
+        "grid_search.fit(x_train, y_train)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Take the winning estimator from the grid search and score it on the test split.\n",
+        "best_model = grid_search.best_estimator_\n",
+        "y_pred = best_model.predict(x_test)\n",
+        "\n",
+        "mse = mean_squared_error(y_true=y_test, y_pred=y_pred)\n",
+        "print(\"Mean Squared Error on the test set: \", mse)"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "G1aPfPxKIzYY",
+        "outputId": "a8a91b18-7570-47d7-a483-881ddf7dd1d6"
+      },
+      "execution_count": 79,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Mean Squared Error on the test set:  17201.635761885165\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "/usr/local/lib/python3.10/dist-packages/xgboost/core.py:158: UserWarning: [15:56:53] WARNING: /workspace/src/common/error_msg.cc:27: The tree method `gpu_hist` is deprecated since 2.0.0. To use GPU training, set the `device` parameter to CUDA instead.\n",
+            "\n",
+            "    E.g. tree_method = \"hist\", device = \"cuda\"\n",
+            "\n",
+            "  warnings.warn(smsg, UserWarning)\n",
+            "/usr/local/lib/python3.10/dist-packages/xgboost/core.py:158: UserWarning: [15:56:53] WARNING: /workspace/src/common/error_msg.cc:58: Falling back to prediction using DMatrix due to mismatched devices. This might lead to higher memory usage and slower performance. XGBoost is running on: cuda:0, while the input data is on: cpu.\n",
+            "Potential solutions:\n",
+            "- Use a data structure that matches the device ordinal in the booster.\n",
+            "- Set the device for booster before call to inplace_predict.\n",
+            "\n",
+            "This warning will only be shown once.\n",
+            "\n",
+            "  warnings.warn(smsg, UserWarning)\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "columns_to_encode"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "h23QhE7Gd3E4",
+        "outputId": "12854042-465a-41ee-b49f-84e3375baab8"
+      },
+      "execution_count": 112,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "Index(['MONATSZAHL', 'AUSPRAEGUNG', 'JAHR', 'MONAT'], dtype='object')"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 112
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "def _inference(MONATSZAHL, AUSPRAEGUNG, JAHR, MONAT, encoder, columns_to_encode, model=None):\n",
+        "    \"\"\"Predict the target for a single record described by its four raw fields.\n",
+        "\n",
+        "    Parameters\n",
+        "    ----------\n",
+        "    MONATSZAHL, AUSPRAEGUNG, JAHR, MONAT\n",
+        "        Raw field values; each is stored in a one-row frame under the column\n",
+        "        of the same name.\n",
+        "    encoder\n",
+        "        Fitted transformer exposing `transform` and `get_feature_names_out`.\n",
+        "    columns_to_encode\n",
+        "        Columns of the processed frame that are fed to `encoder` and then\n",
+        "        replaced by the encoder's output columns.\n",
+        "    model : optional\n",
+        "        Estimator exposing `predict`. When omitted, the notebook-level\n",
+        "        `best_model` is used, so existing six-argument calls keep working.\n",
+        "\n",
+        "    Returns\n",
+        "    -------\n",
+        "    The value returned by `model.predict` for the one-row feature frame.\n",
+        "    \"\"\"\n",
+        "    # Resolve the model explicitly instead of silently depending on a global.\n",
+        "    if model is None:\n",
+        "        model = best_model\n",
+        "\n",
+        "    raw_df = pd.DataFrame({\n",
+        "        'MONATSZAHL': [MONATSZAHL],\n",
+        "        'AUSPRAEGUNG': [AUSPRAEGUNG],\n",
+        "        'JAHR': [JAHR],\n",
+        "        'MONAT': [MONAT]\n",
+        "    })\n",
+        "\n",
+        "    # `convert_date` is defined elsewhere in the notebook (not in this cell).\n",
+        "    # The frame was built just above and is not reused, so no defensive copy is needed.\n",
+        "    processed_df = convert_date(raw_df)\n",
+        "\n",
+        "    # Encode the selected columns, keeping the row index aligned for the concat below.\n",
+        "    encoded_df = pd.DataFrame(\n",
+        "        encoder.transform(processed_df[columns_to_encode]),\n",
+        "        columns=encoder.get_feature_names_out(columns_to_encode),\n",
+        "        index=processed_df.index\n",
+        "    )\n",
+        "\n",
+        "    # Swap the raw columns for their encoded counterparts.\n",
+        "    final_df = pd.concat([processed_df.drop(columns=columns_to_encode), encoded_df], axis=1)\n",
+        "\n",
+        "    return model.predict(final_df)\n",
+        "\n",
+        "result = _inference('Alkoholunfälle', 'Verletzte und Getötete', 2012, '201207', encoder, columns_to_encode)\n"
+      ],
+      "metadata": {
+        "id": "ICDOC1WIX8As"
+      },
+      "execution_count": 116,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "result"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "u2ROrrC6fvo2",
+        "outputId": "05bb5bb0-018a-4bb8-e06a-a53e383a9d58"
+      },
+      "execution_count": 119,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "array([33.349915], dtype=float32)"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 119
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# Storing the model & the encoder to be used in our API endpoint"
+      ],
+      "metadata": {
+        "id": "zOa3pLT3a08b"
+      }
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 118,
+      "metadata": {
+        "id": "EhSjmlIAOAGk"
+      },
+      "outputs": [],
+      "source": [
+        "# Persist the trained model and the fitted encoder, one pickle file each.\n",
+        "for pickle_path, artifact in (('model.pkl', best_model), ('encoder.pkl', encoder)):\n",
+        "    with open(pickle_path, 'wb') as file:\n",
+        "        pickle.dump(artifact, file)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [],
+      "metadata": {
+        "id": "JJsb2Rhve6CL"
+      },
+      "execution_count": null,
+      "outputs": []
+    }
+  ],
+  "metadata": {
+    "accelerator": "GPU",
+    "colab": {
+      "gpuType": "T4",
+      "provenance": []
+    },
+    "kernelspec": {
+      "display_name": "Python 3",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.12.6"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}
\ No newline at end of file