{ "cells": [ { "cell_type": "code", "execution_count": 106, "metadata": { "id": "f-ERaM64ONeC" }, "outputs": [], "source": [ "# Load the metadata CSV into a DataFrame.\n", "import pandas as pd\n", "\n", "filename = '/content/U3_Metadaten.csv'\n", "# Malformed rows are still dropped, but 'warn' reports each one instead of\n", "# discarding it silently, so any data loss is visible when the cell runs.\n", "df = pd.read_csv(filename, on_bad_lines='warn')" ] }, { "cell_type": "code", "execution_count": 118, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 424 }, "id": "AYxRURTvQiFb", "outputId": "18bf4139-47ac-4939-e635-9f09f560200c" }, "outputs": [ { "data": { "application/vnd.google.colaboratory.intrinsic+json": { "summary": "{\n \"name\": \"clean_df\",\n \"rows\": 158,\n \"fields\": [\n {\n \"column\": \"Name\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 158,\n \"samples\": [\n \"ISB-020-U3-W-R-01-B17012-028-000\",\n \"ISB-020-U3-W-L-01-B15100-018-000\",\n \"ISB-020-U3-W-R-01-B17012-034-000\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Beschreibung\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 40,\n \"samples\": [\n \"Foto\",\n \"Bodenheizung / Ventileinstellung / FBH AB PM\",\n \"Foto - Novocon S demontiert und Stellenantriebe montiert!\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Disziplin\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"D - Datennetz\",\n \"E - Elektroanlagen\",\n \"S - Sanitaer\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", "type": "dataframe", "variable_name": "clean_df" }, "text/html": [ "\n", " <div id=\"df-3f4ad131-d55b-46a5-8dff-6fa3e12c15b0\" class=\"colab-df-container\">\n", " <div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " 
<thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>Name</th>\n", " <th>Beschreibung</th>\n", " <th>Disziplin</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>ISB-020-U3-W-D-01-B07005-001-000</td>\n", " <td>Bauarten und Stuecknachweis SGK</td>\n", " <td>D - Datennetz</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>ISB-020-U3-W-D-01-B07005-002-000</td>\n", " <td>Bauarten und Stuecknachweis SGK</td>\n", " <td>D - Datennetz</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>ISB-020-U3-W-D-01-B07005-003-000</td>\n", " <td>Pruefprotokoll nach DIN EN 61439-1/3</td>\n", " <td>D - Datennetz</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>ISB-020-U3-W-D-01-B07005-004-000</td>\n", " <td>Pruefprotokoll nach DIN EN 61439-1/3</td>\n", " <td>D - Datennetz</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>ISB-020-U3-W-D-01-B18012-001-000</td>\n", " <td>Sicherungslegende G-020 U3 779-AS 1</td>\n", " <td>D - Datennetz</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " </tr>\n", " <tr>\n", " <th>153</th>\n", " <td>ISB-020-U3-W-S-01-B17012-008-000</td>\n", " <td>Foto</td>\n", " <td>S - Sanitaer</td>\n", " </tr>\n", " <tr>\n", " <th>159</th>\n", " <td>ISB-020-U3-W-S-01-B17012-010-000</td>\n", " <td>Foto</td>\n", " <td>S - Sanitaer</td>\n", " </tr>\n", " <tr>\n", " <th>160</th>\n", " <td>ISB-020-U3-W-S-01-B17012-011-000</td>\n", " <td>Foto</td>\n", " <td>S - Sanitaer</td>\n", " </tr>\n", " <tr>\n", " <th>161</th>\n", " <td>ISB-020-U3-W-S-01-B18003-001-020</td>\n", " <td>Schieber / Hawle / Schieber 4000 + Handrad 780...</td>\n", " <td>S - Sanitaer</td>\n", " </tr>\n", " <tr>\n", " <th>162</th>\n", " <td>ISB-020-U3-W-S-01-B19009-001-020</td>\n", " <td>Schieber / Hawle / 4000 Schutzraum</td>\n", " <td>S - Sanitaer</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>158 rows × 3 columns</p>\n", "</div>\n", " <div class=\"colab-df-buttons\">\n", "\n", 
" <div class=\"colab-df-container\">\n", " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-3f4ad131-d55b-46a5-8dff-6fa3e12c15b0')\"\n", " title=\"Convert this dataframe to an interactive table.\"\n", " style=\"display:none;\">\n", "\n", " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n", " <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n", " </svg>\n", " </button>\n", "\n", " <style>\n", " .colab-df-container {\n", " display:flex;\n", " gap: 12px;\n", " }\n", "\n", " .colab-df-convert {\n", " background-color: #E8F0FE;\n", " border: none;\n", " border-radius: 50%;\n", " cursor: pointer;\n", " display: none;\n", " fill: #1967D2;\n", " height: 32px;\n", " padding: 0 0 0 0;\n", " width: 32px;\n", " }\n", "\n", " .colab-df-convert:hover {\n", " background-color: #E2EBFA;\n", " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n", " fill: #174EA6;\n", " }\n", "\n", " .colab-df-buttons div {\n", " margin-bottom: 4px;\n", " }\n", "\n", " [theme=dark] .colab-df-convert {\n", " background-color: #3B4455;\n", " fill: #D2E3FC;\n", " }\n", "\n", " [theme=dark] .colab-df-convert:hover {\n", " background-color: #434B5C;\n", " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n", " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n", " fill: #FFFFFF;\n", " }\n", " </style>\n", "\n", " <script>\n", " const buttonEl =\n", " document.querySelector('#df-3f4ad131-d55b-46a5-8dff-6fa3e12c15b0 button.colab-df-convert');\n", " buttonEl.style.display =\n", " google.colab.kernel.accessAllowed ? 
'block' : 'none';\n", "\n", " async function convertToInteractive(key) {\n", " const element = document.querySelector('#df-3f4ad131-d55b-46a5-8dff-6fa3e12c15b0');\n", " const dataTable =\n", " await google.colab.kernel.invokeFunction('convertToInteractive',\n", " [key], {});\n", " if (!dataTable) return;\n", "\n", " const docLinkHtml = 'Like what you see? Visit the ' +\n", " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n", " + ' to learn more about interactive tables.';\n", " element.innerHTML = '';\n", " dataTable['output_type'] = 'display_data';\n", " await google.colab.output.renderOutput(dataTable, element);\n", " const docLink = document.createElement('div');\n", " docLink.innerHTML = docLinkHtml;\n", " element.appendChild(docLink);\n", " }\n", " </script>\n", " </div>\n", "\n", "\n", "<div id=\"df-518b8ddb-11a0-49a2-8903-71e4063ca189\">\n", " <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-518b8ddb-11a0-49a2-8903-71e4063ca189')\"\n", " title=\"Suggest charts\"\n", " style=\"display:none;\">\n", "\n", "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n", " width=\"24px\">\n", " <g>\n", " <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n", " </g>\n", "</svg>\n", " </button>\n", "\n", "<style>\n", " .colab-df-quickchart {\n", " --bg-color: #E8F0FE;\n", " --fill-color: #1967D2;\n", " --hover-bg-color: #E2EBFA;\n", " --hover-fill-color: #174EA6;\n", " --disabled-fill-color: #AAA;\n", " --disabled-bg-color: #DDD;\n", " }\n", "\n", " [theme=dark] .colab-df-quickchart {\n", " --bg-color: #3B4455;\n", " --fill-color: #D2E3FC;\n", " --hover-bg-color: #434B5C;\n", " --hover-fill-color: #FFFFFF;\n", " --disabled-bg-color: #3B4455;\n", " --disabled-fill-color: #666;\n", " }\n", "\n", " .colab-df-quickchart {\n", " background-color: var(--bg-color);\n", " border: none;\n", " 
border-radius: 50%;\n", " cursor: pointer;\n", " display: none;\n", " fill: var(--fill-color);\n", " height: 32px;\n", " padding: 0;\n", " width: 32px;\n", " }\n", "\n", " .colab-df-quickchart:hover {\n", " background-color: var(--hover-bg-color);\n", " box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n", " fill: var(--button-hover-fill-color);\n", " }\n", "\n", " .colab-df-quickchart-complete:disabled,\n", " .colab-df-quickchart-complete:disabled:hover {\n", " background-color: var(--disabled-bg-color);\n", " fill: var(--disabled-fill-color);\n", " box-shadow: none;\n", " }\n", "\n", " .colab-df-spinner {\n", " border: 2px solid var(--fill-color);\n", " border-color: transparent;\n", " border-bottom-color: var(--fill-color);\n", " animation:\n", " spin 1s steps(1) infinite;\n", " }\n", "\n", " @keyframes spin {\n", " 0% {\n", " border-color: transparent;\n", " border-bottom-color: var(--fill-color);\n", " border-left-color: var(--fill-color);\n", " }\n", " 20% {\n", " border-color: transparent;\n", " border-left-color: var(--fill-color);\n", " border-top-color: var(--fill-color);\n", " }\n", " 30% {\n", " border-color: transparent;\n", " border-left-color: var(--fill-color);\n", " border-top-color: var(--fill-color);\n", " border-right-color: var(--fill-color);\n", " }\n", " 40% {\n", " border-color: transparent;\n", " border-right-color: var(--fill-color);\n", " border-top-color: var(--fill-color);\n", " }\n", " 60% {\n", " border-color: transparent;\n", " border-right-color: var(--fill-color);\n", " }\n", " 80% {\n", " border-color: transparent;\n", " border-right-color: var(--fill-color);\n", " border-bottom-color: var(--fill-color);\n", " }\n", " 90% {\n", " border-color: transparent;\n", " border-bottom-color: var(--fill-color);\n", " }\n", " }\n", "</style>\n", "\n", " <script>\n", " async function quickchart(key) {\n", " const quickchartButtonEl =\n", " document.querySelector('#' + key + ' button');\n", " 
quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n", " quickchartButtonEl.classList.add('colab-df-spinner');\n", " try {\n", " const charts = await google.colab.kernel.invokeFunction(\n", " 'suggestCharts', [key], {});\n", " } catch (error) {\n", " console.error('Error during call to suggestCharts:', error);\n", " }\n", " quickchartButtonEl.classList.remove('colab-df-spinner');\n", " quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n", " }\n", " (() => {\n", " let quickchartButtonEl =\n", " document.querySelector('#df-518b8ddb-11a0-49a2-8903-71e4063ca189 button');\n", " quickchartButtonEl.style.display =\n", " google.colab.kernel.accessAllowed ? 'block' : 'none';\n", " })();\n", " </script>\n", "</div>\n", "\n", " <div id=\"id_5f410c26-0cce-4d03-86e0-353ac70a1d74\">\n", " <style>\n", " .colab-df-generate {\n", " background-color: #E8F0FE;\n", " border: none;\n", " border-radius: 50%;\n", " cursor: pointer;\n", " display: none;\n", " fill: #1967D2;\n", " height: 32px;\n", " padding: 0 0 0 0;\n", " width: 32px;\n", " }\n", "\n", " .colab-df-generate:hover {\n", " background-color: #E2EBFA;\n", " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n", " fill: #174EA6;\n", " }\n", "\n", " [theme=dark] .colab-df-generate {\n", " background-color: #3B4455;\n", " fill: #D2E3FC;\n", " }\n", "\n", " [theme=dark] .colab-df-generate:hover {\n", " background-color: #434B5C;\n", " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n", " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n", " fill: #FFFFFF;\n", " }\n", " </style>\n", " <button class=\"colab-df-generate\" onclick=\"generateWithVariable('clean_df')\"\n", " title=\"Generate code using this dataframe.\"\n", " style=\"display:none;\">\n", "\n", " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n", " width=\"24px\">\n", " <path 
d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n", " </svg>\n", " </button>\n", " <script>\n", " (() => {\n", " const buttonEl =\n", " document.querySelector('#id_5f410c26-0cce-4d03-86e0-353ac70a1d74 button.colab-df-generate');\n", " buttonEl.style.display =\n", " google.colab.kernel.accessAllowed ? 'block' : 'none';\n", "\n", " buttonEl.onclick = () => {\n", " google.colab.notebook.generateWithVariable('clean_df');\n", " }\n", " })();\n", " </script>\n", " </div>\n", "\n", " </div>\n", " </div>\n" ], "text/plain": [ " Name \\\n", "0 ISB-020-U3-W-D-01-B07005-001-000 \n", "1 ISB-020-U3-W-D-01-B07005-002-000 \n", "2 ISB-020-U3-W-D-01-B07005-003-000 \n", "3 ISB-020-U3-W-D-01-B07005-004-000 \n", "4 ISB-020-U3-W-D-01-B18012-001-000 \n", ".. ... \n", "153 ISB-020-U3-W-S-01-B17012-008-000 \n", "159 ISB-020-U3-W-S-01-B17012-010-000 \n", "160 ISB-020-U3-W-S-01-B17012-011-000 \n", "161 ISB-020-U3-W-S-01-B18003-001-020 \n", "162 ISB-020-U3-W-S-01-B19009-001-020 \n", "\n", " Beschreibung Disziplin \n", "0 Bauarten und Stuecknachweis SGK D - Datennetz \n", "1 Bauarten und Stuecknachweis SGK D - Datennetz \n", "2 Pruefprotokoll nach DIN EN 61439-1/3 D - Datennetz \n", "3 Pruefprotokoll nach DIN EN 61439-1/3 D - Datennetz \n", "4 Sicherungslegende G-020 U3 779-AS 1 D - Datennetz \n", ".. ... ... \n", "153 Foto S - Sanitaer \n", "159 Foto S - Sanitaer \n", "160 Foto S - Sanitaer \n", "161 Schieber / Hawle / Schieber 4000 + Handrad 780... 
S - Sanitaer \n", "162 Schieber / Hawle / 4000 Schutzraum S - Sanitaer \n", "\n", "[158 rows x 3 columns]" ] }, "execution_count": 118, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Columns to keep: name, description, discipline.\n", "features = ['Name', 'Beschreibung', 'Disziplin']\n", "# Restrict to those columns, then discard every row with a NaN in any of them.\n", "clean_df = df.loc[:, features].dropna(axis='index', how='any')\n", "clean_df" ] }, { "cell_type": "code", "execution_count": 143, "metadata": { "id": "_PtvbAskQa72" }, "outputs": [], "source": [ "# Persist the cleaned table as CSV; to_csv defaults apply, so the row index is written as well.\n", "clean_df.to_csv(path_or_buf='name-description-discipline-data.csv')" ] } ], "metadata": { "colab": { "provenance": [] }, "kernelspec": { "display_name": "Python 3", "name": "python3" }, "language_info": { "name": "python" } }, "nbformat": 4, "nbformat_minor": 0 }