{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "# Preprocess U3 metadata\n",
        "\n",
        "Loads `U3_Metadaten.csv`, keeps the `Name`, `Beschreibung` and `Disziplin` columns, drops every row with a missing value in any of them, and writes the result to `name-description-discipline-data.csv`."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "f-ERaM64ONeC"
      },
      "outputs": [],
      "source": [
        "# Load the raw metadata export.\n",
        "import pandas as pd\n",
        "\n",
        "filename = '/content/U3_Metadaten.csv'\n",
        "# Malformed rows are still skipped, but 'warn' reports them instead of\n",
        "# dropping them silently, so any data loss at load time is visible.\n",
        "df = pd.read_csv(filename, on_bad_lines='warn')"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "AYxRURTvQiFb"
      },
      "outputs": [],
      "source": [
        "# Keep only the name, description and discipline columns.\n",
        "features = ['Name', 'Beschreibung', 'Disziplin']\n",
        "# Drop every row that has a missing value in any of the kept columns.\n",
        "# The result is a new frame; df is not modified, so this cell can be re-run.\n",
        "clean_df = df[features].dropna()\n",
        "clean_df"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "_PtvbAskQa72"
      },
      "outputs": [],
      "source": [
        "# index=False: after dropna() the index only holds the leftover row labels of\n",
        "# the raw file; writing it would add an unnamed fourth column to the output.\n",
        "clean_df.to_csv('name-description-discipline-data.csv', index=False)"
      ]
    }
  ],
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}