{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "818b53f6-ce03-41ae-8318-ab53be1d8916",
   "metadata": {},
   "source": [
    "# Conversion of the Latest Dataframe to Parquet\n",
    "\n",
    "The dataset will be stored in a data warehouse, so we convert the latest DataFrame to the Parquet format.\n",
    "\n",
    "Steps: load the tab-separated `dialogues.csv`, write it to `./data/parquet/dialogues.parquet`, then read the file back into a DataFrame."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "1c4a0c5a-ea47-4a91-a0de-de46b70fe9b0",
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "\n",
    "import pandas as pd\n",
    "import pyarrow as pa\n",
    "import pyarrow.parquet as pq\n",
    "\n",
    "# Load the tab-separated source file into a pandas DataFrame\n",
    "df = pd.read_csv('dialogues.csv', sep='\\t', encoding='utf-8')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "d802024b-461f-4853-9e7c-229581a11836",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "DataFrame saved to Parquet file: ./data/parquet/dialogues.parquet\n"
     ]
    }
   ],
   "source": [
    "# Convert the pandas DataFrame to an Arrow Table\n",
    "table = pa.Table.from_pandas(df)\n",
    "\n",
    "# Output file path for the Parquet file (kept as a plain string so the\n",
    "# message printed below shows the path exactly as written here)\n",
    "parquet_file_path = './data/parquet/dialogues.parquet'\n",
    "\n",
    "# Create the output directory first: pq.write_table does not create\n",
    "# missing parent directories, so a fresh checkout would fail otherwise\n",
    "Path(parquet_file_path).parent.mkdir(parents=True, exist_ok=True)\n",
    "\n",
    "# Write the Arrow Table to the Parquet file\n",
    "pq.write_table(table, parquet_file_path)\n",
    "\n",
    "print(f'DataFrame saved to Parquet file: {parquet_file_path}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "78704d3a-a812-4f20-8103-357575211b1e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the Parquet file written above back into an Arrow Table\n",
    "table = pq.read_table(parquet_file_path)\n",
    "\n",
    "# Convert the Arrow Table to a pandas DataFrame; from here on `df` holds\n",
    "# the data as loaded from Parquet rather than from the CSV\n",
    "df = table.to_pandas()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "87fb4487-e633-497d-b490-f39a61ef3bbc",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "
\n", " | Description | \n", "Patient | \n", "Doctor | \n", "
---|---|---|---|
0 | \n", "Q. What does abutment of the nerve root mean? | \n", "Hi doctor,I am just wondering what is abutting... | \n", "Hi. I have gone through your query with dilige... | \n", "
1 | \n", "Q. What should I do to reduce my weight gained... | \n", "Hi doctor, I am a 22-year-old female who was d... | \n", "Hi. You have really done well with the hypothy... | \n", "
2 | \n", "Q. I have started to get lots of acne on my fa... | \n", "Hi doctor! I used to have clear skin but since... | \n", "Hi there Acne has multifactorial etiology. Onl... | \n", "
3 | \n", "Q. Why do I have uncomfortable feeling between... | \n", "Hello doctor,I am having an uncomfortable feel... | \n", "Hello. The popping and discomfort what you fel... | \n", "
4 | \n", "Q. My symptoms after intercourse threatns me e... | \n", "Hello doctor,Before two years had sex with a c... | \n", "Hello. The HIV test uses a finger prick blood ... | \n", "