{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "code", "source": [ "! pip install faker" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "b4GuAy2rBGxs", "outputId": "f78fa63f-63f8-4350-f216-3f671622e0cf" }, "execution_count": 2, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Collecting faker\n", " Downloading Faker-24.4.0-py3-none-any.whl (1.8 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.8/1.8 MB\u001b[0m \u001b[31m10.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: python-dateutil>=2.4 in /usr/local/lib/python3.10/dist-packages (from faker) (2.8.2)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.4->faker) (1.16.0)\n", "Installing collected packages: faker\n", "Successfully installed faker-24.4.0\n" ] } ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "id": "o8w2xsKkA72N" }, "outputs": [], "source": [ "import pandas as pd\n", "import random\n", "from faker import Faker\n", "from datetime import datetime, timedelta\n", "from tqdm import tqdm" ] }, { "cell_type": "code", "source": [ "# Initialize Faker\n", "fake = Faker()\n", "\n", "# Generate data\n", "data = []\n", "\n", "for _ in tqdm(range(100)): # Changed from 20 to 100 to generate 100 rows\n", " first_name = fake.first_name()\n", " last_name = fake.last_name()\n", " policy_no = fake.unique.random_number(digits=8, fix_len=True)\n", " gender = random.choice(['Male', 'Female', 'Other'])\n", " address = fake.address()\n", " state = fake.state()\n", " physician_office_address = fake.address()\n", " time_of_test = fake.date_time_this_decade().strftime('%Y-%m-%d %H:%M:%S') # Formatted time\n", "\n", " # Random medical test results\n", " a1c = round(random.uniform(4.0, 14.0), 1) # A1C levels\n", " glucose = round(random.uniform(70, 150), 1) # Glucose levels\n", " calcium = round(random.uniform(8.5, 10.2), 1) # Calcium levels\n", " electrolytes = \"Normal\" if random.choice([True, False]) else \"Abnormal\" # Electrolytes status\n", " cholesterol = round(random.uniform(125, 240), 1) # Cholesterol levels\n", " esr = random.randint(0, 100) # ESR levels\n", "\n", " # Aggregating BMP results\n", " bmp_glucose = f\"Glucose: {glucose}\"\n", " bmp_calcium = f\"Calcium: {calcium}\"\n", " bmp_electrolytes = f\"Electrolytes: {electrolytes}\"\n", "\n", " data.append([first_name, last_name, policy_no, gender, address, state, physician_office_address, time_of_test, a1c, bmp_glucose, bmp_calcium, bmp_electrolytes, cholesterol, esr])" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "q0JyotoZBCw8", "outputId": "bdf3f2d3-1e10-41cc-862c-5a8fcc981a6c" }, "execution_count": 5, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "100%|██████████| 100/100 [00:00<00:00, 541.24it/s]\n" ] } ] }, { "cell_type": "code", "source": [ "# Creating DataFrame\n", "df = pd.DataFrame(data, columns=[\"First Name\", \"Last Name\", \"Policy No.\", \"Gender\", \"Address\", \"State\", \"Physician Office Address\", \"Time of Test\", \"A1C (diabetes)\", \"BMP (glucose)\", \"BMP (calcium)\", \"BMP (electrolytes)\", \"Lipid Panel (cholesterol)\", \"ESR (blood cell clumps)\"])" ], "metadata": { "id": "dRIS5WXJBF7M" }, "execution_count": 6, "outputs": [] }, { "cell_type": "code", "source": [ "df" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 615 }, "id": "-YUuq4OrBO8s", "outputId": "4b2c4bc6-9aca-487b-e6f5-e33d684bf627" }, "execution_count": 8, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " First Name Last Name Policy No. Gender \\\n", "0 John Williams 51405852 Male \n", "1 Matthew Barrett 86698828 Female \n", "2 Michael Williams 97705492 Other \n", "3 Billy Jones 42019134 Other \n", "4 Jessica Lawson 62074003 Female \n", ".. ... ... ... ... \n", "95 Peter Burns 58173378 Other \n", "96 Nicole Campbell 96638320 Female \n", "97 Jeffrey Moore 25800342 Male \n", "98 Albert Collins 38748339 Other \n", "99 Anthony Arias 29343897 Male \n", "\n", " Address State \\\n", "0 06021 Andrews Plains\\nNorth Barbaraview, WA 84509 Texas \n", "1 02997 Deborah Path Suite 795\\nDeborahmouth, NJ... Delaware \n", "2 4519 Joseph Extensions Apt. 395\\nPort William,... Kentucky \n", "3 113 Rivera Plain Apt. 143\\nPort Cindy, CO 74694 South Carolina \n", "4 82657 Ruiz Point\\nLake Christophershire, KS 37543 Delaware \n", ".. ... ... \n", "95 USCGC Parker\\nFPO AA 16320 New York \n", "96 USS Davis\\nFPO AP 68621 Vermont \n", "97 9240 Anthony Flat Apt. 187\\nLake Stephenberg, ... Georgia \n", "98 5230 Gary Motorway\\nWufort, VT 79419 Delaware \n", "99 814 Christopher Meadows\\nPatriciashire, NY 73616 Arkansas \n", "\n", " Physician Office Address Time of Test \\\n", "0 Unit 9742 Box 1185\\nDPO AP 13090 2021-11-25 08:48:21 \n", "1 711 Scott Gateway Suite 873\\nWendyburgh, ME 72114 2020-05-16 18:28:15 \n", "2 732 Bryant Street Suite 463\\nNew Jessicaview, ... 2022-04-14 22:44:55 \n", "3 24170 Merritt Mountain Apt. 882\\nSnydershire, ... 2023-10-09 12:48:11 \n", "4 840 Rachel Freeway\\nPort Robert, NC 93052 2021-01-24 18:34:26 \n", ".. ... ... \n", "95 30807 Mitchell Estates\\nGregorybury, KY 95175 2020-01-16 08:08:59 \n", "96 3771 Maria Bypass Apt. 651\\nLake Lindsey, MD 0... 2023-10-17 19:27:21 \n", "97 2683 Thomas Court Apt. 694\\nRachaelchester, CT... 2020-10-26 14:30:56 \n", "98 79209 Garner Trafficway\\nPort William, KY 91119 2020-03-25 15:18:29 \n", "99 035 Sharon Road Suite 873\\nNorth Jamesmouth, A... 2021-01-07 17:20:17 \n", "\n", " A1C (diabetes) BMP (glucose) BMP (calcium) BMP (electrolytes) \\\n", "0 8.3 Glucose: 105.1 Calcium: 9.4 Electrolytes: Normal \n", "1 4.9 Glucose: 140.1 Calcium: 9.9 Electrolytes: Abnormal \n", "2 8.8 Glucose: 103.6 Calcium: 9.3 Electrolytes: Abnormal \n", "3 6.7 Glucose: 124.4 Calcium: 9.4 Electrolytes: Normal \n", "4 12.1 Glucose: 135.4 Calcium: 8.6 Electrolytes: Abnormal \n", ".. ... ... ... ... \n", "95 8.7 Glucose: 143.4 Calcium: 9.0 Electrolytes: Abnormal \n", "96 5.7 Glucose: 126.7 Calcium: 10.0 Electrolytes: Abnormal \n", "97 9.0 Glucose: 133.6 Calcium: 8.7 Electrolytes: Normal \n", "98 9.2 Glucose: 78.1 Calcium: 9.8 Electrolytes: Abnormal \n", "99 13.7 Glucose: 113.7 Calcium: 8.7 Electrolytes: Normal \n", "\n", " Lipid Panel (cholesterol) ESR (blood cell clumps) \n", "0 217.2 23 \n", "1 165.0 36 \n", "2 183.9 34 \n", "3 176.4 13 \n", "4 129.9 29 \n", ".. ... ... \n", "95 151.7 7 \n", "96 157.2 19 \n", "97 141.0 89 \n", "98 228.4 84 \n", "99 214.2 50 \n", "\n", "[100 rows x 14 columns]" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
First NameLast NamePolicy No.GenderAddressStatePhysician Office AddressTime of TestA1C (diabetes)BMP (glucose)BMP (calcium)BMP (electrolytes)Lipid Panel (cholesterol)ESR (blood cell clumps)
0JohnWilliams51405852Male06021 Andrews Plains\\nNorth Barbaraview, WA 84509TexasUnit 9742 Box 1185\\nDPO AP 130902021-11-25 08:48:218.3Glucose: 105.1Calcium: 9.4Electrolytes: Normal217.223
1MatthewBarrett86698828Female02997 Deborah Path Suite 795\\nDeborahmouth, NJ...Delaware711 Scott Gateway Suite 873\\nWendyburgh, ME 721142020-05-16 18:28:154.9Glucose: 140.1Calcium: 9.9Electrolytes: Abnormal165.036
2MichaelWilliams97705492Other4519 Joseph Extensions Apt. 395\\nPort William,...Kentucky732 Bryant Street Suite 463\\nNew Jessicaview, ...2022-04-14 22:44:558.8Glucose: 103.6Calcium: 9.3Electrolytes: Abnormal183.934
3BillyJones42019134Other113 Rivera Plain Apt. 143\\nPort Cindy, CO 74694South Carolina24170 Merritt Mountain Apt. 882\\nSnydershire, ...2023-10-09 12:48:116.7Glucose: 124.4Calcium: 9.4Electrolytes: Normal176.413
4JessicaLawson62074003Female82657 Ruiz Point\\nLake Christophershire, KS 37543Delaware840 Rachel Freeway\\nPort Robert, NC 930522021-01-24 18:34:2612.1Glucose: 135.4Calcium: 8.6Electrolytes: Abnormal129.929
.............................................
95PeterBurns58173378OtherUSCGC Parker\\nFPO AA 16320New York30807 Mitchell Estates\\nGregorybury, KY 951752020-01-16 08:08:598.7Glucose: 143.4Calcium: 9.0Electrolytes: Abnormal151.77
96NicoleCampbell96638320FemaleUSS Davis\\nFPO AP 68621Vermont3771 Maria Bypass Apt. 651\\nLake Lindsey, MD 0...2023-10-17 19:27:215.7Glucose: 126.7Calcium: 10.0Electrolytes: Abnormal157.219
97JeffreyMoore25800342Male9240 Anthony Flat Apt. 187\\nLake Stephenberg, ...Georgia2683 Thomas Court Apt. 694\\nRachaelchester, CT...2020-10-26 14:30:569.0Glucose: 133.6Calcium: 8.7Electrolytes: Normal141.089
98AlbertCollins38748339Other5230 Gary Motorway\\nWufort, VT 79419Delaware79209 Garner Trafficway\\nPort William, KY 911192020-03-25 15:18:299.2Glucose: 78.1Calcium: 9.8Electrolytes: Abnormal228.484
99AnthonyArias29343897Male814 Christopher Meadows\\nPatriciashire, NY 73616Arkansas035 Sharon Road Suite 873\\nNorth Jamesmouth, A...2021-01-07 17:20:1713.7Glucose: 113.7Calcium: 8.7Electrolytes: Normal214.250
\n", "

100 rows × 14 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "\n", "
\n", " \n", " \n", " \n", "
\n", "\n", "
\n", "
\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "variable_name": "df", "summary": "{\n \"name\": \"df\",\n \"rows\": 100,\n \"fields\": [\n {\n \"column\": \"First Name\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 82,\n \"samples\": [\n \"Daniel\",\n \"John\",\n \"Nancy\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Last Name\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 86,\n \"samples\": [\n \"Myers\",\n \"Williams\",\n \"Martinez\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Policy No.\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 24995568,\n \"min\": 10077874,\n \"max\": 97705492,\n \"num_unique_values\": 100,\n \"samples\": [\n 51225828,\n 79531798,\n 17159895\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Gender\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Male\",\n \"Female\",\n \"Other\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Address\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 100,\n \"samples\": [\n \"49286 Norman Trace\\nBaileyburgh, FM 60919\",\n \"67471 Orr Divide Suite 876\\nAlexanderport, IL 62852\",\n \"PSC 8205, Box 3340\\nAPO AP 96934\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"State\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 41,\n \"samples\": [\n \"Colorado\",\n \"Iowa\",\n \"Rhode Island\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Physician Office Address\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 100,\n \"samples\": [\n \"PSC 4665, Box 9364\\nAPO AE 81123\",\n \"Unit 8213 Box 4682\\nDPO AA 62814\",\n \"27278 Robbins Place\\nRonaldville, OK 66693\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Time of Test\",\n \"properties\": {\n \"dtype\": \"object\",\n \"num_unique_values\": 100,\n \"samples\": [\n \"2022-08-18 11:22:50\",\n \"2022-09-23 17:04:36\",\n \"2023-12-27 13:37:21\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"A1C (diabetes)\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2.8247956929786455,\n \"min\": 4.6,\n \"max\": 14.0,\n \"num_unique_values\": 62,\n \"samples\": [\n 13.2,\n 9.2,\n 8.3\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"BMP (glucose)\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 95,\n \"samples\": [\n \"Glucose: 85.8\",\n \"Glucose: 76.4\",\n \"Glucose: 118.3\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"BMP (calcium)\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 17,\n \"samples\": [\n \"Calcium: 9.4\",\n \"Calcium: 9.9\",\n \"Calcium: 8.9\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"BMP (electrolytes)\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Electrolytes: Abnormal\",\n \"Electrolytes: Normal\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Lipid Panel (cholesterol)\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 35.04015605506968,\n \"min\": 127.9,\n \"max\": 239.6,\n \"num_unique_values\": 98,\n \"samples\": [\n 141.8,\n 191.6\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"ESR (blood cell clumps)\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 31,\n \"min\": 0,\n \"max\": 100,\n \"num_unique_values\": 63,\n \"samples\": [\n 89,\n 96\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" } }, "metadata": {}, "execution_count": 8 } ] }, { "cell_type": "code", "source": [ "df.to_csv(\"sample_patient_bloodtest_data.csv\", index=False)" ], "metadata": { "id": "8awx6Z9qBPIc" }, "execution_count": 10, "outputs": [] }, { "cell_type": "code", "source": [], "metadata": { "id": "ZXTS62RSBZKi" }, "execution_count": null, "outputs": [] } ] }