eagle0504 committed on
Commit
065856f
·
verified ·
1 Parent(s): 18cc988

Upload ex_ - fake patient bloodtest generator.ipynb

Browse files
data/ex_ - fake patient bloodtest generator.ipynb ADDED
@@ -0,0 +1,737 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "colab": {
6
+ "provenance": []
7
+ },
8
+ "kernelspec": {
9
+ "name": "python3",
10
+ "display_name": "Python 3"
11
+ },
12
+ "language_info": {
13
+ "name": "python"
14
+ }
15
+ },
16
+ "cells": [
17
+ {
18
+ "cell_type": "code",
19
+ "source": [
20
+ "! pip install faker"
21
+ ],
22
+ "metadata": {
23
+ "colab": {
24
+ "base_uri": "https://localhost:8080/"
25
+ },
26
+ "id": "b4GuAy2rBGxs",
27
+ "outputId": "f78fa63f-63f8-4350-f216-3f671622e0cf"
28
+ },
29
+ "execution_count": 2,
30
+ "outputs": [
31
+ {
32
+ "output_type": "stream",
33
+ "name": "stdout",
34
+ "text": [
35
+ "Collecting faker\n",
36
+ " Downloading Faker-24.4.0-py3-none-any.whl (1.8 MB)\n",
37
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.8/1.8 MB\u001b[0m \u001b[31m10.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
38
+ "\u001b[?25hRequirement already satisfied: python-dateutil>=2.4 in /usr/local/lib/python3.10/dist-packages (from faker) (2.8.2)\n",
39
+ "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.4->faker) (1.16.0)\n",
40
+ "Installing collected packages: faker\n",
41
+ "Successfully installed faker-24.4.0\n"
42
+ ]
43
+ }
44
+ ]
45
+ },
46
+ {
47
+ "cell_type": "code",
48
+ "execution_count": 4,
49
+ "metadata": {
50
+ "id": "o8w2xsKkA72N"
51
+ },
52
+ "outputs": [],
53
+ "source": [
54
+ "import pandas as pd\n",
55
+ "import random\n",
56
+ "from faker import Faker\n",
57
+ "from datetime import datetime, timedelta\n",
58
+ "from tqdm import tqdm"
59
+ ]
60
+ },
61
+ {
62
+ "cell_type": "code",
63
+ "source": [
64
+ "# Initialize Faker\n",
65
+ "fake = Faker()\n",
66
+ "\n",
67
+ "# Generate data\n",
68
+ "data = []\n",
69
+ "\n",
70
+ "for _ in tqdm(range(100)): # Generate 100 synthetic patient rows\n",
71
+ " first_name = fake.first_name()\n",
72
+ " last_name = fake.last_name()\n",
73
+ " policy_no = fake.unique.random_number(digits=8, fix_len=True)\n",
74
+ " gender = random.choice(['Male', 'Female', 'Other'])\n",
75
+ " address = fake.address()\n",
76
+ " state = fake.state()\n",
77
+ " physician_office_address = fake.address()\n",
78
+ " time_of_test = fake.date_time_this_decade().strftime('%Y-%m-%d %H:%M:%S') # Formatted time\n",
79
+ "\n",
80
+ " # Random medical test results\n",
81
+ " a1c = round(random.uniform(4.0, 14.0), 1) # A1C levels\n",
82
+ " glucose = round(random.uniform(70, 150), 1) # Glucose levels\n",
83
+ " calcium = round(random.uniform(8.5, 10.2), 1) # Calcium levels\n",
84
+ " electrolytes = \"Normal\" if random.choice([True, False]) else \"Abnormal\" # Electrolytes status\n",
85
+ " cholesterol = round(random.uniform(125, 240), 1) # Cholesterol levels\n",
86
+ " esr = random.randint(0, 100) # ESR levels\n",
87
+ "\n",
88
+ " # Aggregating BMP results\n",
89
+ " bmp_glucose = f\"Glucose: {glucose}\"\n",
90
+ " bmp_calcium = f\"Calcium: {calcium}\"\n",
91
+ " bmp_electrolytes = f\"Electrolytes: {electrolytes}\"\n",
92
+ "\n",
93
+ " data.append([first_name, last_name, policy_no, gender, address, state, physician_office_address, time_of_test, a1c, bmp_glucose, bmp_calcium, bmp_electrolytes, cholesterol, esr])"
94
+ ],
95
+ "metadata": {
96
+ "colab": {
97
+ "base_uri": "https://localhost:8080/"
98
+ },
99
+ "id": "q0JyotoZBCw8",
100
+ "outputId": "bdf3f2d3-1e10-41cc-862c-5a8fcc981a6c"
101
+ },
102
+ "execution_count": 5,
103
+ "outputs": [
104
+ {
105
+ "output_type": "stream",
106
+ "name": "stderr",
107
+ "text": [
108
+ "100%|██████████| 100/100 [00:00<00:00, 541.24it/s]\n"
109
+ ]
110
+ }
111
+ ]
112
+ },
113
+ {
114
+ "cell_type": "code",
115
+ "source": [
116
+ "# Creating DataFrame\n",
117
+ "df = pd.DataFrame(data, columns=[\"First Name\", \"Last Name\", \"Policy No.\", \"Gender\", \"Address\", \"State\", \"Physician Office Address\", \"Time of Test\", \"A1C (diabetes)\", \"BMP (glucose)\", \"BMP (calcium)\", \"BMP (electrolytes)\", \"Lipid Panel (cholesterol)\", \"ESR (blood cell clumps)\"])"
118
+ ],
119
+ "metadata": {
120
+ "id": "dRIS5WXJBF7M"
121
+ },
122
+ "execution_count": 6,
123
+ "outputs": []
124
+ },
125
+ {
126
+ "cell_type": "code",
127
+ "source": [
128
+ "df"
129
+ ],
130
+ "metadata": {
131
+ "colab": {
132
+ "base_uri": "https://localhost:8080/",
133
+ "height": 615
134
+ },
135
+ "id": "-YUuq4OrBO8s",
136
+ "outputId": "4b2c4bc6-9aca-487b-e6f5-e33d684bf627"
137
+ },
138
+ "execution_count": 8,
139
+ "outputs": [
140
+ {
141
+ "output_type": "execute_result",
142
+ "data": {
143
+ "text/plain": [
144
+ " First Name Last Name Policy No. Gender \\\n",
145
+ "0 John Williams 51405852 Male \n",
146
+ "1 Matthew Barrett 86698828 Female \n",
147
+ "2 Michael Williams 97705492 Other \n",
148
+ "3 Billy Jones 42019134 Other \n",
149
+ "4 Jessica Lawson 62074003 Female \n",
150
+ ".. ... ... ... ... \n",
151
+ "95 Peter Burns 58173378 Other \n",
152
+ "96 Nicole Campbell 96638320 Female \n",
153
+ "97 Jeffrey Moore 25800342 Male \n",
154
+ "98 Albert Collins 38748339 Other \n",
155
+ "99 Anthony Arias 29343897 Male \n",
156
+ "\n",
157
+ " Address State \\\n",
158
+ "0 06021 Andrews Plains\\nNorth Barbaraview, WA 84509 Texas \n",
159
+ "1 02997 Deborah Path Suite 795\\nDeborahmouth, NJ... Delaware \n",
160
+ "2 4519 Joseph Extensions Apt. 395\\nPort William,... Kentucky \n",
161
+ "3 113 Rivera Plain Apt. 143\\nPort Cindy, CO 74694 South Carolina \n",
162
+ "4 82657 Ruiz Point\\nLake Christophershire, KS 37543 Delaware \n",
163
+ ".. ... ... \n",
164
+ "95 USCGC Parker\\nFPO AA 16320 New York \n",
165
+ "96 USS Davis\\nFPO AP 68621 Vermont \n",
166
+ "97 9240 Anthony Flat Apt. 187\\nLake Stephenberg, ... Georgia \n",
167
+ "98 5230 Gary Motorway\\nWufort, VT 79419 Delaware \n",
168
+ "99 814 Christopher Meadows\\nPatriciashire, NY 73616 Arkansas \n",
169
+ "\n",
170
+ " Physician Office Address Time of Test \\\n",
171
+ "0 Unit 9742 Box 1185\\nDPO AP 13090 2021-11-25 08:48:21 \n",
172
+ "1 711 Scott Gateway Suite 873\\nWendyburgh, ME 72114 2020-05-16 18:28:15 \n",
173
+ "2 732 Bryant Street Suite 463\\nNew Jessicaview, ... 2022-04-14 22:44:55 \n",
174
+ "3 24170 Merritt Mountain Apt. 882\\nSnydershire, ... 2023-10-09 12:48:11 \n",
175
+ "4 840 Rachel Freeway\\nPort Robert, NC 93052 2021-01-24 18:34:26 \n",
176
+ ".. ... ... \n",
177
+ "95 30807 Mitchell Estates\\nGregorybury, KY 95175 2020-01-16 08:08:59 \n",
178
+ "96 3771 Maria Bypass Apt. 651\\nLake Lindsey, MD 0... 2023-10-17 19:27:21 \n",
179
+ "97 2683 Thomas Court Apt. 694\\nRachaelchester, CT... 2020-10-26 14:30:56 \n",
180
+ "98 79209 Garner Trafficway\\nPort William, KY 91119 2020-03-25 15:18:29 \n",
181
+ "99 035 Sharon Road Suite 873\\nNorth Jamesmouth, A... 2021-01-07 17:20:17 \n",
182
+ "\n",
183
+ " A1C (diabetes) BMP (glucose) BMP (calcium) BMP (electrolytes) \\\n",
184
+ "0 8.3 Glucose: 105.1 Calcium: 9.4 Electrolytes: Normal \n",
185
+ "1 4.9 Glucose: 140.1 Calcium: 9.9 Electrolytes: Abnormal \n",
186
+ "2 8.8 Glucose: 103.6 Calcium: 9.3 Electrolytes: Abnormal \n",
187
+ "3 6.7 Glucose: 124.4 Calcium: 9.4 Electrolytes: Normal \n",
188
+ "4 12.1 Glucose: 135.4 Calcium: 8.6 Electrolytes: Abnormal \n",
189
+ ".. ... ... ... ... \n",
190
+ "95 8.7 Glucose: 143.4 Calcium: 9.0 Electrolytes: Abnormal \n",
191
+ "96 5.7 Glucose: 126.7 Calcium: 10.0 Electrolytes: Abnormal \n",
192
+ "97 9.0 Glucose: 133.6 Calcium: 8.7 Electrolytes: Normal \n",
193
+ "98 9.2 Glucose: 78.1 Calcium: 9.8 Electrolytes: Abnormal \n",
194
+ "99 13.7 Glucose: 113.7 Calcium: 8.7 Electrolytes: Normal \n",
195
+ "\n",
196
+ " Lipid Panel (cholesterol) ESR (blood cell clumps) \n",
197
+ "0 217.2 23 \n",
198
+ "1 165.0 36 \n",
199
+ "2 183.9 34 \n",
200
+ "3 176.4 13 \n",
201
+ "4 129.9 29 \n",
202
+ ".. ... ... \n",
203
+ "95 151.7 7 \n",
204
+ "96 157.2 19 \n",
205
+ "97 141.0 89 \n",
206
+ "98 228.4 84 \n",
207
+ "99 214.2 50 \n",
208
+ "\n",
209
+ "[100 rows x 14 columns]"
210
+ ],
211
+ "text/html": [
212
+ "\n",
213
+ " <div id=\"df-29ac59bd-88ba-46db-9129-4154d4fdd690\" class=\"colab-df-container\">\n",
214
+ " <div>\n",
215
+ "<style scoped>\n",
216
+ " .dataframe tbody tr th:only-of-type {\n",
217
+ " vertical-align: middle;\n",
218
+ " }\n",
219
+ "\n",
220
+ " .dataframe tbody tr th {\n",
221
+ " vertical-align: top;\n",
222
+ " }\n",
223
+ "\n",
224
+ " .dataframe thead th {\n",
225
+ " text-align: right;\n",
226
+ " }\n",
227
+ "</style>\n",
228
+ "<table border=\"1\" class=\"dataframe\">\n",
229
+ " <thead>\n",
230
+ " <tr style=\"text-align: right;\">\n",
231
+ " <th></th>\n",
232
+ " <th>First Name</th>\n",
233
+ " <th>Last Name</th>\n",
234
+ " <th>Policy No.</th>\n",
235
+ " <th>Gender</th>\n",
236
+ " <th>Address</th>\n",
237
+ " <th>State</th>\n",
238
+ " <th>Physician Office Address</th>\n",
239
+ " <th>Time of Test</th>\n",
240
+ " <th>A1C (diabetes)</th>\n",
241
+ " <th>BMP (glucose)</th>\n",
242
+ " <th>BMP (calcium)</th>\n",
243
+ " <th>BMP (electrolytes)</th>\n",
244
+ " <th>Lipid Panel (cholesterol)</th>\n",
245
+ " <th>ESR (blood cell clumps)</th>\n",
246
+ " </tr>\n",
247
+ " </thead>\n",
248
+ " <tbody>\n",
249
+ " <tr>\n",
250
+ " <th>0</th>\n",
251
+ " <td>John</td>\n",
252
+ " <td>Williams</td>\n",
253
+ " <td>51405852</td>\n",
254
+ " <td>Male</td>\n",
255
+ " <td>06021 Andrews Plains\\nNorth Barbaraview, WA 84509</td>\n",
256
+ " <td>Texas</td>\n",
257
+ " <td>Unit 9742 Box 1185\\nDPO AP 13090</td>\n",
258
+ " <td>2021-11-25 08:48:21</td>\n",
259
+ " <td>8.3</td>\n",
260
+ " <td>Glucose: 105.1</td>\n",
261
+ " <td>Calcium: 9.4</td>\n",
262
+ " <td>Electrolytes: Normal</td>\n",
263
+ " <td>217.2</td>\n",
264
+ " <td>23</td>\n",
265
+ " </tr>\n",
266
+ " <tr>\n",
267
+ " <th>1</th>\n",
268
+ " <td>Matthew</td>\n",
269
+ " <td>Barrett</td>\n",
270
+ " <td>86698828</td>\n",
271
+ " <td>Female</td>\n",
272
+ " <td>02997 Deborah Path Suite 795\\nDeborahmouth, NJ...</td>\n",
273
+ " <td>Delaware</td>\n",
274
+ " <td>711 Scott Gateway Suite 873\\nWendyburgh, ME 72114</td>\n",
275
+ " <td>2020-05-16 18:28:15</td>\n",
276
+ " <td>4.9</td>\n",
277
+ " <td>Glucose: 140.1</td>\n",
278
+ " <td>Calcium: 9.9</td>\n",
279
+ " <td>Electrolytes: Abnormal</td>\n",
280
+ " <td>165.0</td>\n",
281
+ " <td>36</td>\n",
282
+ " </tr>\n",
283
+ " <tr>\n",
284
+ " <th>2</th>\n",
285
+ " <td>Michael</td>\n",
286
+ " <td>Williams</td>\n",
287
+ " <td>97705492</td>\n",
288
+ " <td>Other</td>\n",
289
+ " <td>4519 Joseph Extensions Apt. 395\\nPort William,...</td>\n",
290
+ " <td>Kentucky</td>\n",
291
+ " <td>732 Bryant Street Suite 463\\nNew Jessicaview, ...</td>\n",
292
+ " <td>2022-04-14 22:44:55</td>\n",
293
+ " <td>8.8</td>\n",
294
+ " <td>Glucose: 103.6</td>\n",
295
+ " <td>Calcium: 9.3</td>\n",
296
+ " <td>Electrolytes: Abnormal</td>\n",
297
+ " <td>183.9</td>\n",
298
+ " <td>34</td>\n",
299
+ " </tr>\n",
300
+ " <tr>\n",
301
+ " <th>3</th>\n",
302
+ " <td>Billy</td>\n",
303
+ " <td>Jones</td>\n",
304
+ " <td>42019134</td>\n",
305
+ " <td>Other</td>\n",
306
+ " <td>113 Rivera Plain Apt. 143\\nPort Cindy, CO 74694</td>\n",
307
+ " <td>South Carolina</td>\n",
308
+ " <td>24170 Merritt Mountain Apt. 882\\nSnydershire, ...</td>\n",
309
+ " <td>2023-10-09 12:48:11</td>\n",
310
+ " <td>6.7</td>\n",
311
+ " <td>Glucose: 124.4</td>\n",
312
+ " <td>Calcium: 9.4</td>\n",
313
+ " <td>Electrolytes: Normal</td>\n",
314
+ " <td>176.4</td>\n",
315
+ " <td>13</td>\n",
316
+ " </tr>\n",
317
+ " <tr>\n",
318
+ " <th>4</th>\n",
319
+ " <td>Jessica</td>\n",
320
+ " <td>Lawson</td>\n",
321
+ " <td>62074003</td>\n",
322
+ " <td>Female</td>\n",
323
+ " <td>82657 Ruiz Point\\nLake Christophershire, KS 37543</td>\n",
324
+ " <td>Delaware</td>\n",
325
+ " <td>840 Rachel Freeway\\nPort Robert, NC 93052</td>\n",
326
+ " <td>2021-01-24 18:34:26</td>\n",
327
+ " <td>12.1</td>\n",
328
+ " <td>Glucose: 135.4</td>\n",
329
+ " <td>Calcium: 8.6</td>\n",
330
+ " <td>Electrolytes: Abnormal</td>\n",
331
+ " <td>129.9</td>\n",
332
+ " <td>29</td>\n",
333
+ " </tr>\n",
334
+ " <tr>\n",
335
+ " <th>...</th>\n",
336
+ " <td>...</td>\n",
337
+ " <td>...</td>\n",
338
+ " <td>...</td>\n",
339
+ " <td>...</td>\n",
340
+ " <td>...</td>\n",
341
+ " <td>...</td>\n",
342
+ " <td>...</td>\n",
343
+ " <td>...</td>\n",
344
+ " <td>...</td>\n",
345
+ " <td>...</td>\n",
346
+ " <td>...</td>\n",
347
+ " <td>...</td>\n",
348
+ " <td>...</td>\n",
349
+ " <td>...</td>\n",
350
+ " </tr>\n",
351
+ " <tr>\n",
352
+ " <th>95</th>\n",
353
+ " <td>Peter</td>\n",
354
+ " <td>Burns</td>\n",
355
+ " <td>58173378</td>\n",
356
+ " <td>Other</td>\n",
357
+ " <td>USCGC Parker\\nFPO AA 16320</td>\n",
358
+ " <td>New York</td>\n",
359
+ " <td>30807 Mitchell Estates\\nGregorybury, KY 95175</td>\n",
360
+ " <td>2020-01-16 08:08:59</td>\n",
361
+ " <td>8.7</td>\n",
362
+ " <td>Glucose: 143.4</td>\n",
363
+ " <td>Calcium: 9.0</td>\n",
364
+ " <td>Electrolytes: Abnormal</td>\n",
365
+ " <td>151.7</td>\n",
366
+ " <td>7</td>\n",
367
+ " </tr>\n",
368
+ " <tr>\n",
369
+ " <th>96</th>\n",
370
+ " <td>Nicole</td>\n",
371
+ " <td>Campbell</td>\n",
372
+ " <td>96638320</td>\n",
373
+ " <td>Female</td>\n",
374
+ " <td>USS Davis\\nFPO AP 68621</td>\n",
375
+ " <td>Vermont</td>\n",
376
+ " <td>3771 Maria Bypass Apt. 651\\nLake Lindsey, MD 0...</td>\n",
377
+ " <td>2023-10-17 19:27:21</td>\n",
378
+ " <td>5.7</td>\n",
379
+ " <td>Glucose: 126.7</td>\n",
380
+ " <td>Calcium: 10.0</td>\n",
381
+ " <td>Electrolytes: Abnormal</td>\n",
382
+ " <td>157.2</td>\n",
383
+ " <td>19</td>\n",
384
+ " </tr>\n",
385
+ " <tr>\n",
386
+ " <th>97</th>\n",
387
+ " <td>Jeffrey</td>\n",
388
+ " <td>Moore</td>\n",
389
+ " <td>25800342</td>\n",
390
+ " <td>Male</td>\n",
391
+ " <td>9240 Anthony Flat Apt. 187\\nLake Stephenberg, ...</td>\n",
392
+ " <td>Georgia</td>\n",
393
+ " <td>2683 Thomas Court Apt. 694\\nRachaelchester, CT...</td>\n",
394
+ " <td>2020-10-26 14:30:56</td>\n",
395
+ " <td>9.0</td>\n",
396
+ " <td>Glucose: 133.6</td>\n",
397
+ " <td>Calcium: 8.7</td>\n",
398
+ " <td>Electrolytes: Normal</td>\n",
399
+ " <td>141.0</td>\n",
400
+ " <td>89</td>\n",
401
+ " </tr>\n",
402
+ " <tr>\n",
403
+ " <th>98</th>\n",
404
+ " <td>Albert</td>\n",
405
+ " <td>Collins</td>\n",
406
+ " <td>38748339</td>\n",
407
+ " <td>Other</td>\n",
408
+ " <td>5230 Gary Motorway\\nWufort, VT 79419</td>\n",
409
+ " <td>Delaware</td>\n",
410
+ " <td>79209 Garner Trafficway\\nPort William, KY 91119</td>\n",
411
+ " <td>2020-03-25 15:18:29</td>\n",
412
+ " <td>9.2</td>\n",
413
+ " <td>Glucose: 78.1</td>\n",
414
+ " <td>Calcium: 9.8</td>\n",
415
+ " <td>Electrolytes: Abnormal</td>\n",
416
+ " <td>228.4</td>\n",
417
+ " <td>84</td>\n",
418
+ " </tr>\n",
419
+ " <tr>\n",
420
+ " <th>99</th>\n",
421
+ " <td>Anthony</td>\n",
422
+ " <td>Arias</td>\n",
423
+ " <td>29343897</td>\n",
424
+ " <td>Male</td>\n",
425
+ " <td>814 Christopher Meadows\\nPatriciashire, NY 73616</td>\n",
426
+ " <td>Arkansas</td>\n",
427
+ " <td>035 Sharon Road Suite 873\\nNorth Jamesmouth, A...</td>\n",
428
+ " <td>2021-01-07 17:20:17</td>\n",
429
+ " <td>13.7</td>\n",
430
+ " <td>Glucose: 113.7</td>\n",
431
+ " <td>Calcium: 8.7</td>\n",
432
+ " <td>Electrolytes: Normal</td>\n",
433
+ " <td>214.2</td>\n",
434
+ " <td>50</td>\n",
435
+ " </tr>\n",
436
+ " </tbody>\n",
437
+ "</table>\n",
438
+ "<p>100 rows × 14 columns</p>\n",
439
+ "</div>\n",
440
+ " <div class=\"colab-df-buttons\">\n",
441
+ "\n",
442
+ " <div class=\"colab-df-container\">\n",
443
+ " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-29ac59bd-88ba-46db-9129-4154d4fdd690')\"\n",
444
+ " title=\"Convert this dataframe to an interactive table.\"\n",
445
+ " style=\"display:none;\">\n",
446
+ "\n",
447
+ " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
448
+ " <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
449
+ " </svg>\n",
450
+ " </button>\n",
451
+ "\n",
452
+ " <style>\n",
453
+ " .colab-df-container {\n",
454
+ " display:flex;\n",
455
+ " gap: 12px;\n",
456
+ " }\n",
457
+ "\n",
458
+ " .colab-df-convert {\n",
459
+ " background-color: #E8F0FE;\n",
460
+ " border: none;\n",
461
+ " border-radius: 50%;\n",
462
+ " cursor: pointer;\n",
463
+ " display: none;\n",
464
+ " fill: #1967D2;\n",
465
+ " height: 32px;\n",
466
+ " padding: 0 0 0 0;\n",
467
+ " width: 32px;\n",
468
+ " }\n",
469
+ "\n",
470
+ " .colab-df-convert:hover {\n",
471
+ " background-color: #E2EBFA;\n",
472
+ " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
473
+ " fill: #174EA6;\n",
474
+ " }\n",
475
+ "\n",
476
+ " .colab-df-buttons div {\n",
477
+ " margin-bottom: 4px;\n",
478
+ " }\n",
479
+ "\n",
480
+ " [theme=dark] .colab-df-convert {\n",
481
+ " background-color: #3B4455;\n",
482
+ " fill: #D2E3FC;\n",
483
+ " }\n",
484
+ "\n",
485
+ " [theme=dark] .colab-df-convert:hover {\n",
486
+ " background-color: #434B5C;\n",
487
+ " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
488
+ " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
489
+ " fill: #FFFFFF;\n",
490
+ " }\n",
491
+ " </style>\n",
492
+ "\n",
493
+ " <script>\n",
494
+ " const buttonEl =\n",
495
+ " document.querySelector('#df-29ac59bd-88ba-46db-9129-4154d4fdd690 button.colab-df-convert');\n",
496
+ " buttonEl.style.display =\n",
497
+ " google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
498
+ "\n",
499
+ " async function convertToInteractive(key) {\n",
500
+ " const element = document.querySelector('#df-29ac59bd-88ba-46db-9129-4154d4fdd690');\n",
501
+ " const dataTable =\n",
502
+ " await google.colab.kernel.invokeFunction('convertToInteractive',\n",
503
+ " [key], {});\n",
504
+ " if (!dataTable) return;\n",
505
+ "\n",
506
+ " const docLinkHtml = 'Like what you see? Visit the ' +\n",
507
+ " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
508
+ " + ' to learn more about interactive tables.';\n",
509
+ " element.innerHTML = '';\n",
510
+ " dataTable['output_type'] = 'display_data';\n",
511
+ " await google.colab.output.renderOutput(dataTable, element);\n",
512
+ " const docLink = document.createElement('div');\n",
513
+ " docLink.innerHTML = docLinkHtml;\n",
514
+ " element.appendChild(docLink);\n",
515
+ " }\n",
516
+ " </script>\n",
517
+ " </div>\n",
518
+ "\n",
519
+ "\n",
520
+ "<div id=\"df-0c2b3b01-4853-4cc3-8ead-48095a1eccb4\">\n",
521
+ " <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-0c2b3b01-4853-4cc3-8ead-48095a1eccb4')\"\n",
522
+ " title=\"Suggest charts\"\n",
523
+ " style=\"display:none;\">\n",
524
+ "\n",
525
+ "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
526
+ " width=\"24px\">\n",
527
+ " <g>\n",
528
+ " <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
529
+ " </g>\n",
530
+ "</svg>\n",
531
+ " </button>\n",
532
+ "\n",
533
+ "<style>\n",
534
+ " .colab-df-quickchart {\n",
535
+ " --bg-color: #E8F0FE;\n",
536
+ " --fill-color: #1967D2;\n",
537
+ " --hover-bg-color: #E2EBFA;\n",
538
+ " --hover-fill-color: #174EA6;\n",
539
+ " --disabled-fill-color: #AAA;\n",
540
+ " --disabled-bg-color: #DDD;\n",
541
+ " }\n",
542
+ "\n",
543
+ " [theme=dark] .colab-df-quickchart {\n",
544
+ " --bg-color: #3B4455;\n",
545
+ " --fill-color: #D2E3FC;\n",
546
+ " --hover-bg-color: #434B5C;\n",
547
+ " --hover-fill-color: #FFFFFF;\n",
548
+ " --disabled-bg-color: #3B4455;\n",
549
+ " --disabled-fill-color: #666;\n",
550
+ " }\n",
551
+ "\n",
552
+ " .colab-df-quickchart {\n",
553
+ " background-color: var(--bg-color);\n",
554
+ " border: none;\n",
555
+ " border-radius: 50%;\n",
556
+ " cursor: pointer;\n",
557
+ " display: none;\n",
558
+ " fill: var(--fill-color);\n",
559
+ " height: 32px;\n",
560
+ " padding: 0;\n",
561
+ " width: 32px;\n",
562
+ " }\n",
563
+ "\n",
564
+ " .colab-df-quickchart:hover {\n",
565
+ " background-color: var(--hover-bg-color);\n",
566
+ " box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
567
+ " fill: var(--button-hover-fill-color);\n",
568
+ " }\n",
569
+ "\n",
570
+ " .colab-df-quickchart-complete:disabled,\n",
571
+ " .colab-df-quickchart-complete:disabled:hover {\n",
572
+ " background-color: var(--disabled-bg-color);\n",
573
+ " fill: var(--disabled-fill-color);\n",
574
+ " box-shadow: none;\n",
575
+ " }\n",
576
+ "\n",
577
+ " .colab-df-spinner {\n",
578
+ " border: 2px solid var(--fill-color);\n",
579
+ " border-color: transparent;\n",
580
+ " border-bottom-color: var(--fill-color);\n",
581
+ " animation:\n",
582
+ " spin 1s steps(1) infinite;\n",
583
+ " }\n",
584
+ "\n",
585
+ " @keyframes spin {\n",
586
+ " 0% {\n",
587
+ " border-color: transparent;\n",
588
+ " border-bottom-color: var(--fill-color);\n",
589
+ " border-left-color: var(--fill-color);\n",
590
+ " }\n",
591
+ " 20% {\n",
592
+ " border-color: transparent;\n",
593
+ " border-left-color: var(--fill-color);\n",
594
+ " border-top-color: var(--fill-color);\n",
595
+ " }\n",
596
+ " 30% {\n",
597
+ " border-color: transparent;\n",
598
+ " border-left-color: var(--fill-color);\n",
599
+ " border-top-color: var(--fill-color);\n",
600
+ " border-right-color: var(--fill-color);\n",
601
+ " }\n",
602
+ " 40% {\n",
603
+ " border-color: transparent;\n",
604
+ " border-right-color: var(--fill-color);\n",
605
+ " border-top-color: var(--fill-color);\n",
606
+ " }\n",
607
+ " 60% {\n",
608
+ " border-color: transparent;\n",
609
+ " border-right-color: var(--fill-color);\n",
610
+ " }\n",
611
+ " 80% {\n",
612
+ " border-color: transparent;\n",
613
+ " border-right-color: var(--fill-color);\n",
614
+ " border-bottom-color: var(--fill-color);\n",
615
+ " }\n",
616
+ " 90% {\n",
617
+ " border-color: transparent;\n",
618
+ " border-bottom-color: var(--fill-color);\n",
619
+ " }\n",
620
+ " }\n",
621
+ "</style>\n",
622
+ "\n",
623
+ " <script>\n",
624
+ " async function quickchart(key) {\n",
625
+ " const quickchartButtonEl =\n",
626
+ " document.querySelector('#' + key + ' button');\n",
627
+ " quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
628
+ " quickchartButtonEl.classList.add('colab-df-spinner');\n",
629
+ " try {\n",
630
+ " const charts = await google.colab.kernel.invokeFunction(\n",
631
+ " 'suggestCharts', [key], {});\n",
632
+ " } catch (error) {\n",
633
+ " console.error('Error during call to suggestCharts:', error);\n",
634
+ " }\n",
635
+ " quickchartButtonEl.classList.remove('colab-df-spinner');\n",
636
+ " quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
637
+ " }\n",
638
+ " (() => {\n",
639
+ " let quickchartButtonEl =\n",
640
+ " document.querySelector('#df-0c2b3b01-4853-4cc3-8ead-48095a1eccb4 button');\n",
641
+ " quickchartButtonEl.style.display =\n",
642
+ " google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
643
+ " })();\n",
644
+ " </script>\n",
645
+ "</div>\n",
646
+ "\n",
647
+ " <div id=\"id_ea31243d-4d45-4c93-ab1b-65929910263b\">\n",
648
+ " <style>\n",
649
+ " .colab-df-generate {\n",
650
+ " background-color: #E8F0FE;\n",
651
+ " border: none;\n",
652
+ " border-radius: 50%;\n",
653
+ " cursor: pointer;\n",
654
+ " display: none;\n",
655
+ " fill: #1967D2;\n",
656
+ " height: 32px;\n",
657
+ " padding: 0 0 0 0;\n",
658
+ " width: 32px;\n",
659
+ " }\n",
660
+ "\n",
661
+ " .colab-df-generate:hover {\n",
662
+ " background-color: #E2EBFA;\n",
663
+ " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
664
+ " fill: #174EA6;\n",
665
+ " }\n",
666
+ "\n",
667
+ " [theme=dark] .colab-df-generate {\n",
668
+ " background-color: #3B4455;\n",
669
+ " fill: #D2E3FC;\n",
670
+ " }\n",
671
+ "\n",
672
+ " [theme=dark] .colab-df-generate:hover {\n",
673
+ " background-color: #434B5C;\n",
674
+ " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
675
+ " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
676
+ " fill: #FFFFFF;\n",
677
+ " }\n",
678
+ " </style>\n",
679
+ " <button class=\"colab-df-generate\" onclick=\"generateWithVariable('df')\"\n",
680
+ " title=\"Generate code using this dataframe.\"\n",
681
+ " style=\"display:none;\">\n",
682
+ "\n",
683
+ " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
684
+ " width=\"24px\">\n",
685
+ " <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n",
686
+ " </svg>\n",
687
+ " </button>\n",
688
+ " <script>\n",
689
+ " (() => {\n",
690
+ " const buttonEl =\n",
691
+ " document.querySelector('#id_ea31243d-4d45-4c93-ab1b-65929910263b button.colab-df-generate');\n",
692
+ " buttonEl.style.display =\n",
693
+ " google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
694
+ "\n",
695
+ " buttonEl.onclick = () => {\n",
696
+ " google.colab.notebook.generateWithVariable('df');\n",
697
+ " }\n",
698
+ " })();\n",
699
+ " </script>\n",
700
+ " </div>\n",
701
+ "\n",
702
+ " </div>\n",
703
+ " </div>\n"
704
+ ],
705
+ "application/vnd.google.colaboratory.intrinsic+json": {
706
+ "type": "dataframe",
707
+ "variable_name": "df",
708
+ "summary": "{\n \"name\": \"df\",\n \"rows\": 100,\n \"fields\": [\n {\n \"column\": \"First Name\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 82,\n \"samples\": [\n \"Daniel\",\n \"John\",\n \"Nancy\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Last Name\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 86,\n \"samples\": [\n \"Myers\",\n \"Williams\",\n \"Martinez\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Policy No.\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 24995568,\n \"min\": 10077874,\n \"max\": 97705492,\n \"num_unique_values\": 100,\n \"samples\": [\n 51225828,\n 79531798,\n 17159895\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Gender\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Male\",\n \"Female\",\n \"Other\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Address\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 100,\n \"samples\": [\n \"49286 Norman Trace\\nBaileyburgh, FM 60919\",\n \"67471 Orr Divide Suite 876\\nAlexanderport, IL 62852\",\n \"PSC 8205, Box 3340\\nAPO AP 96934\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"State\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 41,\n \"samples\": [\n \"Colorado\",\n \"Iowa\",\n \"Rhode Island\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Physician Office Address\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 100,\n \"samples\": [\n \"PSC 4665, Box 9364\\nAPO AE 81123\",\n \"Unit 8213 Box 4682\\nDPO AA 62814\",\n \"27278 Robbins Place\\nRonaldville, OK 66693\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Time of Test\",\n 
\"properties\": {\n \"dtype\": \"object\",\n \"num_unique_values\": 100,\n \"samples\": [\n \"2022-08-18 11:22:50\",\n \"2022-09-23 17:04:36\",\n \"2023-12-27 13:37:21\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"A1C (diabetes)\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2.8247956929786455,\n \"min\": 4.6,\n \"max\": 14.0,\n \"num_unique_values\": 62,\n \"samples\": [\n 13.2,\n 9.2,\n 8.3\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"BMP (glucose)\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 95,\n \"samples\": [\n \"Glucose: 85.8\",\n \"Glucose: 76.4\",\n \"Glucose: 118.3\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"BMP (calcium)\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 17,\n \"samples\": [\n \"Calcium: 9.4\",\n \"Calcium: 9.9\",\n \"Calcium: 8.9\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"BMP (electrolytes)\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Electrolytes: Abnormal\",\n \"Electrolytes: Normal\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Lipid Panel (cholesterol)\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 35.04015605506968,\n \"min\": 127.9,\n \"max\": 239.6,\n \"num_unique_values\": 98,\n \"samples\": [\n 141.8,\n 191.6\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"ESR (blood cell clumps)\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 31,\n \"min\": 0,\n \"max\": 100,\n \"num_unique_values\": 63,\n \"samples\": [\n 89,\n 96\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
709
+ }
710
+ },
711
+ "metadata": {},
712
+ "execution_count": 8
713
+ }
714
+ ]
715
+ },
716
+ {
717
+ "cell_type": "code",
718
+ "source": [
719
+ "df.to_csv(\"sample_patient_bloodtest_data.csv\", index=False)"
720
+ ],
721
+ "metadata": {
722
+ "id": "8awx6Z9qBPIc"
723
+ },
724
+ "execution_count": 10,
725
+ "outputs": []
726
+ },
727
+ {
728
+ "cell_type": "code",
729
+ "source": [],
730
+ "metadata": {
731
+ "id": "ZXTS62RSBZKi"
732
+ },
733
+ "execution_count": null,
734
+ "outputs": []
735
+ }
736
+ ]
737
+ }