diff --git "a/Pandas_practice_DataEngg.ipynb" "b/Pandas_practice_DataEngg.ipynb" new file mode 100644--- /dev/null +++ "b/Pandas_practice_DataEngg.ipynb" @@ -0,0 +1,3279 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " 0\n", + "0 1\n", + "1 2\n", + "2 3\n", + "3 4\n" + ] + } + ], + "source": [ + "df=[1,2,3,4]\n", + "print(pd.DataFrame(df))" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0 1\n", + "1 2\n", + "2 3\n", + "3 4\n", + "dtype: int64\n" + ] + } + ], + "source": [ + "print(pd.Series(df))" + ] + }, + { + "cell_type": "code", + "execution_count": 115, + "metadata": {}, + "outputs": [], + "source": [ + "Employ=pd.read_csv(\"employees.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 116, + "metadata": {}, + "outputs": [], + "source": [ + "Employ_dub=Employ.head(20)" + ] + }, + { + "cell_type": "code", + "execution_count": 117, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>First Name</th>\n", + " <th>Gender</th>\n", + " <th>Start Date</th>\n", + " <th>Last Login Time</th>\n", + " <th>Salary</th>\n", + " <th>Bonus %</th>\n", + " <th>Senior Management</th>\n", + " <th>Team</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>Douglas</td>\n", + " <td>Male</td>\n", + " <td>8/6/1993</td>\n", + " <td>12:42 PM</td>\n", + " <td>97308</td>\n", + " <td>6.945</td>\n", + " <td>True</td>\n", + " <td>Marketing</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>Thomas</td>\n", + " <td>Male</td>\n", + " <td>3/31/1996</td>\n", + " <td>6:53 AM</td>\n", + " <td>61933</td>\n", + " <td>4.170</td>\n", + " <td>True</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>Maria</td>\n", + " <td>Female</td>\n", + " <td>4/23/1993</td>\n", + " <td>11:17 AM</td>\n", + " <td>130590</td>\n", + " <td>11.858</td>\n", + " <td>False</td>\n", + " <td>Finance</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>Jerry</td>\n", + " <td>Male</td>\n", + " <td>3/4/2005</td>\n", + " <td>1:00 PM</td>\n", + " <td>138705</td>\n", + " <td>9.340</td>\n", + " <td>True</td>\n", + " <td>Finance</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>Larry</td>\n", + " <td>Male</td>\n", + " <td>1/24/1998</td>\n", + " <td>4:47 PM</td>\n", + " <td>101004</td>\n", + " <td>1.389</td>\n", + " <td>True</td>\n", + " <td>Client Services</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>Dennis</td>\n", + " <td>Male</td>\n", + " <td>4/18/1987</td>\n", + " <td>1:35 AM</td>\n", + " <td>115163</td>\n", + " <td>10.125</td>\n", + " <td>False</td>\n", + " <td>Legal</td>\n", + " </tr>\n", + " <tr>\n", + " <th>6</th>\n", + " <td>Ruby</td>\n", + " <td>Female</td>\n", + " <td>8/17/1987</td>\n", + " <td>4:20 PM</td>\n", + " <td>65476</td>\n", + " <td>10.012</td>\n", + " <td>True</td>\n", + " <td>Product</td>\n", + " </tr>\n", + " <tr>\n", + " <th>7</th>\n", + " <td>NaN</td>\n", + " <td>Female</td>\n", + " <td>7/20/2015</td>\n", + " <td>10:43 AM</td>\n", + " <td>45906</td>\n", + " <td>11.598</td>\n", + " <td>NaN</td>\n", + " <td>Finance</td>\n", + " </tr>\n", + " <tr>\n", + " <th>8</th>\n", + " <td>Angela</td>\n", + " <td>Female</td>\n", + " <td>11/22/2005</td>\n", + " <td>6:29 AM</td>\n", + " <td>95570</td>\n", + " <td>18.523</td>\n", + " <td>True</td>\n", + " <td>Engineering</td>\n", + " </tr>\n", + " <tr>\n", + " <th>9</th>\n", + " <td>Frances</td>\n", + " <td>Female</td>\n", + " <td>8/8/2002</td>\n", + " <td>6:51 AM</td>\n", + " <td>139852</td>\n", + " <td>7.524</td>\n", + " <td>True</td>\n", + " <td>Business Development</td>\n", + " </tr>\n", + " <tr>\n", + " <th>10</th>\n", + " <td>Louise</td>\n", + " <td>Female</td>\n", + " <td>8/12/1980</td>\n", + " <td>9:01 AM</td>\n", + " <td>63241</td>\n", + " <td>15.132</td>\n", + " <td>True</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>11</th>\n", + " <td>Julie</td>\n", + " <td>Female</td>\n", + " <td>10/26/1997</td>\n", + " <td>3:19 PM</td>\n", + " <td>102508</td>\n", + " <td>12.637</td>\n", + " <td>True</td>\n", + " <td>Legal</td>\n", + " </tr>\n", + " <tr>\n", + " <th>12</th>\n", + " <td>Brandon</td>\n", + " <td>Male</td>\n", + " <td>12/1/1980</td>\n", + " <td>1:08 AM</td>\n", + " <td>112807</td>\n", + " <td>17.492</td>\n", + " <td>True</td>\n", + " <td>Human Resources</td>\n", + " </tr>\n", + " <tr>\n", + " <th>13</th>\n", + " <td>Gary</td>\n", + " <td>Male</td>\n", + " <td>1/27/2008</td>\n", + " <td>11:40 PM</td>\n", + " <td>109831</td>\n", + " <td>5.831</td>\n", + " <td>False</td>\n", + " <td>Sales</td>\n", + " </tr>\n", + " <tr>\n", + " <th>14</th>\n", + " <td>Kimberly</td>\n", + " <td>Female</td>\n", + " <td>1/14/1999</td>\n", + " <td>7:13 AM</td>\n", + " <td>41426</td>\n", + " <td>14.543</td>\n", + " <td>True</td>\n", + " <td>Finance</td>\n", + " </tr>\n", + " <tr>\n", + " <th>15</th>\n", + " <td>Lillian</td>\n", + " <td>Female</td>\n", + " <td>6/5/2016</td>\n", + " <td>6:09 AM</td>\n", + " <td>59414</td>\n", + " <td>1.256</td>\n", + " <td>False</td>\n", + " <td>Product</td>\n", + " </tr>\n", + " <tr>\n", + " <th>16</th>\n", + " <td>Jeremy</td>\n", + " <td>Male</td>\n", + " <td>9/21/2010</td>\n", + " <td>5:56 AM</td>\n", + " <td>90370</td>\n", + " <td>7.369</td>\n", + " <td>False</td>\n", + " <td>Human Resources</td>\n", + " </tr>\n", + " <tr>\n", + " <th>17</th>\n", + " <td>Shawn</td>\n", + " <td>Male</td>\n", + " <td>12/7/1986</td>\n", + " <td>7:45 PM</td>\n", + " <td>111737</td>\n", + " <td>6.414</td>\n", + " <td>False</td>\n", + " <td>Product</td>\n", + " </tr>\n", + " <tr>\n", + " <th>18</th>\n", + " <td>Diana</td>\n", + " <td>Female</td>\n", + " <td>10/23/1981</td>\n", + " <td>10:27 AM</td>\n", + " <td>132940</td>\n", + " <td>19.082</td>\n", + " <td>False</td>\n", + " <td>Client Services</td>\n", + " </tr>\n", + " <tr>\n", + " <th>19</th>\n", + " <td>Donna</td>\n", + " <td>Female</td>\n", + " <td>7/22/2010</td>\n", + " <td>3:48 AM</td>\n", + " <td>81014</td>\n", + " <td>1.894</td>\n", + " <td>False</td>\n", + " <td>Product</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " First Name Gender Start Date Last Login Time Salary Bonus % \\\n", + "0 Douglas Male 8/6/1993 12:42 PM 97308 6.945 \n", + "1 Thomas Male 3/31/1996 6:53 AM 61933 4.170 \n", + "2 Maria Female 4/23/1993 11:17 AM 130590 11.858 \n", + "3 Jerry Male 3/4/2005 1:00 PM 138705 9.340 \n", + "4 Larry Male 1/24/1998 4:47 PM 101004 1.389 \n", + "5 Dennis Male 4/18/1987 1:35 AM 115163 10.125 \n", + "6 Ruby Female 8/17/1987 4:20 PM 65476 10.012 \n", + "7 NaN Female 7/20/2015 10:43 AM 45906 11.598 \n", + "8 Angela Female 11/22/2005 6:29 AM 95570 18.523 \n", + "9 Frances Female 8/8/2002 6:51 AM 139852 7.524 \n", + "10 Louise Female 8/12/1980 9:01 AM 63241 15.132 \n", + "11 Julie Female 10/26/1997 3:19 PM 102508 12.637 \n", + "12 Brandon Male 12/1/1980 1:08 AM 112807 17.492 \n", + "13 Gary Male 1/27/2008 11:40 PM 109831 5.831 \n", + "14 Kimberly Female 1/14/1999 7:13 AM 41426 14.543 \n", + "15 Lillian Female 6/5/2016 6:09 AM 59414 1.256 \n", + "16 Jeremy Male 9/21/2010 5:56 AM 90370 7.369 \n", + "17 Shawn Male 12/7/1986 7:45 PM 111737 6.414 \n", + "18 Diana Female 10/23/1981 10:27 AM 132940 19.082 \n", + "19 Donna Female 7/22/2010 3:48 AM 81014 1.894 \n", + "\n", + " Senior Management Team \n", + "0 True Marketing \n", + "1 True NaN \n", + "2 False Finance \n", + "3 True Finance \n", + "4 True Client Services \n", + "5 False Legal \n", + "6 True Product \n", + "7 NaN Finance \n", + "8 True Engineering \n", + "9 True Business Development \n", + "10 True NaN \n", + "11 True Legal \n", + "12 True Human Resources \n", + "13 False Sales \n", + "14 True Finance \n", + "15 False Product \n", + "16 False Human Resources \n", + "17 False Product \n", + "18 False Client Services \n", + "19 False Product " + ] + }, + "execution_count": 117, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Employ_dub" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "<class 'pandas.core.frame.DataFrame'>\n", + "RangeIndex: 20 entries, 0 to 19\n", + "Data columns (total 8 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 First Name 19 non-null object \n", + " 1 Gender 20 non-null object \n", + " 2 Start Date 20 non-null object \n", + " 3 Last Login Time 20 non-null object \n", + " 4 Salary 20 non-null int64 \n", + " 5 Bonus % 20 non-null float64\n", + " 6 Senior Management 19 non-null object \n", + " 7 Team 18 non-null object \n", + "dtypes: float64(1), int64(1), object(6)\n", + "memory usage: 868.0+ bytes\n" + ] + } + ], + "source": [ + "#total info about the employee\n", + "Employ_dub.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>First Name</th>\n", + " <th>Gender</th>\n", + " <th>Start Date</th>\n", + " <th>Last Login Time</th>\n", + " <th>Salary</th>\n", + " <th>Bonus %</th>\n", + " <th>Senior Management</th>\n", + " <th>Team</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>6</th>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>7</th>\n", + " <td>True</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>True</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>8</th>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>9</th>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>10</th>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>11</th>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>12</th>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>13</th>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>14</th>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>15</th>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>16</th>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>17</th>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>18</th>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>19</th>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " First Name Gender Start Date Last Login Time Salary Bonus % \\\n", + "0 False False False False False False \n", + "1 False False False False False False \n", + "2 False False False False False False \n", + "3 False False False False False False \n", + "4 False False False False False False \n", + "5 False False False False False False \n", + "6 False False False False False False \n", + "7 True False False False False False \n", + "8 False False False False False False \n", + "9 False False False False False False \n", + "10 False False False False False False \n", + "11 False False False False False False \n", + "12 False False False False False False \n", + "13 False False False False False False \n", + "14 False False False False False False \n", + "15 False False False False False False \n", + "16 False False False False False False \n", + "17 False False False False False False \n", + "18 False False False False False False \n", + "19 False False False False False False \n", + "\n", + " Senior Management Team \n", + "0 False False \n", + "1 False True \n", + "2 False False \n", + "3 False False \n", + "4 False False \n", + "5 False False \n", + "6 False False \n", + "7 True False \n", + "8 False False \n", + "9 False False \n", + "10 False True \n", + "11 False False \n", + "12 False False \n", + "13 False False \n", + "14 False False \n", + "15 False False \n", + "16 False False \n", + "17 False False \n", + "18 False False \n", + "19 False False " + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Employ_dub.isnull()" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "First Name 1\n", + "Gender 0\n", + "Start Date 0\n", + "Last Login Time 0\n", + "Salary 0\n", + "Bonus % 0\n", + "Senior Management 1\n", + "Team 2\n", + "dtype: int64" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#checking for the null values in the dataset of employee\n", + "Employ_dub.isnull().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [], + "source": [ + "#changing the name of the dataset\n", + "ed=Employ_dub" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(20, 8)" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#dimension of the dataset\n", + "ed.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['First Name', 'Gender', 'Start Date', 'Last Login Time', 'Salary',\n", + " 'Bonus %', 'Senior Management', 'Team'],\n", + " dtype='object')" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ed.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [], + "source": [ + "#working on the dictionary for a while:" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [], + "source": [ + "#creating test objects:\n", + "import numpy as np\n", + "ff=pd.DataFrame(np.random.rand(20,5))" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>0</th>\n", + " <th>1</th>\n", + " <th>2</th>\n", + " <th>3</th>\n", + " <th>4</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>0.020780</td>\n", + " <td>0.365190</td>\n", + " <td>0.673825</td>\n", + " <td>0.800112</td>\n", + " <td>0.188644</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>0.660845</td>\n", + " <td>0.265913</td>\n", + " <td>0.445028</td>\n", + " <td>0.889438</td>\n", + " <td>0.601047</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>0.646987</td>\n", + " <td>0.926823</td>\n", + " <td>0.722838</td>\n", + " <td>0.475271</td>\n", + " <td>0.827945</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>0.871724</td>\n", + " <td>0.290353</td>\n", + " <td>0.099578</td>\n", + " <td>0.109949</td>\n", + " <td>0.229182</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>0.704794</td>\n", + " <td>0.884062</td>\n", + " <td>0.751327</td>\n", + " <td>0.595746</td>\n", + " <td>0.612269</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>0.371269</td>\n", + " <td>0.560512</td>\n", + " <td>0.510264</td>\n", + " <td>0.247923</td>\n", + " <td>0.618853</td>\n", + " </tr>\n", + " <tr>\n", + " <th>6</th>\n", + " <td>0.150398</td>\n", + " <td>0.116999</td>\n", + " <td>0.934865</td>\n", + " <td>0.315723</td>\n", + " <td>0.221538</td>\n", + " </tr>\n", + " <tr>\n", + " <th>7</th>\n", + " <td>0.556336</td>\n", + " <td>0.875514</td>\n", + " <td>0.471526</td>\n", + " <td>0.539511</td>\n", + " <td>0.271221</td>\n", + " </tr>\n", + " <tr>\n", + " <th>8</th>\n", + " <td>0.428221</td>\n", + " <td>0.546766</td>\n", + " <td>0.921274</td>\n", + " <td>0.500520</td>\n", + " <td>0.400341</td>\n", + " </tr>\n", + " <tr>\n", + " <th>9</th>\n", + " <td>0.150170</td>\n", + " <td>0.802378</td>\n", + " <td>0.608124</td>\n", + " <td>0.342871</td>\n", + " <td>0.076631</td>\n", + " </tr>\n", + " <tr>\n", + " <th>10</th>\n", + " <td>0.099049</td>\n", + " <td>0.280748</td>\n", + " <td>0.865939</td>\n", + " <td>0.214541</td>\n", + " <td>0.083318</td>\n", + " </tr>\n", + " <tr>\n", + " <th>11</th>\n", + " <td>0.042867</td>\n", + " <td>0.701639</td>\n", + " <td>0.051457</td>\n", + " <td>0.691385</td>\n", + " <td>0.051529</td>\n", + " </tr>\n", + " <tr>\n", + " <th>12</th>\n", + " <td>0.530845</td>\n", + " <td>0.248395</td>\n", + " <td>0.433733</td>\n", + " <td>0.049458</td>\n", + " <td>0.314959</td>\n", + " </tr>\n", + " <tr>\n", + " <th>13</th>\n", + " <td>0.142230</td>\n", + " <td>0.746634</td>\n", + " <td>0.536247</td>\n", + " <td>0.096499</td>\n", + " <td>0.123294</td>\n", + " </tr>\n", + " <tr>\n", + " <th>14</th>\n", + " <td>0.139630</td>\n", + " <td>0.056464</td>\n", + " <td>0.595644</td>\n", + " <td>0.764071</td>\n", + " <td>0.193826</td>\n", + " </tr>\n", + " <tr>\n", + " <th>15</th>\n", + " <td>0.709624</td>\n", + " <td>0.590262</td>\n", + " <td>0.816268</td>\n", + " <td>0.187931</td>\n", + " <td>0.366224</td>\n", + " </tr>\n", + " <tr>\n", + " <th>16</th>\n", + " <td>0.982939</td>\n", + " <td>0.260358</td>\n", + " <td>0.918897</td>\n", + " <td>0.531278</td>\n", + " <td>0.304655</td>\n", + " </tr>\n", + " <tr>\n", + " <th>17</th>\n", + " <td>0.381823</td>\n", + " <td>0.003594</td>\n", + " <td>0.052597</td>\n", + " <td>0.921529</td>\n", + " <td>0.022103</td>\n", + " </tr>\n", + " <tr>\n", + " <th>18</th>\n", + " <td>0.227944</td>\n", + " <td>0.706832</td>\n", + " <td>0.137266</td>\n", + " <td>0.129158</td>\n", + " <td>0.882734</td>\n", + " </tr>\n", + " <tr>\n", + " <th>19</th>\n", + " <td>0.226257</td>\n", + " <td>0.818213</td>\n", + " <td>0.326071</td>\n", + " <td>0.230419</td>\n", + " <td>0.668891</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " 0 1 2 3 4\n", + "0 0.020780 0.365190 0.673825 0.800112 0.188644\n", + "1 0.660845 0.265913 0.445028 0.889438 0.601047\n", + "2 0.646987 0.926823 0.722838 0.475271 0.827945\n", + "3 0.871724 0.290353 0.099578 0.109949 0.229182\n", + "4 0.704794 0.884062 0.751327 0.595746 0.612269\n", + "5 0.371269 0.560512 0.510264 0.247923 0.618853\n", + "6 0.150398 0.116999 0.934865 0.315723 0.221538\n", + "7 0.556336 0.875514 0.471526 0.539511 0.271221\n", + "8 0.428221 0.546766 0.921274 0.500520 0.400341\n", + "9 0.150170 0.802378 0.608124 0.342871 0.076631\n", + "10 0.099049 0.280748 0.865939 0.214541 0.083318\n", + "11 0.042867 0.701639 0.051457 0.691385 0.051529\n", + "12 0.530845 0.248395 0.433733 0.049458 0.314959\n", + "13 0.142230 0.746634 0.536247 0.096499 0.123294\n", + "14 0.139630 0.056464 0.595644 0.764071 0.193826\n", + "15 0.709624 0.590262 0.816268 0.187931 0.366224\n", + "16 0.982939 0.260358 0.918897 0.531278 0.304655\n", + "17 0.381823 0.003594 0.052597 0.921529 0.022103\n", + "18 0.227944 0.706832 0.137266 0.129158 0.882734\n", + "19 0.226257 0.818213 0.326071 0.230419 0.668891" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ff" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "<class 'pandas.core.frame.DataFrame'>\n", + "RangeIndex: 20 entries, 0 to 19\n", + "Data columns (total 5 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 0 20 non-null float64\n", + " 1 1 20 non-null float64\n", + " 2 2 20 non-null float64\n", + " 3 3 20 non-null float64\n", + " 4 4 20 non-null float64\n", + "dtypes: float64(5)\n", + "memory usage: 868.0 bytes\n" + ] + } + ], + "source": [ + "ff.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>date</th>\n", + " <th>students</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>10/9/2020</td>\n", + " <td>10</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>11/09/2020</td>\n", + " <td>20</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>12/09/2020</td>\n", + " <td>30</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " date students\n", + "0 10/9/2020 10\n", + "1 11/09/2020 20\n", + "2 12/09/2020 30" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#data functon:\n", + "date=pd.DataFrame(\n", + "{\n", + "\"date\":['10/9/2020','11/09/2020','12/09/2020'],\n", + "\"students\":[10,20,30]})\n" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "First Name Gender Start Date Last Login Time Salary Bonus % Senior Management Team \n", + "Angela Female 11/22/2005 6:29 AM 95570 18.523 True Engineering 1\n", + "Jerry Male 3/4/2005 1:00 PM 138705 9.340 True Finance 1\n", + "Ruby Female 8/17/1987 4:20 PM 65476 10.012 True Product 1\n", + "Maria Female 4/23/1993 11:17 AM 130590 11.858 False Finance 1\n", + "Lillian Female 6/5/2016 6:09 AM 59414 1.256 False Product 1\n", + "Larry Male 1/24/1998 4:47 PM 101004 1.389 True Client Services 1\n", + "Kimberly Female 1/14/1999 7:13 AM 41426 14.543 True Finance 1\n", + "Julie Female 10/26/1997 3:19 PM 102508 12.637 True Legal 1\n", + "Jeremy Male 9/21/2010 5:56 AM 90370 7.369 False Human Resources 1\n", + "Brandon Male 12/1/1980 1:08 AM 112807 17.492 True Human Resources 1\n", + "Gary Male 1/27/2008 11:40 PM 109831 5.831 False Sales 1\n", + "Frances Female 8/8/2002 6:51 AM 139852 7.524 True Business Development 1\n", + "Douglas Male 8/6/1993 12:42 PM 97308 6.945 True Marketing 1\n", + "Donna Female 7/22/2010 3:48 AM 81014 1.894 False Product 1\n", + "Diana Female 10/23/1981 10:27 AM 132940 19.082 False Client Services 1\n", + "Dennis Male 4/18/1987 1:35 AM 115163 10.125 False Legal 1\n", + "Shawn Male 12/7/1986 7:45 PM 111737 6.414 False Product 1\n", + "dtype: int64" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ed.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Gender</th>\n", + " <th>Salary</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>Male</td>\n", + " <td>97308</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>Male</td>\n", + " <td>61933</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>Female</td>\n", + " <td>130590</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>Male</td>\n", + " <td>138705</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>Male</td>\n", + " <td>101004</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>Male</td>\n", + " <td>115163</td>\n", + " </tr>\n", + " <tr>\n", + " <th>6</th>\n", + " <td>Female</td>\n", + " <td>65476</td>\n", + " </tr>\n", + " <tr>\n", + " <th>7</th>\n", + " <td>Female</td>\n", + " <td>45906</td>\n", + " </tr>\n", + " <tr>\n", + " <th>8</th>\n", + " <td>Female</td>\n", + " <td>95570</td>\n", + " </tr>\n", + " <tr>\n", + " <th>9</th>\n", + " <td>Female</td>\n", + " <td>139852</td>\n", + " </tr>\n", + " <tr>\n", + " <th>10</th>\n", + " <td>Female</td>\n", + " <td>63241</td>\n", + " </tr>\n", + " <tr>\n", + " <th>11</th>\n", + " <td>Female</td>\n", + " <td>102508</td>\n", + " </tr>\n", + " <tr>\n", + " <th>12</th>\n", + " <td>Male</td>\n", + " <td>112807</td>\n", + " </tr>\n", + " <tr>\n", + " <th>13</th>\n", + " <td>Male</td>\n", + " <td>109831</td>\n", + " </tr>\n", + " <tr>\n", + " <th>14</th>\n", + " <td>Female</td>\n", + " <td>41426</td>\n", + " </tr>\n", + " <tr>\n", + " <th>15</th>\n", + " <td>Female</td>\n", + " <td>59414</td>\n", + " </tr>\n", + " <tr>\n", + " <th>16</th>\n", + " <td>Male</td>\n", + " <td>90370</td>\n", + " </tr>\n", + " <tr>\n", + " <th>17</th>\n", + " <td>Male</td>\n", + " <td>111737</td>\n", + " </tr>\n", + " <tr>\n", + " <th>18</th>\n", + " <td>Female</td>\n", + " <td>132940</td>\n", + " </tr>\n", + " <tr>\n", + " <th>19</th>\n", + " <td>Female</td>\n", + " <td>81014</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " Gender Salary\n", + "0 Male 97308\n", + "1 Male 61933\n", + "2 Female 130590\n", + "3 Male 138705\n", + "4 Male 101004\n", + "5 Male 115163\n", + "6 Female 65476\n", + "7 Female 45906\n", + "8 Female 95570\n", + "9 Female 139852\n", + "10 Female 63241\n", + "11 Female 102508\n", + "12 Male 112807\n", + "13 Male 109831\n", + "14 Female 41426\n", + "15 Female 59414\n", + "16 Male 90370\n", + "17 Male 111737\n", + "18 Female 132940\n", + "19 Female 81014" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ed[['Gender','Salary']]" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>First Name</th>\n", + " <th>Gender</th>\n", + " <th>Start Date</th>\n", + " <th>Last Login Time</th>\n", + " <th>Salary</th>\n", + " <th>Bonus %</th>\n", + " <th>Senior Management</th>\n", + " <th>Team</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>8</th>\n", + " <td>Angela</td>\n", + " <td>Female</td>\n", + " <td>11/22/2005</td>\n", + " <td>6:29 AM</td>\n", + " <td>95570</td>\n", + " <td>18.523</td>\n", + " <td>True</td>\n", + " <td>Engineering</td>\n", + " </tr>\n", + " <tr>\n", + " <th>9</th>\n", + " <td>Frances</td>\n", + " <td>Female</td>\n", + " <td>8/8/2002</td>\n", + " <td>6:51 AM</td>\n", + " <td>139852</td>\n", + " <td>7.524</td>\n", + " <td>True</td>\n", + " <td>Business Development</td>\n", + " </tr>\n", + " <tr>\n", + " <th>10</th>\n", + " <td>Louise</td>\n", + " <td>Female</td>\n", + " <td>8/12/1980</td>\n", + " <td>9:01 AM</td>\n", + " <td>63241</td>\n", + " <td>15.132</td>\n", + " <td>True</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>11</th>\n", + " <td>Julie</td>\n", + " <td>Female</td>\n", + " <td>10/26/1997</td>\n", + " <td>3:19 PM</td>\n", + " <td>102508</td>\n", + " <td>12.637</td>\n", + " <td>True</td>\n", + " <td>Legal</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " First Name Gender Start Date Last Login Time Salary Bonus % \\\n", + "8 Angela Female 11/22/2005 6:29 AM 95570 18.523 \n", + "9 Frances Female 8/8/2002 6:51 AM 139852 7.524 \n", + "10 Louise Female 8/12/1980 9:01 AM 63241 15.132 \n", + "11 Julie Female 10/26/1997 3:19 PM 102508 12.637 \n", + "\n", + " Senior Management Team \n", + "8 True Engineering \n", + "9 True Business Development \n", + "10 True NaN \n", + "11 True Legal " + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#selection by position:rows data:\n", + "ed.iloc[8:12]" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "First Name Angela\n", + "Gender Female\n", + "Start Date 11/22/2005\n", + "Last Login Time 6:29 AM\n", + "Salary 95570\n", + "Bonus % 18.523\n", + "Senior Management True\n", + "Team Engineering\n", + "Name: 8, dtype: object" + ] + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ed.loc[8]" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "First Name 1\n", + "Gender 0\n", + "Start Date 0\n", + "Last Login Time 0\n", + "Salary 0\n", + "Bonus % 0\n", + "Senior Management 1\n", + "Team 2\n", + "dtype: int64" + ] + }, + "execution_count": 73, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#data cleaning:\n", + "ed.isnull().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "4" + ] + }, + "execution_count": 74, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#total null values:\n", + "ed.isnull().sum().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "156" + ] + }, + "execution_count": 77, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ed.notnull().sum().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [], + "source": [ + "#fpr practice on drop we will take the copy of the original data:\n", + "ed2=ed" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>First Name</th>\n", + " <th>Gender</th>\n", + " <th>Start Date</th>\n", + " <th>Last Login Time</th>\n", + " <th>Salary</th>\n", + " <th>Bonus %</th>\n", + " <th>Senior Management</th>\n", + " <th>Team</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>Douglas</td>\n", + " <td>Male</td>\n", + " <td>8/6/1993</td>\n", + " <td>12:42 PM</td>\n", + " <td>97308</td>\n", + " <td>6.945</td>\n", + " <td>True</td>\n", + " <td>Marketing</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>Thomas</td>\n", + " <td>Male</td>\n", + " <td>3/31/1996</td>\n", + " <td>6:53 AM</td>\n", + " <td>61933</td>\n", + " <td>4.170</td>\n", + " <td>True</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>Maria</td>\n", + " <td>Female</td>\n", + " <td>4/23/1993</td>\n", + " <td>11:17 AM</td>\n", + " <td>130590</td>\n", + " <td>11.858</td>\n", + " <td>False</td>\n", + " <td>Finance</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>Jerry</td>\n", + " <td>Male</td>\n", + " <td>3/4/2005</td>\n", + " <td>1:00 PM</td>\n", + " <td>138705</td>\n", + " <td>9.340</td>\n", + " <td>True</td>\n", + " <td>Finance</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>Larry</td>\n", + " <td>Male</td>\n", + " <td>1/24/1998</td>\n", + " <td>4:47 PM</td>\n", + " <td>101004</td>\n", + " <td>1.389</td>\n", + " <td>True</td>\n", + " <td>Client Services</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>Dennis</td>\n", + " <td>Male</td>\n", + " <td>4/18/1987</td>\n", + " <td>1:35 AM</td>\n", + " <td>115163</td>\n", + " <td>10.125</td>\n", + " <td>False</td>\n", + " <td>Legal</td>\n", + " </tr>\n", + " <tr>\n", + " <th>6</th>\n", + " <td>Ruby</td>\n", + " <td>Female</td>\n", + " <td>8/17/1987</td>\n", + " <td>4:20 PM</td>\n", + " <td>65476</td>\n", + " <td>10.012</td>\n", + " <td>True</td>\n", + " <td>Product</td>\n", + " </tr>\n", + " <tr>\n", + " <th>7</th>\n", + " <td>NaN</td>\n", + " <td>Female</td>\n", + " <td>7/20/2015</td>\n", + " <td>10:43 AM</td>\n", + " <td>45906</td>\n", + " <td>11.598</td>\n", + " <td>NaN</td>\n", + " <td>Finance</td>\n", + " </tr>\n", + " <tr>\n", + " <th>8</th>\n", + " <td>Angela</td>\n", + " <td>Female</td>\n", + " <td>11/22/2005</td>\n", + " <td>6:29 AM</td>\n", + " <td>95570</td>\n", + " <td>18.523</td>\n", + " <td>True</td>\n", + " <td>Engineering</td>\n", + " </tr>\n", + " <tr>\n", + " <th>9</th>\n", + " <td>Frances</td>\n", + " <td>Female</td>\n", + " <td>8/8/2002</td>\n", + " <td>6:51 AM</td>\n", + " <td>139852</td>\n", + " <td>7.524</td>\n", + " <td>True</td>\n", + " <td>Business Development</td>\n", + " </tr>\n", + " <tr>\n", + " <th>10</th>\n", + " <td>Louise</td>\n", + " <td>Female</td>\n", + " <td>8/12/1980</td>\n", + " <td>9:01 AM</td>\n", + " <td>63241</td>\n", + " <td>15.132</td>\n", + " <td>True</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>11</th>\n", + " <td>Julie</td>\n", + " <td>Female</td>\n", + " <td>10/26/1997</td>\n", + " <td>3:19 PM</td>\n", + " <td>102508</td>\n", + " <td>12.637</td>\n", + " <td>True</td>\n", + " <td>Legal</td>\n", + " </tr>\n", + " <tr>\n", + " <th>12</th>\n", + " <td>Brandon</td>\n", + " <td>Male</td>\n", + " <td>12/1/1980</td>\n", + " <td>1:08 AM</td>\n", + " <td>112807</td>\n", + " <td>17.492</td>\n", + " <td>True</td>\n", + " <td>Human Resources</td>\n", + " </tr>\n", + " <tr>\n", + " <th>13</th>\n", + " <td>Gary</td>\n", + " <td>Male</td>\n", + " <td>1/27/2008</td>\n", + " <td>11:40 PM</td>\n", + " <td>109831</td>\n", + " <td>5.831</td>\n", + " <td>False</td>\n", + " <td>Sales</td>\n", + " </tr>\n", + " <tr>\n", + " <th>14</th>\n", + " <td>Kimberly</td>\n", + " <td>Female</td>\n", + " <td>1/14/1999</td>\n", + " <td>7:13 AM</td>\n", + " <td>41426</td>\n", + " <td>14.543</td>\n", + " <td>True</td>\n", + " <td>Finance</td>\n", + " </tr>\n", + " <tr>\n", + " <th>15</th>\n", + " <td>Lillian</td>\n", + " <td>Female</td>\n", + " <td>6/5/2016</td>\n", + " <td>6:09 AM</td>\n", + " <td>59414</td>\n", + " <td>1.256</td>\n", + " <td>False</td>\n", + " <td>Product</td>\n", + " </tr>\n", + " <tr>\n", + " <th>16</th>\n", + " <td>Jeremy</td>\n", + " <td>Male</td>\n", + " <td>9/21/2010</td>\n", + " <td>5:56 AM</td>\n", + " <td>90370</td>\n", + " <td>7.369</td>\n", + " <td>False</td>\n", + " <td>Human Resources</td>\n", + " </tr>\n", + " <tr>\n", + " <th>17</th>\n", + " <td>Shawn</td>\n", + " <td>Male</td>\n", + " <td>12/7/1986</td>\n", + " <td>7:45 PM</td>\n", + " <td>111737</td>\n", + " <td>6.414</td>\n", + " <td>False</td>\n", + " <td>Product</td>\n", + " </tr>\n", + " <tr>\n", + " <th>18</th>\n", + " <td>Diana</td>\n", + " <td>Female</td>\n", + " <td>10/23/1981</td>\n", + " <td>10:27 AM</td>\n", + " <td>132940</td>\n", + " <td>19.082</td>\n", + " <td>False</td>\n", + " <td>Client Services</td>\n", + " </tr>\n", + " <tr>\n", + " <th>19</th>\n", + " <td>Donna</td>\n", + " <td>Female</td>\n", + " <td>7/22/2010</td>\n", + " <td>3:48 AM</td>\n", + " <td>81014</td>\n", + " <td>1.894</td>\n", + " <td>False</td>\n", + " <td>Product</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " First Name Gender Start Date Last Login Time Salary Bonus % \\\n", + "0 Douglas Male 8/6/1993 12:42 PM 97308 6.945 \n", + "1 Thomas Male 3/31/1996 6:53 AM 61933 4.170 \n", + "2 Maria Female 4/23/1993 11:17 AM 130590 11.858 \n", + "3 Jerry Male 3/4/2005 1:00 PM 138705 9.340 \n", + "4 Larry Male 1/24/1998 4:47 PM 101004 1.389 \n", + "5 Dennis Male 4/18/1987 1:35 AM 115163 10.125 \n", + "6 Ruby Female 8/17/1987 4:20 PM 65476 10.012 \n", + "7 NaN Female 7/20/2015 10:43 AM 45906 11.598 \n", + "8 Angela Female 11/22/2005 6:29 AM 95570 18.523 \n", + "9 Frances Female 8/8/2002 6:51 AM 139852 7.524 \n", + "10 Louise Female 8/12/1980 9:01 AM 63241 15.132 \n", + "11 Julie Female 10/26/1997 3:19 PM 102508 12.637 \n", + "12 Brandon Male 12/1/1980 1:08 AM 112807 17.492 \n", + "13 Gary Male 1/27/2008 11:40 PM 109831 5.831 \n", + "14 Kimberly Female 1/14/1999 7:13 AM 41426 14.543 \n", + "15 Lillian Female 6/5/2016 6:09 AM 59414 1.256 \n", + "16 Jeremy Male 9/21/2010 5:56 AM 90370 7.369 \n", + "17 Shawn Male 12/7/1986 7:45 PM 111737 6.414 \n", + "18 Diana Female 10/23/1981 10:27 AM 132940 19.082 \n", + "19 Donna Female 7/22/2010 3:48 AM 81014 1.894 \n", + "\n", + " Senior Management Team \n", + "0 True Marketing \n", + "1 True NaN \n", + "2 False Finance \n", + "3 True Finance \n", + "4 True Client Services \n", + "5 False Legal \n", + "6 True Product \n", + "7 NaN Finance \n", + "8 True Engineering \n", + "9 True Business Development \n", + "10 True NaN \n", + "11 True Legal \n", + "12 True Human Resources \n", + "13 False Sales \n", + "14 True Finance \n", + "15 False Product \n", + "16 False Human Resources \n", + "17 False Product \n", + "18 False Client Services \n", + "19 False Product " + ] + }, + "execution_count": 79, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ed2" + ] + }, + { + "cell_type": "code", + "execution_count": 90, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "prasent null values: 0\n" + ] + } + ], + "source": [ + "#removing the totyal columns if they are with the null values:\n", + "ed3=ed2.dropna(axis=1)\n", + "print(\"prasent null values:\",ed3.isnull().sum().sum())" + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "<ipython-input-96-9a2616dc4607>:1: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " ed2.fillna(10,inplace=True)\n" + ] + } + ], + "source": [ + "ed2.fillna(10,inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "First Name 0\n", + "Gender 0\n", + "Start Date 0\n", + "Last Login Time 0\n", + "Salary 0\n", + "Bonus % 0\n", + "Senior Management 0\n", + "Team 0\n", + "dtype: int64" + ] + }, + "execution_count": 97, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ed2.isnull().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 98, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>First Name</th>\n", + " <th>Gender</th>\n", + " <th>Start Date</th>\n", + " <th>Last Login Time</th>\n", + " <th>Salary</th>\n", + " <th>Bonus %</th>\n", + " <th>Senior Management</th>\n", + " <th>Team</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>Douglas</td>\n", + " <td>Male</td>\n", + " <td>8/6/1993</td>\n", + " <td>12:42 PM</td>\n", + " <td>97308</td>\n", + " <td>6.945</td>\n", + " <td>True</td>\n", + " <td>Marketing</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>Thomas</td>\n", + " <td>Male</td>\n", + " <td>3/31/1996</td>\n", + " <td>6:53 AM</td>\n", + " <td>61933</td>\n", + " <td>4.170</td>\n", + " <td>True</td>\n", + " <td>10</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>Maria</td>\n", + " <td>Female</td>\n", + " <td>4/23/1993</td>\n", + " <td>11:17 AM</td>\n", + " <td>130590</td>\n", + " <td>11.858</td>\n", + " <td>False</td>\n", + " <td>Finance</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>Jerry</td>\n", + " <td>Male</td>\n", + " <td>3/4/2005</td>\n", + " <td>1:00 PM</td>\n", + " <td>138705</td>\n", + " <td>9.340</td>\n", + " <td>True</td>\n", + " <td>Finance</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>Larry</td>\n", + " <td>Male</td>\n", + " <td>1/24/1998</td>\n", + " <td>4:47 PM</td>\n", + " <td>101004</td>\n", + " <td>1.389</td>\n", + " <td>True</td>\n", + " <td>Client Services</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>Dennis</td>\n", + " <td>Male</td>\n", + " <td>4/18/1987</td>\n", + " <td>1:35 AM</td>\n", + " <td>115163</td>\n", + " <td>10.125</td>\n", + " <td>False</td>\n", + " <td>Legal</td>\n", + " </tr>\n", + " <tr>\n", + " <th>6</th>\n", + " <td>Ruby</td>\n", + " <td>Female</td>\n", + " <td>8/17/1987</td>\n", + " <td>4:20 PM</td>\n", + " <td>65476</td>\n", + " <td>10.012</td>\n", + " <td>True</td>\n", + " <td>Product</td>\n", + " </tr>\n", + " <tr>\n", + " <th>7</th>\n", + " <td>10</td>\n", + " <td>Female</td>\n", + " <td>7/20/2015</td>\n", + " <td>10:43 AM</td>\n", + " <td>45906</td>\n", + " <td>11.598</td>\n", + " <td>10</td>\n", + " <td>Finance</td>\n", + " </tr>\n", + " <tr>\n", + " <th>8</th>\n", + " <td>Angela</td>\n", + " <td>Female</td>\n", + " <td>11/22/2005</td>\n", + " <td>6:29 AM</td>\n", + " <td>95570</td>\n", + " <td>18.523</td>\n", + " <td>True</td>\n", + " <td>Engineering</td>\n", + " </tr>\n", + " <tr>\n", + " <th>9</th>\n", + " <td>Frances</td>\n", + " <td>Female</td>\n", + " <td>8/8/2002</td>\n", + " <td>6:51 AM</td>\n", + " <td>139852</td>\n", + " <td>7.524</td>\n", + " <td>True</td>\n", + " <td>Business Development</td>\n", + " </tr>\n", + " <tr>\n", + " <th>10</th>\n", + " <td>Louise</td>\n", + " <td>Female</td>\n", + " <td>8/12/1980</td>\n", + " <td>9:01 AM</td>\n", + " <td>63241</td>\n", + " <td>15.132</td>\n", + " <td>True</td>\n", + " <td>10</td>\n", + " </tr>\n", + " <tr>\n", + " <th>11</th>\n", + " <td>Julie</td>\n", + " <td>Female</td>\n", + " <td>10/26/1997</td>\n", + " <td>3:19 PM</td>\n", + " <td>102508</td>\n", + " <td>12.637</td>\n", + " <td>True</td>\n", + " <td>Legal</td>\n", + " </tr>\n", + " <tr>\n", + " <th>12</th>\n", + " <td>Brandon</td>\n", + " <td>Male</td>\n", + " <td>12/1/1980</td>\n", + " <td>1:08 AM</td>\n", + " <td>112807</td>\n", + " <td>17.492</td>\n", + " <td>True</td>\n", + " <td>Human Resources</td>\n", + " </tr>\n", + " <tr>\n", + " <th>13</th>\n", + " <td>Gary</td>\n", + " <td>Male</td>\n", + " <td>1/27/2008</td>\n", + " <td>11:40 PM</td>\n", + " <td>109831</td>\n", + " <td>5.831</td>\n", + " <td>False</td>\n", + " <td>Sales</td>\n", + " </tr>\n", + " <tr>\n", + " <th>14</th>\n", + " <td>Kimberly</td>\n", + " <td>Female</td>\n", + " <td>1/14/1999</td>\n", + " <td>7:13 AM</td>\n", + " <td>41426</td>\n", + " <td>14.543</td>\n", + " <td>True</td>\n", + " <td>Finance</td>\n", + " </tr>\n", + " <tr>\n", + " <th>15</th>\n", + " <td>Lillian</td>\n", + " <td>Female</td>\n", + " <td>6/5/2016</td>\n", + " <td>6:09 AM</td>\n", + " <td>59414</td>\n", + " <td>1.256</td>\n", + " <td>False</td>\n", + " <td>Product</td>\n", + " </tr>\n", + " <tr>\n", + " <th>16</th>\n", + " <td>Jeremy</td>\n", + " <td>Male</td>\n", + " <td>9/21/2010</td>\n", + " <td>5:56 AM</td>\n", + " <td>90370</td>\n", + " <td>7.369</td>\n", + " <td>False</td>\n", + " <td>Human Resources</td>\n", + " </tr>\n", + " <tr>\n", + " <th>17</th>\n", + " <td>Shawn</td>\n", + " <td>Male</td>\n", + " <td>12/7/1986</td>\n", + " <td>7:45 PM</td>\n", + " <td>111737</td>\n", + " <td>6.414</td>\n", + " <td>False</td>\n", + " <td>Product</td>\n", + " </tr>\n", + " <tr>\n", + " <th>18</th>\n", + " <td>Diana</td>\n", + " <td>Female</td>\n", + " <td>10/23/1981</td>\n", + " <td>10:27 AM</td>\n", + " <td>132940</td>\n", + " <td>19.082</td>\n", + " <td>False</td>\n", + " <td>Client Services</td>\n", + " </tr>\n", + " <tr>\n", + " <th>19</th>\n", + " <td>Donna</td>\n", + " <td>Female</td>\n", + " <td>7/22/2010</td>\n", + " <td>3:48 AM</td>\n", + " <td>81014</td>\n", + " <td>1.894</td>\n", + " <td>False</td>\n", + " <td>Product</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " First Name Gender Start Date Last Login Time Salary Bonus % \\\n", + "0 Douglas Male 8/6/1993 12:42 PM 97308 6.945 \n", + "1 Thomas Male 3/31/1996 6:53 AM 61933 4.170 \n", + "2 Maria Female 4/23/1993 11:17 AM 130590 11.858 \n", + "3 Jerry Male 3/4/2005 1:00 PM 138705 9.340 \n", + "4 Larry Male 1/24/1998 4:47 PM 101004 1.389 \n", + "5 Dennis Male 4/18/1987 1:35 AM 115163 10.125 \n", + "6 Ruby Female 8/17/1987 4:20 PM 65476 10.012 \n", + "7 10 Female 7/20/2015 10:43 AM 45906 11.598 \n", + "8 Angela Female 11/22/2005 6:29 AM 95570 18.523 \n", + "9 Frances Female 8/8/2002 6:51 AM 139852 7.524 \n", + "10 Louise Female 8/12/1980 9:01 AM 63241 15.132 \n", + "11 Julie Female 10/26/1997 3:19 PM 102508 12.637 \n", + "12 Brandon Male 12/1/1980 1:08 AM 112807 17.492 \n", + "13 Gary Male 1/27/2008 11:40 PM 109831 5.831 \n", + "14 Kimberly Female 1/14/1999 7:13 AM 41426 14.543 \n", + "15 Lillian Female 6/5/2016 6:09 AM 59414 1.256 \n", + "16 Jeremy Male 9/21/2010 5:56 AM 90370 7.369 \n", + "17 Shawn Male 12/7/1986 7:45 PM 111737 6.414 \n", + "18 Diana Female 10/23/1981 10:27 AM 132940 19.082 \n", + "19 Donna Female 7/22/2010 3:48 AM 81014 1.894 \n", + "\n", + " Senior Management Team \n", + "0 True Marketing \n", + "1 True 10 \n", + "2 False Finance \n", + "3 True Finance \n", + "4 True Client Services \n", + "5 False Legal \n", + "6 True Product \n", + "7 10 Finance \n", + "8 True Engineering \n", + "9 True Business Development \n", + "10 True 10 \n", + "11 True Legal \n", + "12 True Human Resources \n", + "13 False Sales \n", + "14 True Finance \n", + "15 False Product \n", + "16 False Human Resources \n", + "17 False Product \n", + "18 False Client Services \n", + "19 False Product " + ] + }, + "execution_count": 98, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ed2" + ] + }, + { + "cell_type": "code", + "execution_count": 118, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>First Name</th>\n", + " <th>Gender</th>\n", + " <th>Start Date</th>\n", + " <th>Last Login Time</th>\n", + " <th>Salary</th>\n", + " <th>Bonus %</th>\n", + " <th>Senior Management</th>\n", + " <th>Team</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>Douglas</td>\n", + " <td>Male</td>\n", + " <td>8/6/1993</td>\n", + " <td>12:42 PM</td>\n", + " <td>97308</td>\n", + " <td>6.945</td>\n", + " <td>True</td>\n", + " <td>Marketing</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>Thomas</td>\n", + " <td>Male</td>\n", + " <td>3/31/1996</td>\n", + " <td>6:53 AM</td>\n", + " <td>61933</td>\n", + " <td>4.170</td>\n", + " <td>True</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>Maria</td>\n", + " <td>Female</td>\n", + " <td>4/23/1993</td>\n", + " <td>11:17 AM</td>\n", + " <td>130590</td>\n", + " <td>11.858</td>\n", + " <td>False</td>\n", + " <td>Finance</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>Jerry</td>\n", + " <td>Male</td>\n", + " <td>3/4/2005</td>\n", + " <td>1:00 PM</td>\n", + " <td>138705</td>\n", + " <td>9.340</td>\n", + " <td>True</td>\n", + " <td>Finance</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>Larry</td>\n", + " <td>Male</td>\n", + " <td>1/24/1998</td>\n", + " <td>4:47 PM</td>\n", + " <td>101004</td>\n", + " <td>1.389</td>\n", + " <td>True</td>\n", + " <td>Client Services</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>Dennis</td>\n", + " <td>Male</td>\n", + " <td>4/18/1987</td>\n", + " <td>1:35 AM</td>\n", + " <td>115163</td>\n", + " <td>10.125</td>\n", + " <td>False</td>\n", + " <td>Legal</td>\n", + " </tr>\n", + " <tr>\n", + " <th>6</th>\n", + " <td>Ruby</td>\n", + " <td>Female</td>\n", + " <td>8/17/1987</td>\n", + " <td>4:20 PM</td>\n", + " <td>65476</td>\n", + " <td>10.012</td>\n", + " <td>True</td>\n", + " <td>Product</td>\n", + " </tr>\n", + " <tr>\n", + " <th>7</th>\n", + " <td>NaN</td>\n", + " <td>Female</td>\n", + " <td>7/20/2015</td>\n", + " <td>10:43 AM</td>\n", + " <td>45906</td>\n", + " <td>11.598</td>\n", + " <td>NaN</td>\n", + " <td>Finance</td>\n", + " </tr>\n", + " <tr>\n", + " <th>8</th>\n", + " <td>Angela</td>\n", + " <td>Female</td>\n", + " <td>11/22/2005</td>\n", + " <td>6:29 AM</td>\n", + " <td>95570</td>\n", + " <td>18.523</td>\n", + " <td>True</td>\n", + " <td>Engineering</td>\n", + " </tr>\n", + " <tr>\n", + " <th>9</th>\n", + " <td>Frances</td>\n", + " <td>Female</td>\n", + " <td>8/8/2002</td>\n", + " <td>6:51 AM</td>\n", + " <td>139852</td>\n", + " <td>7.524</td>\n", + " <td>True</td>\n", + " <td>Business Development</td>\n", + " </tr>\n", + " <tr>\n", + " <th>10</th>\n", + " <td>Louise</td>\n", + " <td>Female</td>\n", + " <td>8/12/1980</td>\n", + " <td>9:01 AM</td>\n", + " <td>63241</td>\n", + " <td>15.132</td>\n", + " <td>True</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>11</th>\n", + " <td>Julie</td>\n", + " <td>Female</td>\n", + " <td>10/26/1997</td>\n", + " <td>3:19 PM</td>\n", + " <td>102508</td>\n", + " <td>12.637</td>\n", + " <td>True</td>\n", + " <td>Legal</td>\n", + " </tr>\n", + " <tr>\n", + " <th>12</th>\n", + " <td>Brandon</td>\n", + " <td>Male</td>\n", + " <td>12/1/1980</td>\n", + " <td>1:08 AM</td>\n", + " <td>112807</td>\n", + " <td>17.492</td>\n", + " <td>True</td>\n", + " <td>Human Resources</td>\n", + " </tr>\n", + " <tr>\n", + " <th>13</th>\n", + " <td>Gary</td>\n", + " <td>Male</td>\n", + " <td>1/27/2008</td>\n", + " <td>11:40 PM</td>\n", + " <td>109831</td>\n", + " <td>5.831</td>\n", + " <td>False</td>\n", + " <td>Sales</td>\n", + " </tr>\n", + " <tr>\n", + " <th>14</th>\n", + " <td>Kimberly</td>\n", + " <td>Female</td>\n", + " <td>1/14/1999</td>\n", + " <td>7:13 AM</td>\n", + " <td>41426</td>\n", + " <td>14.543</td>\n", + " <td>True</td>\n", + " <td>Finance</td>\n", + " </tr>\n", + " <tr>\n", + " <th>15</th>\n", + " <td>Lillian</td>\n", + " <td>Female</td>\n", + " <td>6/5/2016</td>\n", + " <td>6:09 AM</td>\n", + " <td>59414</td>\n", + " <td>1.256</td>\n", + " <td>False</td>\n", + " <td>Product</td>\n", + " </tr>\n", + " <tr>\n", + " <th>16</th>\n", + " <td>Jeremy</td>\n", + " <td>Male</td>\n", + " <td>9/21/2010</td>\n", + " <td>5:56 AM</td>\n", + " <td>90370</td>\n", + " <td>7.369</td>\n", + " <td>False</td>\n", + " <td>Human Resources</td>\n", + " </tr>\n", + " <tr>\n", + " <th>17</th>\n", + " <td>Shawn</td>\n", + " <td>Male</td>\n", + " <td>12/7/1986</td>\n", + " <td>7:45 PM</td>\n", + " <td>111737</td>\n", + " <td>6.414</td>\n", + " <td>False</td>\n", + " <td>Product</td>\n", + " </tr>\n", + " <tr>\n", + " <th>18</th>\n", + " <td>Diana</td>\n", + " <td>Female</td>\n", + " <td>10/23/1981</td>\n", + " <td>10:27 AM</td>\n", + " <td>132940</td>\n", + " <td>19.082</td>\n", + " <td>False</td>\n", + " <td>Client Services</td>\n", + " </tr>\n", + " <tr>\n", + " <th>19</th>\n", + " <td>Donna</td>\n", + " <td>Female</td>\n", + " <td>7/22/2010</td>\n", + " <td>3:48 AM</td>\n", + " <td>81014</td>\n", + " <td>1.894</td>\n", + " <td>False</td>\n", + " <td>Product</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " First Name Gender Start Date Last Login Time Salary Bonus % \\\n", + "0 Douglas Male 8/6/1993 12:42 PM 97308 6.945 \n", + "1 Thomas Male 3/31/1996 6:53 AM 61933 4.170 \n", + "2 Maria Female 4/23/1993 11:17 AM 130590 11.858 \n", + "3 Jerry Male 3/4/2005 1:00 PM 138705 9.340 \n", + "4 Larry Male 1/24/1998 4:47 PM 101004 1.389 \n", + "5 Dennis Male 4/18/1987 1:35 AM 115163 10.125 \n", + "6 Ruby Female 8/17/1987 4:20 PM 65476 10.012 \n", + "7 NaN Female 7/20/2015 10:43 AM 45906 11.598 \n", + "8 Angela Female 11/22/2005 6:29 AM 95570 18.523 \n", + "9 Frances Female 8/8/2002 6:51 AM 139852 7.524 \n", + "10 Louise Female 8/12/1980 9:01 AM 63241 15.132 \n", + "11 Julie Female 10/26/1997 3:19 PM 102508 12.637 \n", + "12 Brandon Male 12/1/1980 1:08 AM 112807 17.492 \n", + "13 Gary Male 1/27/2008 11:40 PM 109831 5.831 \n", + "14 Kimberly Female 1/14/1999 7:13 AM 41426 14.543 \n", + "15 Lillian Female 6/5/2016 6:09 AM 59414 1.256 \n", + "16 Jeremy Male 9/21/2010 5:56 AM 90370 7.369 \n", + "17 Shawn Male 12/7/1986 7:45 PM 111737 6.414 \n", + "18 Diana Female 10/23/1981 10:27 AM 132940 19.082 \n", + "19 Donna Female 7/22/2010 3:48 AM 81014 1.894 \n", + "\n", + " Senior Management Team \n", + "0 True Marketing \n", + "1 True NaN \n", + "2 False Finance \n", + "3 True Finance \n", + "4 True Client Services \n", + "5 False Legal \n", + "6 True Product \n", + "7 NaN Finance \n", + "8 True Engineering \n", + "9 True Business Development \n", + "10 True NaN \n", + "11 True Legal \n", + "12 True Human Resources \n", + "13 False Sales \n", + "14 True Finance \n", + "15 False Product \n", + "16 False Human Resources \n", + "17 False Product \n", + "18 False Client Services \n", + "19 False Product " + ] + }, + "execution_count": 118, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ed5=Employ.head(20)\n", + "ed5" + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "First Name 0\n", + "Gender 0\n", + "Start Date 0\n", + "Last Login Time 0\n", + "Salary 0\n", + "Bonus % 0\n", + "Senior Management 0\n", + "Team 0\n", + "dtype: int64" + ] + }, + "execution_count": 111, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ed2.isnull().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 120, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>date</th>\n", + " <th>students</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>10/9/2020</td>\n", + " <td>10</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>11/09/2020</td>\n", + " <td>20</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>12/09/2020</td>\n", + " <td>30</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " date students\n", + "0 10/9/2020 10\n", + "1 11/09/2020 20\n", + "2 12/09/2020 30" + ] + }, + "execution_count": 120, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "date" + ] + }, + { + "cell_type": "code", + "execution_count": 124, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "date object\n", + "students int64\n", + "dtype: object" + ] + }, + "execution_count": 124, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "date.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 131, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>check</th>\n", + " <th>students</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>10/9/2020</td>\n", + " <td>10</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>11/09/2020</td>\n", + " <td>20</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>12/09/2020</td>\n", + " <td>30</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " check students\n", + "0 10/9/2020 10\n", + "1 11/09/2020 20\n", + "2 12/09/2020 30" + ] + }, + "execution_count": 131, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#rename for the date with the check:\n", + "date.rename(columns={'date':'check'})" + ] + }, + { + "cell_type": "code", + "execution_count": 134, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>date</th>\n", + " <th>students</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>12/09/2020</td>\n", + " <td>30</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>11/09/2020</td>\n", + " <td>20</td>\n", + " </tr>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>10/9/2020</td>\n", + " <td>10</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " date students\n", + "2 12/09/2020 30\n", + "1 11/09/2020 20\n", + "0 10/9/2020 10" + ] + }, + "execution_count": 134, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "date.sort_values('students',ascending=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " 0\n", + "0 1\n", + "1 2\n", + "2 3\n", + "3 4\n", + "4 5\n", + "5 6\n" + ] + } + ], + "source": [ + "#creating the data frame:\n", + "import pandas as pd\n", + "data=[1,2,3,4,5,6]\n", + "d_frame=pd.DataFrame(data)\n", + "print(d_frame)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " 0 1 2\n", + "x std1 std2 std3\n", + "y azar ameer varun\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "arr=np.array([[\"std1\",\"std2\",\"std3\"],[\"azar\",\"ameer\",\"varun\"]])\n", + "d_frame2=pd.DataFrame(arr,index=['x','y'])\n", + "print(d_frame2)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>0</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>x</th>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>y</th>\n", + " <td>False</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " 0\n", + "x False\n", + "y False" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d_frame2.isnull()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>0</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>count</th>\n", + " <td>6.000000</td>\n", + " </tr>\n", + " <tr>\n", + " <th>mean</th>\n", + " <td>3.500000</td>\n", + " </tr>\n", + " <tr>\n", + " <th>std</th>\n", + " <td>1.870829</td>\n", + " </tr>\n", + " <tr>\n", + " <th>min</th>\n", + " <td>1.000000</td>\n", + " </tr>\n", + " <tr>\n", + " <th>25%</th>\n", + " <td>2.250000</td>\n", + " </tr>\n", + " <tr>\n", + " <th>50%</th>\n", + " <td>3.500000</td>\n", + " </tr>\n", + " <tr>\n", + " <th>75%</th>\n", + " <td>4.750000</td>\n", + " </tr>\n", + " <tr>\n", + " <th>max</th>\n", + " <td>6.000000</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " 0\n", + "count 6.000000\n", + "mean 3.500000\n", + "std 1.870829\n", + "min 1.000000\n", + "25% 2.250000\n", + "50% 3.500000\n", + "75% 4.750000\n", + "max 6.000000" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d_frame.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 3.5\n", + "dtype: float64" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d_frame.mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 6\n", + "dtype: int64" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d_frame.count()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 6\n", + "dtype: int64" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d_frame.max()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 3.5\n", + "dtype: float64" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d_frame.median()" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 1.870829\n", + "dtype: float64" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d_frame.std()" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 3.5\n", + "dtype: float64" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d_frame.apply(np.mean)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/y8/34wjlypd37q4zn8rwhg1vsqc0000gn/T/ipykernel_7089/515644098.py:1: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.\n", + " data=np.array([[1,2,3,4,5,6],[1,2,3,4,5,6,7]])\n" + ] + }, + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>0</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>[1, 2, 3, 4, 5, 6]</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>[1, 2, 3, 4, 5, 6, 7]</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " 0\n", + "0 [1, 2, 3, 4, 5, 6]\n", + "1 [1, 2, 3, 4, 5, 6, 7]" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data=np.array([[1,2,3,4,5,6],[1,2,3,4,5,6,7]])\n", + "data_frame=pd.DataFrame(data)\n", + "data_frame" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}