{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 0\n",
"0 1\n",
"1 2\n",
"2 3\n",
"3 4\n"
]
}
],
"source": [
"df=[1,2,3,4]\n",
"print(pd.DataFrame(df))"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0 1\n",
"1 2\n",
"2 3\n",
"3 4\n",
"dtype: int64\n"
]
}
],
"source": [
"print(pd.Series(df))"
]
},
{
"cell_type": "code",
"execution_count": 115,
"metadata": {},
"outputs": [],
"source": [
"Employ=pd.read_csv(\"employees.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 116,
"metadata": {},
"outputs": [],
"source": [
"Employ_dub=Employ.head(20)"
]
},
{
"cell_type": "code",
"execution_count": 117,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" First Name | \n",
" Gender | \n",
" Start Date | \n",
" Last Login Time | \n",
" Salary | \n",
" Bonus % | \n",
" Senior Management | \n",
" Team | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Douglas | \n",
" Male | \n",
" 8/6/1993 | \n",
" 12:42 PM | \n",
" 97308 | \n",
" 6.945 | \n",
" True | \n",
" Marketing | \n",
"
\n",
" \n",
" 1 | \n",
" Thomas | \n",
" Male | \n",
" 3/31/1996 | \n",
" 6:53 AM | \n",
" 61933 | \n",
" 4.170 | \n",
" True | \n",
" NaN | \n",
"
\n",
" \n",
" 2 | \n",
" Maria | \n",
" Female | \n",
" 4/23/1993 | \n",
" 11:17 AM | \n",
" 130590 | \n",
" 11.858 | \n",
" False | \n",
" Finance | \n",
"
\n",
" \n",
" 3 | \n",
" Jerry | \n",
" Male | \n",
" 3/4/2005 | \n",
" 1:00 PM | \n",
" 138705 | \n",
" 9.340 | \n",
" True | \n",
" Finance | \n",
"
\n",
" \n",
" 4 | \n",
" Larry | \n",
" Male | \n",
" 1/24/1998 | \n",
" 4:47 PM | \n",
" 101004 | \n",
" 1.389 | \n",
" True | \n",
" Client Services | \n",
"
\n",
" \n",
" 5 | \n",
" Dennis | \n",
" Male | \n",
" 4/18/1987 | \n",
" 1:35 AM | \n",
" 115163 | \n",
" 10.125 | \n",
" False | \n",
" Legal | \n",
"
\n",
" \n",
" 6 | \n",
" Ruby | \n",
" Female | \n",
" 8/17/1987 | \n",
" 4:20 PM | \n",
" 65476 | \n",
" 10.012 | \n",
" True | \n",
" Product | \n",
"
\n",
" \n",
" 7 | \n",
" NaN | \n",
" Female | \n",
" 7/20/2015 | \n",
" 10:43 AM | \n",
" 45906 | \n",
" 11.598 | \n",
" NaN | \n",
" Finance | \n",
"
\n",
" \n",
" 8 | \n",
" Angela | \n",
" Female | \n",
" 11/22/2005 | \n",
" 6:29 AM | \n",
" 95570 | \n",
" 18.523 | \n",
" True | \n",
" Engineering | \n",
"
\n",
" \n",
" 9 | \n",
" Frances | \n",
" Female | \n",
" 8/8/2002 | \n",
" 6:51 AM | \n",
" 139852 | \n",
" 7.524 | \n",
" True | \n",
" Business Development | \n",
"
\n",
" \n",
" 10 | \n",
" Louise | \n",
" Female | \n",
" 8/12/1980 | \n",
" 9:01 AM | \n",
" 63241 | \n",
" 15.132 | \n",
" True | \n",
" NaN | \n",
"
\n",
" \n",
" 11 | \n",
" Julie | \n",
" Female | \n",
" 10/26/1997 | \n",
" 3:19 PM | \n",
" 102508 | \n",
" 12.637 | \n",
" True | \n",
" Legal | \n",
"
\n",
" \n",
" 12 | \n",
" Brandon | \n",
" Male | \n",
" 12/1/1980 | \n",
" 1:08 AM | \n",
" 112807 | \n",
" 17.492 | \n",
" True | \n",
" Human Resources | \n",
"
\n",
" \n",
" 13 | \n",
" Gary | \n",
" Male | \n",
" 1/27/2008 | \n",
" 11:40 PM | \n",
" 109831 | \n",
" 5.831 | \n",
" False | \n",
" Sales | \n",
"
\n",
" \n",
" 14 | \n",
" Kimberly | \n",
" Female | \n",
" 1/14/1999 | \n",
" 7:13 AM | \n",
" 41426 | \n",
" 14.543 | \n",
" True | \n",
" Finance | \n",
"
\n",
" \n",
" 15 | \n",
" Lillian | \n",
" Female | \n",
" 6/5/2016 | \n",
" 6:09 AM | \n",
" 59414 | \n",
" 1.256 | \n",
" False | \n",
" Product | \n",
"
\n",
" \n",
" 16 | \n",
" Jeremy | \n",
" Male | \n",
" 9/21/2010 | \n",
" 5:56 AM | \n",
" 90370 | \n",
" 7.369 | \n",
" False | \n",
" Human Resources | \n",
"
\n",
" \n",
" 17 | \n",
" Shawn | \n",
" Male | \n",
" 12/7/1986 | \n",
" 7:45 PM | \n",
" 111737 | \n",
" 6.414 | \n",
" False | \n",
" Product | \n",
"
\n",
" \n",
" 18 | \n",
" Diana | \n",
" Female | \n",
" 10/23/1981 | \n",
" 10:27 AM | \n",
" 132940 | \n",
" 19.082 | \n",
" False | \n",
" Client Services | \n",
"
\n",
" \n",
" 19 | \n",
" Donna | \n",
" Female | \n",
" 7/22/2010 | \n",
" 3:48 AM | \n",
" 81014 | \n",
" 1.894 | \n",
" False | \n",
" Product | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" First Name Gender Start Date Last Login Time Salary Bonus % \\\n",
"0 Douglas Male 8/6/1993 12:42 PM 97308 6.945 \n",
"1 Thomas Male 3/31/1996 6:53 AM 61933 4.170 \n",
"2 Maria Female 4/23/1993 11:17 AM 130590 11.858 \n",
"3 Jerry Male 3/4/2005 1:00 PM 138705 9.340 \n",
"4 Larry Male 1/24/1998 4:47 PM 101004 1.389 \n",
"5 Dennis Male 4/18/1987 1:35 AM 115163 10.125 \n",
"6 Ruby Female 8/17/1987 4:20 PM 65476 10.012 \n",
"7 NaN Female 7/20/2015 10:43 AM 45906 11.598 \n",
"8 Angela Female 11/22/2005 6:29 AM 95570 18.523 \n",
"9 Frances Female 8/8/2002 6:51 AM 139852 7.524 \n",
"10 Louise Female 8/12/1980 9:01 AM 63241 15.132 \n",
"11 Julie Female 10/26/1997 3:19 PM 102508 12.637 \n",
"12 Brandon Male 12/1/1980 1:08 AM 112807 17.492 \n",
"13 Gary Male 1/27/2008 11:40 PM 109831 5.831 \n",
"14 Kimberly Female 1/14/1999 7:13 AM 41426 14.543 \n",
"15 Lillian Female 6/5/2016 6:09 AM 59414 1.256 \n",
"16 Jeremy Male 9/21/2010 5:56 AM 90370 7.369 \n",
"17 Shawn Male 12/7/1986 7:45 PM 111737 6.414 \n",
"18 Diana Female 10/23/1981 10:27 AM 132940 19.082 \n",
"19 Donna Female 7/22/2010 3:48 AM 81014 1.894 \n",
"\n",
" Senior Management Team \n",
"0 True Marketing \n",
"1 True NaN \n",
"2 False Finance \n",
"3 True Finance \n",
"4 True Client Services \n",
"5 False Legal \n",
"6 True Product \n",
"7 NaN Finance \n",
"8 True Engineering \n",
"9 True Business Development \n",
"10 True NaN \n",
"11 True Legal \n",
"12 True Human Resources \n",
"13 False Sales \n",
"14 True Finance \n",
"15 False Product \n",
"16 False Human Resources \n",
"17 False Product \n",
"18 False Client Services \n",
"19 False Product "
]
},
"execution_count": 117,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Employ_dub"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"RangeIndex: 20 entries, 0 to 19\n",
"Data columns (total 8 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 First Name 19 non-null object \n",
" 1 Gender 20 non-null object \n",
" 2 Start Date 20 non-null object \n",
" 3 Last Login Time 20 non-null object \n",
" 4 Salary 20 non-null int64 \n",
" 5 Bonus % 20 non-null float64\n",
" 6 Senior Management 19 non-null object \n",
" 7 Team 18 non-null object \n",
"dtypes: float64(1), int64(1), object(6)\n",
"memory usage: 868.0+ bytes\n"
]
}
],
"source": [
"#total info about the employee\n",
"Employ_dub.info()"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" First Name | \n",
" Gender | \n",
" Start Date | \n",
" Last Login Time | \n",
" Salary | \n",
" Bonus % | \n",
" Senior Management | \n",
" Team | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
"
\n",
" \n",
" 1 | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" True | \n",
"
\n",
" \n",
" 2 | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
"
\n",
" \n",
" 3 | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
"
\n",
" \n",
" 4 | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
"
\n",
" \n",
" 5 | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
"
\n",
" \n",
" 6 | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
"
\n",
" \n",
" 7 | \n",
" True | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" True | \n",
" False | \n",
"
\n",
" \n",
" 8 | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
"
\n",
" \n",
" 9 | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
"
\n",
" \n",
" 10 | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" True | \n",
"
\n",
" \n",
" 11 | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
"
\n",
" \n",
" 12 | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
"
\n",
" \n",
" 13 | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
"
\n",
" \n",
" 14 | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
"
\n",
" \n",
" 15 | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
"
\n",
" \n",
" 16 | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
"
\n",
" \n",
" 17 | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
"
\n",
" \n",
" 18 | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
"
\n",
" \n",
" 19 | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" First Name Gender Start Date Last Login Time Salary Bonus % \\\n",
"0 False False False False False False \n",
"1 False False False False False False \n",
"2 False False False False False False \n",
"3 False False False False False False \n",
"4 False False False False False False \n",
"5 False False False False False False \n",
"6 False False False False False False \n",
"7 True False False False False False \n",
"8 False False False False False False \n",
"9 False False False False False False \n",
"10 False False False False False False \n",
"11 False False False False False False \n",
"12 False False False False False False \n",
"13 False False False False False False \n",
"14 False False False False False False \n",
"15 False False False False False False \n",
"16 False False False False False False \n",
"17 False False False False False False \n",
"18 False False False False False False \n",
"19 False False False False False False \n",
"\n",
" Senior Management Team \n",
"0 False False \n",
"1 False True \n",
"2 False False \n",
"3 False False \n",
"4 False False \n",
"5 False False \n",
"6 False False \n",
"7 True False \n",
"8 False False \n",
"9 False False \n",
"10 False True \n",
"11 False False \n",
"12 False False \n",
"13 False False \n",
"14 False False \n",
"15 False False \n",
"16 False False \n",
"17 False False \n",
"18 False False \n",
"19 False False "
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Employ_dub.isnull()"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"First Name 1\n",
"Gender 0\n",
"Start Date 0\n",
"Last Login Time 0\n",
"Salary 0\n",
"Bonus % 0\n",
"Senior Management 1\n",
"Team 2\n",
"dtype: int64"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#checking for the null values in the dataset of employee\n",
"Employ_dub.isnull().sum()"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"#changing the name of the dataset\n",
"ed=Employ_dub"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(20, 8)"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#dimension of the dataset\n",
"ed.shape"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['First Name', 'Gender', 'Start Date', 'Last Login Time', 'Salary',\n",
" 'Bonus %', 'Senior Management', 'Team'],\n",
" dtype='object')"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ed.columns"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
"#working on the dictionary for a while:"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [],
"source": [
"#creating test objects:\n",
"import numpy as np\n",
"ff=pd.DataFrame(np.random.rand(20,5))"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 0 | \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
" 4 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0.020780 | \n",
" 0.365190 | \n",
" 0.673825 | \n",
" 0.800112 | \n",
" 0.188644 | \n",
"
\n",
" \n",
" 1 | \n",
" 0.660845 | \n",
" 0.265913 | \n",
" 0.445028 | \n",
" 0.889438 | \n",
" 0.601047 | \n",
"
\n",
" \n",
" 2 | \n",
" 0.646987 | \n",
" 0.926823 | \n",
" 0.722838 | \n",
" 0.475271 | \n",
" 0.827945 | \n",
"
\n",
" \n",
" 3 | \n",
" 0.871724 | \n",
" 0.290353 | \n",
" 0.099578 | \n",
" 0.109949 | \n",
" 0.229182 | \n",
"
\n",
" \n",
" 4 | \n",
" 0.704794 | \n",
" 0.884062 | \n",
" 0.751327 | \n",
" 0.595746 | \n",
" 0.612269 | \n",
"
\n",
" \n",
" 5 | \n",
" 0.371269 | \n",
" 0.560512 | \n",
" 0.510264 | \n",
" 0.247923 | \n",
" 0.618853 | \n",
"
\n",
" \n",
" 6 | \n",
" 0.150398 | \n",
" 0.116999 | \n",
" 0.934865 | \n",
" 0.315723 | \n",
" 0.221538 | \n",
"
\n",
" \n",
" 7 | \n",
" 0.556336 | \n",
" 0.875514 | \n",
" 0.471526 | \n",
" 0.539511 | \n",
" 0.271221 | \n",
"
\n",
" \n",
" 8 | \n",
" 0.428221 | \n",
" 0.546766 | \n",
" 0.921274 | \n",
" 0.500520 | \n",
" 0.400341 | \n",
"
\n",
" \n",
" 9 | \n",
" 0.150170 | \n",
" 0.802378 | \n",
" 0.608124 | \n",
" 0.342871 | \n",
" 0.076631 | \n",
"
\n",
" \n",
" 10 | \n",
" 0.099049 | \n",
" 0.280748 | \n",
" 0.865939 | \n",
" 0.214541 | \n",
" 0.083318 | \n",
"
\n",
" \n",
" 11 | \n",
" 0.042867 | \n",
" 0.701639 | \n",
" 0.051457 | \n",
" 0.691385 | \n",
" 0.051529 | \n",
"
\n",
" \n",
" 12 | \n",
" 0.530845 | \n",
" 0.248395 | \n",
" 0.433733 | \n",
" 0.049458 | \n",
" 0.314959 | \n",
"
\n",
" \n",
" 13 | \n",
" 0.142230 | \n",
" 0.746634 | \n",
" 0.536247 | \n",
" 0.096499 | \n",
" 0.123294 | \n",
"
\n",
" \n",
" 14 | \n",
" 0.139630 | \n",
" 0.056464 | \n",
" 0.595644 | \n",
" 0.764071 | \n",
" 0.193826 | \n",
"
\n",
" \n",
" 15 | \n",
" 0.709624 | \n",
" 0.590262 | \n",
" 0.816268 | \n",
" 0.187931 | \n",
" 0.366224 | \n",
"
\n",
" \n",
" 16 | \n",
" 0.982939 | \n",
" 0.260358 | \n",
" 0.918897 | \n",
" 0.531278 | \n",
" 0.304655 | \n",
"
\n",
" \n",
" 17 | \n",
" 0.381823 | \n",
" 0.003594 | \n",
" 0.052597 | \n",
" 0.921529 | \n",
" 0.022103 | \n",
"
\n",
" \n",
" 18 | \n",
" 0.227944 | \n",
" 0.706832 | \n",
" 0.137266 | \n",
" 0.129158 | \n",
" 0.882734 | \n",
"
\n",
" \n",
" 19 | \n",
" 0.226257 | \n",
" 0.818213 | \n",
" 0.326071 | \n",
" 0.230419 | \n",
" 0.668891 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 0 1 2 3 4\n",
"0 0.020780 0.365190 0.673825 0.800112 0.188644\n",
"1 0.660845 0.265913 0.445028 0.889438 0.601047\n",
"2 0.646987 0.926823 0.722838 0.475271 0.827945\n",
"3 0.871724 0.290353 0.099578 0.109949 0.229182\n",
"4 0.704794 0.884062 0.751327 0.595746 0.612269\n",
"5 0.371269 0.560512 0.510264 0.247923 0.618853\n",
"6 0.150398 0.116999 0.934865 0.315723 0.221538\n",
"7 0.556336 0.875514 0.471526 0.539511 0.271221\n",
"8 0.428221 0.546766 0.921274 0.500520 0.400341\n",
"9 0.150170 0.802378 0.608124 0.342871 0.076631\n",
"10 0.099049 0.280748 0.865939 0.214541 0.083318\n",
"11 0.042867 0.701639 0.051457 0.691385 0.051529\n",
"12 0.530845 0.248395 0.433733 0.049458 0.314959\n",
"13 0.142230 0.746634 0.536247 0.096499 0.123294\n",
"14 0.139630 0.056464 0.595644 0.764071 0.193826\n",
"15 0.709624 0.590262 0.816268 0.187931 0.366224\n",
"16 0.982939 0.260358 0.918897 0.531278 0.304655\n",
"17 0.381823 0.003594 0.052597 0.921529 0.022103\n",
"18 0.227944 0.706832 0.137266 0.129158 0.882734\n",
"19 0.226257 0.818213 0.326071 0.230419 0.668891"
]
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ff"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"RangeIndex: 20 entries, 0 to 19\n",
"Data columns (total 5 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 0 20 non-null float64\n",
" 1 1 20 non-null float64\n",
" 2 2 20 non-null float64\n",
" 3 3 20 non-null float64\n",
" 4 4 20 non-null float64\n",
"dtypes: float64(5)\n",
"memory usage: 868.0 bytes\n"
]
}
],
"source": [
"ff.info()"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" date | \n",
" students | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 10/9/2020 | \n",
" 10 | \n",
"
\n",
" \n",
" 1 | \n",
" 11/09/2020 | \n",
" 20 | \n",
"
\n",
" \n",
" 2 | \n",
" 12/09/2020 | \n",
" 30 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" date students\n",
"0 10/9/2020 10\n",
"1 11/09/2020 20\n",
"2 12/09/2020 30"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#data functon:\n",
"date=pd.DataFrame(\n",
"{\n",
"\"date\":['10/9/2020','11/09/2020','12/09/2020'],\n",
"\"students\":[10,20,30]})\n"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"First Name Gender Start Date Last Login Time Salary Bonus % Senior Management Team \n",
"Angela Female 11/22/2005 6:29 AM 95570 18.523 True Engineering 1\n",
"Jerry Male 3/4/2005 1:00 PM 138705 9.340 True Finance 1\n",
"Ruby Female 8/17/1987 4:20 PM 65476 10.012 True Product 1\n",
"Maria Female 4/23/1993 11:17 AM 130590 11.858 False Finance 1\n",
"Lillian Female 6/5/2016 6:09 AM 59414 1.256 False Product 1\n",
"Larry Male 1/24/1998 4:47 PM 101004 1.389 True Client Services 1\n",
"Kimberly Female 1/14/1999 7:13 AM 41426 14.543 True Finance 1\n",
"Julie Female 10/26/1997 3:19 PM 102508 12.637 True Legal 1\n",
"Jeremy Male 9/21/2010 5:56 AM 90370 7.369 False Human Resources 1\n",
"Brandon Male 12/1/1980 1:08 AM 112807 17.492 True Human Resources 1\n",
"Gary Male 1/27/2008 11:40 PM 109831 5.831 False Sales 1\n",
"Frances Female 8/8/2002 6:51 AM 139852 7.524 True Business Development 1\n",
"Douglas Male 8/6/1993 12:42 PM 97308 6.945 True Marketing 1\n",
"Donna Female 7/22/2010 3:48 AM 81014 1.894 False Product 1\n",
"Diana Female 10/23/1981 10:27 AM 132940 19.082 False Client Services 1\n",
"Dennis Male 4/18/1987 1:35 AM 115163 10.125 False Legal 1\n",
"Shawn Male 12/7/1986 7:45 PM 111737 6.414 False Product 1\n",
"dtype: int64"
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ed.value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Gender | \n",
" Salary | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Male | \n",
" 97308 | \n",
"
\n",
" \n",
" 1 | \n",
" Male | \n",
" 61933 | \n",
"
\n",
" \n",
" 2 | \n",
" Female | \n",
" 130590 | \n",
"
\n",
" \n",
" 3 | \n",
" Male | \n",
" 138705 | \n",
"
\n",
" \n",
" 4 | \n",
" Male | \n",
" 101004 | \n",
"
\n",
" \n",
" 5 | \n",
" Male | \n",
" 115163 | \n",
"
\n",
" \n",
" 6 | \n",
" Female | \n",
" 65476 | \n",
"
\n",
" \n",
" 7 | \n",
" Female | \n",
" 45906 | \n",
"
\n",
" \n",
" 8 | \n",
" Female | \n",
" 95570 | \n",
"
\n",
" \n",
" 9 | \n",
" Female | \n",
" 139852 | \n",
"
\n",
" \n",
" 10 | \n",
" Female | \n",
" 63241 | \n",
"
\n",
" \n",
" 11 | \n",
" Female | \n",
" 102508 | \n",
"
\n",
" \n",
" 12 | \n",
" Male | \n",
" 112807 | \n",
"
\n",
" \n",
" 13 | \n",
" Male | \n",
" 109831 | \n",
"
\n",
" \n",
" 14 | \n",
" Female | \n",
" 41426 | \n",
"
\n",
" \n",
" 15 | \n",
" Female | \n",
" 59414 | \n",
"
\n",
" \n",
" 16 | \n",
" Male | \n",
" 90370 | \n",
"
\n",
" \n",
" 17 | \n",
" Male | \n",
" 111737 | \n",
"
\n",
" \n",
" 18 | \n",
" Female | \n",
" 132940 | \n",
"
\n",
" \n",
" 19 | \n",
" Female | \n",
" 81014 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Gender Salary\n",
"0 Male 97308\n",
"1 Male 61933\n",
"2 Female 130590\n",
"3 Male 138705\n",
"4 Male 101004\n",
"5 Male 115163\n",
"6 Female 65476\n",
"7 Female 45906\n",
"8 Female 95570\n",
"9 Female 139852\n",
"10 Female 63241\n",
"11 Female 102508\n",
"12 Male 112807\n",
"13 Male 109831\n",
"14 Female 41426\n",
"15 Female 59414\n",
"16 Male 90370\n",
"17 Male 111737\n",
"18 Female 132940\n",
"19 Female 81014"
]
},
"execution_count": 62,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ed[['Gender','Salary']]"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" First Name | \n",
" Gender | \n",
" Start Date | \n",
" Last Login Time | \n",
" Salary | \n",
" Bonus % | \n",
" Senior Management | \n",
" Team | \n",
"
\n",
" \n",
" \n",
" \n",
" 8 | \n",
" Angela | \n",
" Female | \n",
" 11/22/2005 | \n",
" 6:29 AM | \n",
" 95570 | \n",
" 18.523 | \n",
" True | \n",
" Engineering | \n",
"
\n",
" \n",
" 9 | \n",
" Frances | \n",
" Female | \n",
" 8/8/2002 | \n",
" 6:51 AM | \n",
" 139852 | \n",
" 7.524 | \n",
" True | \n",
" Business Development | \n",
"
\n",
" \n",
" 10 | \n",
" Louise | \n",
" Female | \n",
" 8/12/1980 | \n",
" 9:01 AM | \n",
" 63241 | \n",
" 15.132 | \n",
" True | \n",
" NaN | \n",
"
\n",
" \n",
" 11 | \n",
" Julie | \n",
" Female | \n",
" 10/26/1997 | \n",
" 3:19 PM | \n",
" 102508 | \n",
" 12.637 | \n",
" True | \n",
" Legal | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" First Name Gender Start Date Last Login Time Salary Bonus % \\\n",
"8 Angela Female 11/22/2005 6:29 AM 95570 18.523 \n",
"9 Frances Female 8/8/2002 6:51 AM 139852 7.524 \n",
"10 Louise Female 8/12/1980 9:01 AM 63241 15.132 \n",
"11 Julie Female 10/26/1997 3:19 PM 102508 12.637 \n",
"\n",
" Senior Management Team \n",
"8 True Engineering \n",
"9 True Business Development \n",
"10 True NaN \n",
"11 True Legal "
]
},
"execution_count": 69,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#selection by position:rows data:\n",
"ed.iloc[8:12]"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"First Name Angela\n",
"Gender Female\n",
"Start Date 11/22/2005\n",
"Last Login Time 6:29 AM\n",
"Salary 95570\n",
"Bonus % 18.523\n",
"Senior Management True\n",
"Team Engineering\n",
"Name: 8, dtype: object"
]
},
"execution_count": 64,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ed.loc[8]"
]
},
{
"cell_type": "code",
"execution_count": 73,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"First Name 1\n",
"Gender 0\n",
"Start Date 0\n",
"Last Login Time 0\n",
"Salary 0\n",
"Bonus % 0\n",
"Senior Management 1\n",
"Team 2\n",
"dtype: int64"
]
},
"execution_count": 73,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#data cleaning:\n",
"ed.isnull().sum()"
]
},
{
"cell_type": "code",
"execution_count": 74,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"4"
]
},
"execution_count": 74,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#total null values:\n",
"ed.isnull().sum().sum()"
]
},
{
"cell_type": "code",
"execution_count": 77,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"156"
]
},
"execution_count": 77,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ed.notnull().sum().sum()"
]
},
{
"cell_type": "code",
"execution_count": 78,
"metadata": {},
"outputs": [],
"source": [
"#fpr practice on drop we will take the copy of the original data:\n",
"ed2=ed"
]
},
{
"cell_type": "code",
"execution_count": 79,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" First Name | \n",
" Gender | \n",
" Start Date | \n",
" Last Login Time | \n",
" Salary | \n",
" Bonus % | \n",
" Senior Management | \n",
" Team | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Douglas | \n",
" Male | \n",
" 8/6/1993 | \n",
" 12:42 PM | \n",
" 97308 | \n",
" 6.945 | \n",
" True | \n",
" Marketing | \n",
"
\n",
" \n",
" 1 | \n",
" Thomas | \n",
" Male | \n",
" 3/31/1996 | \n",
" 6:53 AM | \n",
" 61933 | \n",
" 4.170 | \n",
" True | \n",
" NaN | \n",
"
\n",
" \n",
" 2 | \n",
" Maria | \n",
" Female | \n",
" 4/23/1993 | \n",
" 11:17 AM | \n",
" 130590 | \n",
" 11.858 | \n",
" False | \n",
" Finance | \n",
"
\n",
" \n",
" 3 | \n",
" Jerry | \n",
" Male | \n",
" 3/4/2005 | \n",
" 1:00 PM | \n",
" 138705 | \n",
" 9.340 | \n",
" True | \n",
" Finance | \n",
"
\n",
" \n",
" 4 | \n",
" Larry | \n",
" Male | \n",
" 1/24/1998 | \n",
" 4:47 PM | \n",
" 101004 | \n",
" 1.389 | \n",
" True | \n",
" Client Services | \n",
"
\n",
" \n",
" 5 | \n",
" Dennis | \n",
" Male | \n",
" 4/18/1987 | \n",
" 1:35 AM | \n",
" 115163 | \n",
" 10.125 | \n",
" False | \n",
" Legal | \n",
"
\n",
" \n",
" 6 | \n",
" Ruby | \n",
" Female | \n",
" 8/17/1987 | \n",
" 4:20 PM | \n",
" 65476 | \n",
" 10.012 | \n",
" True | \n",
" Product | \n",
"
\n",
" \n",
" 7 | \n",
" NaN | \n",
" Female | \n",
" 7/20/2015 | \n",
" 10:43 AM | \n",
" 45906 | \n",
" 11.598 | \n",
" NaN | \n",
" Finance | \n",
"
\n",
" \n",
" 8 | \n",
" Angela | \n",
" Female | \n",
" 11/22/2005 | \n",
" 6:29 AM | \n",
" 95570 | \n",
" 18.523 | \n",
" True | \n",
" Engineering | \n",
"
\n",
" \n",
" 9 | \n",
" Frances | \n",
" Female | \n",
" 8/8/2002 | \n",
" 6:51 AM | \n",
" 139852 | \n",
" 7.524 | \n",
" True | \n",
" Business Development | \n",
"
\n",
" \n",
" 10 | \n",
" Louise | \n",
" Female | \n",
" 8/12/1980 | \n",
" 9:01 AM | \n",
" 63241 | \n",
" 15.132 | \n",
" True | \n",
" NaN | \n",
"
\n",
" \n",
" 11 | \n",
" Julie | \n",
" Female | \n",
" 10/26/1997 | \n",
" 3:19 PM | \n",
" 102508 | \n",
" 12.637 | \n",
" True | \n",
" Legal | \n",
"
\n",
" \n",
" 12 | \n",
" Brandon | \n",
" Male | \n",
" 12/1/1980 | \n",
" 1:08 AM | \n",
" 112807 | \n",
" 17.492 | \n",
" True | \n",
" Human Resources | \n",
"
\n",
" \n",
" 13 | \n",
" Gary | \n",
" Male | \n",
" 1/27/2008 | \n",
" 11:40 PM | \n",
" 109831 | \n",
" 5.831 | \n",
" False | \n",
" Sales | \n",
"
\n",
" \n",
" 14 | \n",
" Kimberly | \n",
" Female | \n",
" 1/14/1999 | \n",
" 7:13 AM | \n",
" 41426 | \n",
" 14.543 | \n",
" True | \n",
" Finance | \n",
"
\n",
" \n",
" 15 | \n",
" Lillian | \n",
" Female | \n",
" 6/5/2016 | \n",
" 6:09 AM | \n",
" 59414 | \n",
" 1.256 | \n",
" False | \n",
" Product | \n",
"
\n",
" \n",
" 16 | \n",
" Jeremy | \n",
" Male | \n",
" 9/21/2010 | \n",
" 5:56 AM | \n",
" 90370 | \n",
" 7.369 | \n",
" False | \n",
" Human Resources | \n",
"
\n",
" \n",
" 17 | \n",
" Shawn | \n",
" Male | \n",
" 12/7/1986 | \n",
" 7:45 PM | \n",
" 111737 | \n",
" 6.414 | \n",
" False | \n",
" Product | \n",
"
\n",
" \n",
" 18 | \n",
" Diana | \n",
" Female | \n",
" 10/23/1981 | \n",
" 10:27 AM | \n",
" 132940 | \n",
" 19.082 | \n",
" False | \n",
" Client Services | \n",
"
\n",
" \n",
" 19 | \n",
" Donna | \n",
" Female | \n",
" 7/22/2010 | \n",
" 3:48 AM | \n",
" 81014 | \n",
" 1.894 | \n",
" False | \n",
" Product | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" First Name Gender Start Date Last Login Time Salary Bonus % \\\n",
"0 Douglas Male 8/6/1993 12:42 PM 97308 6.945 \n",
"1 Thomas Male 3/31/1996 6:53 AM 61933 4.170 \n",
"2 Maria Female 4/23/1993 11:17 AM 130590 11.858 \n",
"3 Jerry Male 3/4/2005 1:00 PM 138705 9.340 \n",
"4 Larry Male 1/24/1998 4:47 PM 101004 1.389 \n",
"5 Dennis Male 4/18/1987 1:35 AM 115163 10.125 \n",
"6 Ruby Female 8/17/1987 4:20 PM 65476 10.012 \n",
"7 NaN Female 7/20/2015 10:43 AM 45906 11.598 \n",
"8 Angela Female 11/22/2005 6:29 AM 95570 18.523 \n",
"9 Frances Female 8/8/2002 6:51 AM 139852 7.524 \n",
"10 Louise Female 8/12/1980 9:01 AM 63241 15.132 \n",
"11 Julie Female 10/26/1997 3:19 PM 102508 12.637 \n",
"12 Brandon Male 12/1/1980 1:08 AM 112807 17.492 \n",
"13 Gary Male 1/27/2008 11:40 PM 109831 5.831 \n",
"14 Kimberly Female 1/14/1999 7:13 AM 41426 14.543 \n",
"15 Lillian Female 6/5/2016 6:09 AM 59414 1.256 \n",
"16 Jeremy Male 9/21/2010 5:56 AM 90370 7.369 \n",
"17 Shawn Male 12/7/1986 7:45 PM 111737 6.414 \n",
"18 Diana Female 10/23/1981 10:27 AM 132940 19.082 \n",
"19 Donna Female 7/22/2010 3:48 AM 81014 1.894 \n",
"\n",
" Senior Management Team \n",
"0 True Marketing \n",
"1 True NaN \n",
"2 False Finance \n",
"3 True Finance \n",
"4 True Client Services \n",
"5 False Legal \n",
"6 True Product \n",
"7 NaN Finance \n",
"8 True Engineering \n",
"9 True Business Development \n",
"10 True NaN \n",
"11 True Legal \n",
"12 True Human Resources \n",
"13 False Sales \n",
"14 True Finance \n",
"15 False Product \n",
"16 False Human Resources \n",
"17 False Product \n",
"18 False Client Services \n",
"19 False Product "
]
},
"execution_count": 79,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ed2"
]
},
{
"cell_type": "code",
"execution_count": 90,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"prasent null values: 0\n"
]
}
],
"source": [
"#removing the totyal columns if they are with the null values:\n",
"ed3=ed2.dropna(axis=1)\n",
"print(\"prasent null values:\",ed3.isnull().sum().sum())"
]
},
{
"cell_type": "code",
"execution_count": 96,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
":1: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" ed2.fillna(10,inplace=True)\n"
]
}
],
"source": [
"ed2.fillna(10,inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 97,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"First Name 0\n",
"Gender 0\n",
"Start Date 0\n",
"Last Login Time 0\n",
"Salary 0\n",
"Bonus % 0\n",
"Senior Management 0\n",
"Team 0\n",
"dtype: int64"
]
},
"execution_count": 97,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ed2.isnull().sum()"
]
},
{
"cell_type": "code",
"execution_count": 98,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" First Name | \n",
" Gender | \n",
" Start Date | \n",
" Last Login Time | \n",
" Salary | \n",
" Bonus % | \n",
" Senior Management | \n",
" Team | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Douglas | \n",
" Male | \n",
" 8/6/1993 | \n",
" 12:42 PM | \n",
" 97308 | \n",
" 6.945 | \n",
" True | \n",
" Marketing | \n",
"
\n",
" \n",
" 1 | \n",
" Thomas | \n",
" Male | \n",
" 3/31/1996 | \n",
" 6:53 AM | \n",
" 61933 | \n",
" 4.170 | \n",
" True | \n",
" 10 | \n",
"
\n",
" \n",
" 2 | \n",
" Maria | \n",
" Female | \n",
" 4/23/1993 | \n",
" 11:17 AM | \n",
" 130590 | \n",
" 11.858 | \n",
" False | \n",
" Finance | \n",
"
\n",
" \n",
" 3 | \n",
" Jerry | \n",
" Male | \n",
" 3/4/2005 | \n",
" 1:00 PM | \n",
" 138705 | \n",
" 9.340 | \n",
" True | \n",
" Finance | \n",
"
\n",
" \n",
" 4 | \n",
" Larry | \n",
" Male | \n",
" 1/24/1998 | \n",
" 4:47 PM | \n",
" 101004 | \n",
" 1.389 | \n",
" True | \n",
" Client Services | \n",
"
\n",
" \n",
" 5 | \n",
" Dennis | \n",
" Male | \n",
" 4/18/1987 | \n",
" 1:35 AM | \n",
" 115163 | \n",
" 10.125 | \n",
" False | \n",
" Legal | \n",
"
\n",
" \n",
" 6 | \n",
" Ruby | \n",
" Female | \n",
" 8/17/1987 | \n",
" 4:20 PM | \n",
" 65476 | \n",
" 10.012 | \n",
" True | \n",
" Product | \n",
"
\n",
" \n",
" 7 | \n",
" 10 | \n",
" Female | \n",
" 7/20/2015 | \n",
" 10:43 AM | \n",
" 45906 | \n",
" 11.598 | \n",
" 10 | \n",
" Finance | \n",
"
\n",
" \n",
" 8 | \n",
" Angela | \n",
" Female | \n",
" 11/22/2005 | \n",
" 6:29 AM | \n",
" 95570 | \n",
" 18.523 | \n",
" True | \n",
" Engineering | \n",
"
\n",
" \n",
" 9 | \n",
" Frances | \n",
" Female | \n",
" 8/8/2002 | \n",
" 6:51 AM | \n",
" 139852 | \n",
" 7.524 | \n",
" True | \n",
" Business Development | \n",
"
\n",
" \n",
" 10 | \n",
" Louise | \n",
" Female | \n",
" 8/12/1980 | \n",
" 9:01 AM | \n",
" 63241 | \n",
" 15.132 | \n",
" True | \n",
" 10 | \n",
"
\n",
" \n",
" 11 | \n",
" Julie | \n",
" Female | \n",
" 10/26/1997 | \n",
" 3:19 PM | \n",
" 102508 | \n",
" 12.637 | \n",
" True | \n",
" Legal | \n",
"
\n",
" \n",
" 12 | \n",
" Brandon | \n",
" Male | \n",
" 12/1/1980 | \n",
" 1:08 AM | \n",
" 112807 | \n",
" 17.492 | \n",
" True | \n",
" Human Resources | \n",
"
\n",
" \n",
" 13 | \n",
" Gary | \n",
" Male | \n",
" 1/27/2008 | \n",
" 11:40 PM | \n",
" 109831 | \n",
" 5.831 | \n",
" False | \n",
" Sales | \n",
"
\n",
" \n",
" 14 | \n",
" Kimberly | \n",
" Female | \n",
" 1/14/1999 | \n",
" 7:13 AM | \n",
" 41426 | \n",
" 14.543 | \n",
" True | \n",
" Finance | \n",
"
\n",
" \n",
" 15 | \n",
" Lillian | \n",
" Female | \n",
" 6/5/2016 | \n",
" 6:09 AM | \n",
" 59414 | \n",
" 1.256 | \n",
" False | \n",
" Product | \n",
"
\n",
" \n",
" 16 | \n",
" Jeremy | \n",
" Male | \n",
" 9/21/2010 | \n",
" 5:56 AM | \n",
" 90370 | \n",
" 7.369 | \n",
" False | \n",
" Human Resources | \n",
"
\n",
" \n",
" 17 | \n",
" Shawn | \n",
" Male | \n",
" 12/7/1986 | \n",
" 7:45 PM | \n",
" 111737 | \n",
" 6.414 | \n",
" False | \n",
" Product | \n",
"
\n",
" \n",
" 18 | \n",
" Diana | \n",
" Female | \n",
" 10/23/1981 | \n",
" 10:27 AM | \n",
" 132940 | \n",
" 19.082 | \n",
" False | \n",
" Client Services | \n",
"
\n",
" \n",
" 19 | \n",
" Donna | \n",
" Female | \n",
" 7/22/2010 | \n",
" 3:48 AM | \n",
" 81014 | \n",
" 1.894 | \n",
" False | \n",
" Product | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" First Name Gender Start Date Last Login Time Salary Bonus % \\\n",
"0 Douglas Male 8/6/1993 12:42 PM 97308 6.945 \n",
"1 Thomas Male 3/31/1996 6:53 AM 61933 4.170 \n",
"2 Maria Female 4/23/1993 11:17 AM 130590 11.858 \n",
"3 Jerry Male 3/4/2005 1:00 PM 138705 9.340 \n",
"4 Larry Male 1/24/1998 4:47 PM 101004 1.389 \n",
"5 Dennis Male 4/18/1987 1:35 AM 115163 10.125 \n",
"6 Ruby Female 8/17/1987 4:20 PM 65476 10.012 \n",
"7 10 Female 7/20/2015 10:43 AM 45906 11.598 \n",
"8 Angela Female 11/22/2005 6:29 AM 95570 18.523 \n",
"9 Frances Female 8/8/2002 6:51 AM 139852 7.524 \n",
"10 Louise Female 8/12/1980 9:01 AM 63241 15.132 \n",
"11 Julie Female 10/26/1997 3:19 PM 102508 12.637 \n",
"12 Brandon Male 12/1/1980 1:08 AM 112807 17.492 \n",
"13 Gary Male 1/27/2008 11:40 PM 109831 5.831 \n",
"14 Kimberly Female 1/14/1999 7:13 AM 41426 14.543 \n",
"15 Lillian Female 6/5/2016 6:09 AM 59414 1.256 \n",
"16 Jeremy Male 9/21/2010 5:56 AM 90370 7.369 \n",
"17 Shawn Male 12/7/1986 7:45 PM 111737 6.414 \n",
"18 Diana Female 10/23/1981 10:27 AM 132940 19.082 \n",
"19 Donna Female 7/22/2010 3:48 AM 81014 1.894 \n",
"\n",
" Senior Management Team \n",
"0 True Marketing \n",
"1 True 10 \n",
"2 False Finance \n",
"3 True Finance \n",
"4 True Client Services \n",
"5 False Legal \n",
"6 True Product \n",
"7 10 Finance \n",
"8 True Engineering \n",
"9 True Business Development \n",
"10 True 10 \n",
"11 True Legal \n",
"12 True Human Resources \n",
"13 False Sales \n",
"14 True Finance \n",
"15 False Product \n",
"16 False Human Resources \n",
"17 False Product \n",
"18 False Client Services \n",
"19 False Product "
]
},
"execution_count": 98,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ed2"
]
},
{
"cell_type": "code",
"execution_count": 118,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" First Name | \n",
" Gender | \n",
" Start Date | \n",
" Last Login Time | \n",
" Salary | \n",
" Bonus % | \n",
" Senior Management | \n",
" Team | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Douglas | \n",
" Male | \n",
" 8/6/1993 | \n",
" 12:42 PM | \n",
" 97308 | \n",
" 6.945 | \n",
" True | \n",
" Marketing | \n",
"
\n",
" \n",
" 1 | \n",
" Thomas | \n",
" Male | \n",
" 3/31/1996 | \n",
" 6:53 AM | \n",
" 61933 | \n",
" 4.170 | \n",
" True | \n",
" NaN | \n",
"
\n",
" \n",
" 2 | \n",
" Maria | \n",
" Female | \n",
" 4/23/1993 | \n",
" 11:17 AM | \n",
" 130590 | \n",
" 11.858 | \n",
" False | \n",
" Finance | \n",
"
\n",
" \n",
" 3 | \n",
" Jerry | \n",
" Male | \n",
" 3/4/2005 | \n",
" 1:00 PM | \n",
" 138705 | \n",
" 9.340 | \n",
" True | \n",
" Finance | \n",
"
\n",
" \n",
" 4 | \n",
" Larry | \n",
" Male | \n",
" 1/24/1998 | \n",
" 4:47 PM | \n",
" 101004 | \n",
" 1.389 | \n",
" True | \n",
" Client Services | \n",
"
\n",
" \n",
" 5 | \n",
" Dennis | \n",
" Male | \n",
" 4/18/1987 | \n",
" 1:35 AM | \n",
" 115163 | \n",
" 10.125 | \n",
" False | \n",
" Legal | \n",
"
\n",
" \n",
" 6 | \n",
" Ruby | \n",
" Female | \n",
" 8/17/1987 | \n",
" 4:20 PM | \n",
" 65476 | \n",
" 10.012 | \n",
" True | \n",
" Product | \n",
"
\n",
" \n",
" 7 | \n",
" NaN | \n",
" Female | \n",
" 7/20/2015 | \n",
" 10:43 AM | \n",
" 45906 | \n",
" 11.598 | \n",
" NaN | \n",
" Finance | \n",
"
\n",
" \n",
" 8 | \n",
" Angela | \n",
" Female | \n",
" 11/22/2005 | \n",
" 6:29 AM | \n",
" 95570 | \n",
" 18.523 | \n",
" True | \n",
" Engineering | \n",
"
\n",
" \n",
" 9 | \n",
" Frances | \n",
" Female | \n",
" 8/8/2002 | \n",
" 6:51 AM | \n",
" 139852 | \n",
" 7.524 | \n",
" True | \n",
" Business Development | \n",
"
\n",
" \n",
" 10 | \n",
" Louise | \n",
" Female | \n",
" 8/12/1980 | \n",
" 9:01 AM | \n",
" 63241 | \n",
" 15.132 | \n",
" True | \n",
" NaN | \n",
"
\n",
" \n",
" 11 | \n",
" Julie | \n",
" Female | \n",
" 10/26/1997 | \n",
" 3:19 PM | \n",
" 102508 | \n",
" 12.637 | \n",
" True | \n",
" Legal | \n",
"
\n",
" \n",
" 12 | \n",
" Brandon | \n",
" Male | \n",
" 12/1/1980 | \n",
" 1:08 AM | \n",
" 112807 | \n",
" 17.492 | \n",
" True | \n",
" Human Resources | \n",
"
\n",
" \n",
" 13 | \n",
" Gary | \n",
" Male | \n",
" 1/27/2008 | \n",
" 11:40 PM | \n",
" 109831 | \n",
" 5.831 | \n",
" False | \n",
" Sales | \n",
"
\n",
" \n",
" 14 | \n",
" Kimberly | \n",
" Female | \n",
" 1/14/1999 | \n",
" 7:13 AM | \n",
" 41426 | \n",
" 14.543 | \n",
" True | \n",
" Finance | \n",
"
\n",
" \n",
" 15 | \n",
" Lillian | \n",
" Female | \n",
" 6/5/2016 | \n",
" 6:09 AM | \n",
" 59414 | \n",
" 1.256 | \n",
" False | \n",
" Product | \n",
"
\n",
" \n",
" 16 | \n",
" Jeremy | \n",
" Male | \n",
" 9/21/2010 | \n",
" 5:56 AM | \n",
" 90370 | \n",
" 7.369 | \n",
" False | \n",
" Human Resources | \n",
"
\n",
" \n",
" 17 | \n",
" Shawn | \n",
" Male | \n",
" 12/7/1986 | \n",
" 7:45 PM | \n",
" 111737 | \n",
" 6.414 | \n",
" False | \n",
" Product | \n",
"
\n",
" \n",
" 18 | \n",
" Diana | \n",
" Female | \n",
" 10/23/1981 | \n",
" 10:27 AM | \n",
" 132940 | \n",
" 19.082 | \n",
" False | \n",
" Client Services | \n",
"
\n",
" \n",
" 19 | \n",
" Donna | \n",
" Female | \n",
" 7/22/2010 | \n",
" 3:48 AM | \n",
" 81014 | \n",
" 1.894 | \n",
" False | \n",
" Product | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" First Name Gender Start Date Last Login Time Salary Bonus % \\\n",
"0 Douglas Male 8/6/1993 12:42 PM 97308 6.945 \n",
"1 Thomas Male 3/31/1996 6:53 AM 61933 4.170 \n",
"2 Maria Female 4/23/1993 11:17 AM 130590 11.858 \n",
"3 Jerry Male 3/4/2005 1:00 PM 138705 9.340 \n",
"4 Larry Male 1/24/1998 4:47 PM 101004 1.389 \n",
"5 Dennis Male 4/18/1987 1:35 AM 115163 10.125 \n",
"6 Ruby Female 8/17/1987 4:20 PM 65476 10.012 \n",
"7 NaN Female 7/20/2015 10:43 AM 45906 11.598 \n",
"8 Angela Female 11/22/2005 6:29 AM 95570 18.523 \n",
"9 Frances Female 8/8/2002 6:51 AM 139852 7.524 \n",
"10 Louise Female 8/12/1980 9:01 AM 63241 15.132 \n",
"11 Julie Female 10/26/1997 3:19 PM 102508 12.637 \n",
"12 Brandon Male 12/1/1980 1:08 AM 112807 17.492 \n",
"13 Gary Male 1/27/2008 11:40 PM 109831 5.831 \n",
"14 Kimberly Female 1/14/1999 7:13 AM 41426 14.543 \n",
"15 Lillian Female 6/5/2016 6:09 AM 59414 1.256 \n",
"16 Jeremy Male 9/21/2010 5:56 AM 90370 7.369 \n",
"17 Shawn Male 12/7/1986 7:45 PM 111737 6.414 \n",
"18 Diana Female 10/23/1981 10:27 AM 132940 19.082 \n",
"19 Donna Female 7/22/2010 3:48 AM 81014 1.894 \n",
"\n",
" Senior Management Team \n",
"0 True Marketing \n",
"1 True NaN \n",
"2 False Finance \n",
"3 True Finance \n",
"4 True Client Services \n",
"5 False Legal \n",
"6 True Product \n",
"7 NaN Finance \n",
"8 True Engineering \n",
"9 True Business Development \n",
"10 True NaN \n",
"11 True Legal \n",
"12 True Human Resources \n",
"13 False Sales \n",
"14 True Finance \n",
"15 False Product \n",
"16 False Human Resources \n",
"17 False Product \n",
"18 False Client Services \n",
"19 False Product "
]
},
"execution_count": 118,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ed5=Employ.head(20)\n",
"ed5"
]
},
{
"cell_type": "code",
"execution_count": 111,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"First Name 0\n",
"Gender 0\n",
"Start Date 0\n",
"Last Login Time 0\n",
"Salary 0\n",
"Bonus % 0\n",
"Senior Management 0\n",
"Team 0\n",
"dtype: int64"
]
},
"execution_count": 111,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ed2.isnull().sum()"
]
},
{
"cell_type": "code",
"execution_count": 120,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" date | \n",
" students | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 10/9/2020 | \n",
" 10 | \n",
"
\n",
" \n",
" 1 | \n",
" 11/09/2020 | \n",
" 20 | \n",
"
\n",
" \n",
" 2 | \n",
" 12/09/2020 | \n",
" 30 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" date students\n",
"0 10/9/2020 10\n",
"1 11/09/2020 20\n",
"2 12/09/2020 30"
]
},
"execution_count": 120,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"date"
]
},
{
"cell_type": "code",
"execution_count": 124,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"date object\n",
"students int64\n",
"dtype: object"
]
},
"execution_count": 124,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"date.dtypes"
]
},
{
"cell_type": "code",
"execution_count": 131,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" check | \n",
" students | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 10/9/2020 | \n",
" 10 | \n",
"
\n",
" \n",
" 1 | \n",
" 11/09/2020 | \n",
" 20 | \n",
"
\n",
" \n",
" 2 | \n",
" 12/09/2020 | \n",
" 30 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" check students\n",
"0 10/9/2020 10\n",
"1 11/09/2020 20\n",
"2 12/09/2020 30"
]
},
"execution_count": 131,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#rename for the date with the check:\n",
"date.rename(columns={'date':'check'})"
]
},
{
"cell_type": "code",
"execution_count": 134,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" date | \n",
" students | \n",
"
\n",
" \n",
" \n",
" \n",
" 2 | \n",
" 12/09/2020 | \n",
" 30 | \n",
"
\n",
" \n",
" 1 | \n",
" 11/09/2020 | \n",
" 20 | \n",
"
\n",
" \n",
" 0 | \n",
" 10/9/2020 | \n",
" 10 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" date students\n",
"2 12/09/2020 30\n",
"1 11/09/2020 20\n",
"0 10/9/2020 10"
]
},
"execution_count": 134,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"date.sort_values('students',ascending=False)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 0\n",
"0 1\n",
"1 2\n",
"2 3\n",
"3 4\n",
"4 5\n",
"5 6\n"
]
}
],
"source": [
"#creating the data frame:\n",
"import pandas as pd\n",
"data=[1,2,3,4,5,6]\n",
"d_frame=pd.DataFrame(data)\n",
"print(d_frame)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 0 1 2\n",
"x std1 std2 std3\n",
"y azar ameer varun\n"
]
}
],
"source": [
"import numpy as np\n",
"arr=np.array([[\"std1\",\"std2\",\"std3\"],[\"azar\",\"ameer\",\"varun\"]])\n",
"d_frame2=pd.DataFrame(arr,index=['x','y'])\n",
"print(d_frame2)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 0 | \n",
"
\n",
" \n",
" \n",
" \n",
" x | \n",
" False | \n",
"
\n",
" \n",
" y | \n",
" False | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 0\n",
"x False\n",
"y False"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"d_frame2.isnull()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 0 | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 6.000000 | \n",
"
\n",
" \n",
" mean | \n",
" 3.500000 | \n",
"
\n",
" \n",
" std | \n",
" 1.870829 | \n",
"
\n",
" \n",
" min | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 25% | \n",
" 2.250000 | \n",
"
\n",
" \n",
" 50% | \n",
" 3.500000 | \n",
"
\n",
" \n",
" 75% | \n",
" 4.750000 | \n",
"
\n",
" \n",
" max | \n",
" 6.000000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 0\n",
"count 6.000000\n",
"mean 3.500000\n",
"std 1.870829\n",
"min 1.000000\n",
"25% 2.250000\n",
"50% 3.500000\n",
"75% 4.750000\n",
"max 6.000000"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"d_frame.describe()"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 3.5\n",
"dtype: float64"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"d_frame.mean()"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 6\n",
"dtype: int64"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"d_frame.count()"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 6\n",
"dtype: int64"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"d_frame.max()"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 3.5\n",
"dtype: float64"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"d_frame.median()"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 1.870829\n",
"dtype: float64"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"d_frame.std()"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 3.5\n",
"dtype: float64"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"d_frame.apply(np.mean)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/y8/34wjlypd37q4zn8rwhg1vsqc0000gn/T/ipykernel_7089/515644098.py:1: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.\n",
" data=np.array([[1,2,3,4,5,6],[1,2,3,4,5,6,7]])\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 0 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" [1, 2, 3, 4, 5, 6] | \n",
"
\n",
" \n",
" 1 | \n",
" [1, 2, 3, 4, 5, 6, 7] | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 0\n",
"0 [1, 2, 3, 4, 5, 6]\n",
"1 [1, 2, 3, 4, 5, 6, 7]"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data=np.array([[1,2,3,4,5,6],[1,2,3,4,5,6,7]])\n",
"data_frame=pd.DataFrame(data)\n",
"data_frame"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12"
}
},
"nbformat": 4,
"nbformat_minor": 4
}