{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 0\n", "0 1\n", "1 2\n", "2 3\n", "3 4\n" ] } ], "source": [ "df=[1,2,3,4]\n", "print(pd.DataFrame(df))" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0 1\n", "1 2\n", "2 3\n", "3 4\n", "dtype: int64\n" ] } ], "source": [ "print(pd.Series(df))" ] }, { "cell_type": "code", "execution_count": 115, "metadata": {}, "outputs": [], "source": [ "Employ=pd.read_csv(\"employees.csv\")" ] }, { "cell_type": "code", "execution_count": 116, "metadata": {}, "outputs": [], "source": [ "#first 20 rows as an independent copy: head() alone returns a slice of Employ,\n", "#and in-place edits on that slice raise SettingWithCopyWarning\n", "Employ_dub=Employ.head(20).copy()" ] }, { "cell_type": "code", "execution_count": 117, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
First NameGenderStart DateLast Login TimeSalaryBonus %Senior ManagementTeam
0DouglasMale8/6/199312:42 PM973086.945TrueMarketing
1ThomasMale3/31/19966:53 AM619334.170TrueNaN
2MariaFemale4/23/199311:17 AM13059011.858FalseFinance
3JerryMale3/4/20051:00 PM1387059.340TrueFinance
4LarryMale1/24/19984:47 PM1010041.389TrueClient Services
5DennisMale4/18/19871:35 AM11516310.125FalseLegal
6RubyFemale8/17/19874:20 PM6547610.012TrueProduct
7NaNFemale7/20/201510:43 AM4590611.598NaNFinance
8AngelaFemale11/22/20056:29 AM9557018.523TrueEngineering
9FrancesFemale8/8/20026:51 AM1398527.524TrueBusiness Development
10LouiseFemale8/12/19809:01 AM6324115.132TrueNaN
11JulieFemale10/26/19973:19 PM10250812.637TrueLegal
12BrandonMale12/1/19801:08 AM11280717.492TrueHuman Resources
13GaryMale1/27/200811:40 PM1098315.831FalseSales
14KimberlyFemale1/14/19997:13 AM4142614.543TrueFinance
15LillianFemale6/5/20166:09 AM594141.256FalseProduct
16JeremyMale9/21/20105:56 AM903707.369FalseHuman Resources
17ShawnMale12/7/19867:45 PM1117376.414FalseProduct
18DianaFemale10/23/198110:27 AM13294019.082FalseClient Services
19DonnaFemale7/22/20103:48 AM810141.894FalseProduct
\n", "
" ], "text/plain": [ " First Name Gender Start Date Last Login Time Salary Bonus % \\\n", "0 Douglas Male 8/6/1993 12:42 PM 97308 6.945 \n", "1 Thomas Male 3/31/1996 6:53 AM 61933 4.170 \n", "2 Maria Female 4/23/1993 11:17 AM 130590 11.858 \n", "3 Jerry Male 3/4/2005 1:00 PM 138705 9.340 \n", "4 Larry Male 1/24/1998 4:47 PM 101004 1.389 \n", "5 Dennis Male 4/18/1987 1:35 AM 115163 10.125 \n", "6 Ruby Female 8/17/1987 4:20 PM 65476 10.012 \n", "7 NaN Female 7/20/2015 10:43 AM 45906 11.598 \n", "8 Angela Female 11/22/2005 6:29 AM 95570 18.523 \n", "9 Frances Female 8/8/2002 6:51 AM 139852 7.524 \n", "10 Louise Female 8/12/1980 9:01 AM 63241 15.132 \n", "11 Julie Female 10/26/1997 3:19 PM 102508 12.637 \n", "12 Brandon Male 12/1/1980 1:08 AM 112807 17.492 \n", "13 Gary Male 1/27/2008 11:40 PM 109831 5.831 \n", "14 Kimberly Female 1/14/1999 7:13 AM 41426 14.543 \n", "15 Lillian Female 6/5/2016 6:09 AM 59414 1.256 \n", "16 Jeremy Male 9/21/2010 5:56 AM 90370 7.369 \n", "17 Shawn Male 12/7/1986 7:45 PM 111737 6.414 \n", "18 Diana Female 10/23/1981 10:27 AM 132940 19.082 \n", "19 Donna Female 7/22/2010 3:48 AM 81014 1.894 \n", "\n", " Senior Management Team \n", "0 True Marketing \n", "1 True NaN \n", "2 False Finance \n", "3 True Finance \n", "4 True Client Services \n", "5 False Legal \n", "6 True Product \n", "7 NaN Finance \n", "8 True Engineering \n", "9 True Business Development \n", "10 True NaN \n", "11 True Legal \n", "12 True Human Resources \n", "13 False Sales \n", "14 True Finance \n", "15 False Product \n", "16 False Human Resources \n", "17 False Product \n", "18 False Client Services \n", "19 False Product " ] }, "execution_count": 117, "metadata": {}, "output_type": "execute_result" } ], "source": [ "Employ_dub" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", 
"RangeIndex: 20 entries, 0 to 19\n", "Data columns (total 8 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 First Name 19 non-null object \n", " 1 Gender 20 non-null object \n", " 2 Start Date 20 non-null object \n", " 3 Last Login Time 20 non-null object \n", " 4 Salary 20 non-null int64 \n", " 5 Bonus % 20 non-null float64\n", " 6 Senior Management 19 non-null object \n", " 7 Team 18 non-null object \n", "dtypes: float64(1), int64(1), object(6)\n", "memory usage: 868.0+ bytes\n" ] } ], "source": [ "#total info about the employee\n", "Employ_dub.info()" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
First NameGenderStart DateLast Login TimeSalaryBonus %Senior ManagementTeam
0FalseFalseFalseFalseFalseFalseFalseFalse
1FalseFalseFalseFalseFalseFalseFalseTrue
2FalseFalseFalseFalseFalseFalseFalseFalse
3FalseFalseFalseFalseFalseFalseFalseFalse
4FalseFalseFalseFalseFalseFalseFalseFalse
5FalseFalseFalseFalseFalseFalseFalseFalse
6FalseFalseFalseFalseFalseFalseFalseFalse
7TrueFalseFalseFalseFalseFalseTrueFalse
8FalseFalseFalseFalseFalseFalseFalseFalse
9FalseFalseFalseFalseFalseFalseFalseFalse
10FalseFalseFalseFalseFalseFalseFalseTrue
11FalseFalseFalseFalseFalseFalseFalseFalse
12FalseFalseFalseFalseFalseFalseFalseFalse
13FalseFalseFalseFalseFalseFalseFalseFalse
14FalseFalseFalseFalseFalseFalseFalseFalse
15FalseFalseFalseFalseFalseFalseFalseFalse
16FalseFalseFalseFalseFalseFalseFalseFalse
17FalseFalseFalseFalseFalseFalseFalseFalse
18FalseFalseFalseFalseFalseFalseFalseFalse
19FalseFalseFalseFalseFalseFalseFalseFalse
\n", "
" ], "text/plain": [ " First Name Gender Start Date Last Login Time Salary Bonus % \\\n", "0 False False False False False False \n", "1 False False False False False False \n", "2 False False False False False False \n", "3 False False False False False False \n", "4 False False False False False False \n", "5 False False False False False False \n", "6 False False False False False False \n", "7 True False False False False False \n", "8 False False False False False False \n", "9 False False False False False False \n", "10 False False False False False False \n", "11 False False False False False False \n", "12 False False False False False False \n", "13 False False False False False False \n", "14 False False False False False False \n", "15 False False False False False False \n", "16 False False False False False False \n", "17 False False False False False False \n", "18 False False False False False False \n", "19 False False False False False False \n", "\n", " Senior Management Team \n", "0 False False \n", "1 False True \n", "2 False False \n", "3 False False \n", "4 False False \n", "5 False False \n", "6 False False \n", "7 True False \n", "8 False False \n", "9 False False \n", "10 False True \n", "11 False False \n", "12 False False \n", "13 False False \n", "14 False False \n", "15 False False \n", "16 False False \n", "17 False False \n", "18 False False \n", "19 False False " ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "Employ_dub.isnull()" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "First Name 1\n", "Gender 0\n", "Start Date 0\n", "Last Login Time 0\n", "Salary 0\n", "Bonus % 0\n", "Senior Management 1\n", "Team 2\n", "dtype: int64" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#checking for the null values in the dataset of employee\n", "Employ_dub.isnull().sum()" ] }, { "cell_type": 
"code", "execution_count": 29, "metadata": {}, "outputs": [], "source": [ "#short alias for Employ_dub (same object, not a copy)\n", "ed=Employ_dub" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(20, 8)" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#dimension of the dataset\n", "ed.shape" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['First Name', 'Gender', 'Start Date', 'Last Login Time', 'Salary',\n", " 'Bonus %', 'Senior Management', 'Team'],\n", " dtype='object')" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ed.columns" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [], "source": [ "#working on the dictionary for a while:" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [], "source": [ "#creating test objects (seeded so the random frame is the same on every run):\n", "import numpy as np\n", "np.random.seed(42)\n", "ff=pd.DataFrame(np.random.rand(20,5))" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
01234
00.0207800.3651900.6738250.8001120.188644
10.6608450.2659130.4450280.8894380.601047
20.6469870.9268230.7228380.4752710.827945
30.8717240.2903530.0995780.1099490.229182
40.7047940.8840620.7513270.5957460.612269
50.3712690.5605120.5102640.2479230.618853
60.1503980.1169990.9348650.3157230.221538
70.5563360.8755140.4715260.5395110.271221
80.4282210.5467660.9212740.5005200.400341
90.1501700.8023780.6081240.3428710.076631
100.0990490.2807480.8659390.2145410.083318
110.0428670.7016390.0514570.6913850.051529
120.5308450.2483950.4337330.0494580.314959
130.1422300.7466340.5362470.0964990.123294
140.1396300.0564640.5956440.7640710.193826
150.7096240.5902620.8162680.1879310.366224
160.9829390.2603580.9188970.5312780.304655
170.3818230.0035940.0525970.9215290.022103
180.2279440.7068320.1372660.1291580.882734
190.2262570.8182130.3260710.2304190.668891
\n", "
" ], "text/plain": [ " 0 1 2 3 4\n", "0 0.020780 0.365190 0.673825 0.800112 0.188644\n", "1 0.660845 0.265913 0.445028 0.889438 0.601047\n", "2 0.646987 0.926823 0.722838 0.475271 0.827945\n", "3 0.871724 0.290353 0.099578 0.109949 0.229182\n", "4 0.704794 0.884062 0.751327 0.595746 0.612269\n", "5 0.371269 0.560512 0.510264 0.247923 0.618853\n", "6 0.150398 0.116999 0.934865 0.315723 0.221538\n", "7 0.556336 0.875514 0.471526 0.539511 0.271221\n", "8 0.428221 0.546766 0.921274 0.500520 0.400341\n", "9 0.150170 0.802378 0.608124 0.342871 0.076631\n", "10 0.099049 0.280748 0.865939 0.214541 0.083318\n", "11 0.042867 0.701639 0.051457 0.691385 0.051529\n", "12 0.530845 0.248395 0.433733 0.049458 0.314959\n", "13 0.142230 0.746634 0.536247 0.096499 0.123294\n", "14 0.139630 0.056464 0.595644 0.764071 0.193826\n", "15 0.709624 0.590262 0.816268 0.187931 0.366224\n", "16 0.982939 0.260358 0.918897 0.531278 0.304655\n", "17 0.381823 0.003594 0.052597 0.921529 0.022103\n", "18 0.227944 0.706832 0.137266 0.129158 0.882734\n", "19 0.226257 0.818213 0.326071 0.230419 0.668891" ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ff" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 20 entries, 0 to 19\n", "Data columns (total 5 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 0 20 non-null float64\n", " 1 1 20 non-null float64\n", " 2 2 20 non-null float64\n", " 3 3 20 non-null float64\n", " 4 4 20 non-null float64\n", "dtypes: float64(5)\n", "memory usage: 868.0 bytes\n" ] } ], "source": [ "ff.info()" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
datestudents
010/9/202010
111/09/202020
212/09/202030
\n", "
" ], "text/plain": [ " date students\n", "0 10/9/2020 10\n", "1 11/09/2020 20\n", "2 12/09/2020 30" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#data functon:\n", "date=pd.DataFrame(\n", "{\n", "\"date\":['10/9/2020','11/09/2020','12/09/2020'],\n", "\"students\":[10,20,30]})\n" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "First Name Gender Start Date Last Login Time Salary Bonus % Senior Management Team \n", "Angela Female 11/22/2005 6:29 AM 95570 18.523 True Engineering 1\n", "Jerry Male 3/4/2005 1:00 PM 138705 9.340 True Finance 1\n", "Ruby Female 8/17/1987 4:20 PM 65476 10.012 True Product 1\n", "Maria Female 4/23/1993 11:17 AM 130590 11.858 False Finance 1\n", "Lillian Female 6/5/2016 6:09 AM 59414 1.256 False Product 1\n", "Larry Male 1/24/1998 4:47 PM 101004 1.389 True Client Services 1\n", "Kimberly Female 1/14/1999 7:13 AM 41426 14.543 True Finance 1\n", "Julie Female 10/26/1997 3:19 PM 102508 12.637 True Legal 1\n", "Jeremy Male 9/21/2010 5:56 AM 90370 7.369 False Human Resources 1\n", "Brandon Male 12/1/1980 1:08 AM 112807 17.492 True Human Resources 1\n", "Gary Male 1/27/2008 11:40 PM 109831 5.831 False Sales 1\n", "Frances Female 8/8/2002 6:51 AM 139852 7.524 True Business Development 1\n", "Douglas Male 8/6/1993 12:42 PM 97308 6.945 True Marketing 1\n", "Donna Female 7/22/2010 3:48 AM 81014 1.894 False Product 1\n", "Diana Female 10/23/1981 10:27 AM 132940 19.082 False Client Services 1\n", "Dennis Male 4/18/1987 1:35 AM 115163 10.125 False Legal 1\n", "Shawn Male 12/7/1986 7:45 PM 111737 6.414 False Product 1\n", "dtype: int64" ] }, "execution_count": 52, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ed.value_counts()" ] }, { "cell_type": "code", "execution_count": 62, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
GenderSalary
0Male97308
1Male61933
2Female130590
3Male138705
4Male101004
5Male115163
6Female65476
7Female45906
8Female95570
9Female139852
10Female63241
11Female102508
12Male112807
13Male109831
14Female41426
15Female59414
16Male90370
17Male111737
18Female132940
19Female81014
\n", "
" ], "text/plain": [ " Gender Salary\n", "0 Male 97308\n", "1 Male 61933\n", "2 Female 130590\n", "3 Male 138705\n", "4 Male 101004\n", "5 Male 115163\n", "6 Female 65476\n", "7 Female 45906\n", "8 Female 95570\n", "9 Female 139852\n", "10 Female 63241\n", "11 Female 102508\n", "12 Male 112807\n", "13 Male 109831\n", "14 Female 41426\n", "15 Female 59414\n", "16 Male 90370\n", "17 Male 111737\n", "18 Female 132940\n", "19 Female 81014" ] }, "execution_count": 62, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ed[['Gender','Salary']]" ] }, { "cell_type": "code", "execution_count": 69, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
First NameGenderStart DateLast Login TimeSalaryBonus %Senior ManagementTeam
8AngelaFemale11/22/20056:29 AM9557018.523TrueEngineering
9FrancesFemale8/8/20026:51 AM1398527.524TrueBusiness Development
10LouiseFemale8/12/19809:01 AM6324115.132TrueNaN
11JulieFemale10/26/19973:19 PM10250812.637TrueLegal
\n", "
" ], "text/plain": [ " First Name Gender Start Date Last Login Time Salary Bonus % \\\n", "8 Angela Female 11/22/2005 6:29 AM 95570 18.523 \n", "9 Frances Female 8/8/2002 6:51 AM 139852 7.524 \n", "10 Louise Female 8/12/1980 9:01 AM 63241 15.132 \n", "11 Julie Female 10/26/1997 3:19 PM 102508 12.637 \n", "\n", " Senior Management Team \n", "8 True Engineering \n", "9 True Business Development \n", "10 True NaN \n", "11 True Legal " ] }, "execution_count": 69, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#selection by position:rows data:\n", "ed.iloc[8:12]" ] }, { "cell_type": "code", "execution_count": 64, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "First Name Angela\n", "Gender Female\n", "Start Date 11/22/2005\n", "Last Login Time 6:29 AM\n", "Salary 95570\n", "Bonus % 18.523\n", "Senior Management True\n", "Team Engineering\n", "Name: 8, dtype: object" ] }, "execution_count": 64, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ed.loc[8]" ] }, { "cell_type": "code", "execution_count": 73, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "First Name 1\n", "Gender 0\n", "Start Date 0\n", "Last Login Time 0\n", "Salary 0\n", "Bonus % 0\n", "Senior Management 1\n", "Team 2\n", "dtype: int64" ] }, "execution_count": 73, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#data cleaning:\n", "ed.isnull().sum()" ] }, { "cell_type": "code", "execution_count": 74, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "4" ] }, "execution_count": 74, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#total null values:\n", "ed.isnull().sum().sum()" ] }, { "cell_type": "code", "execution_count": 77, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "156" ] }, "execution_count": 77, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ed.notnull().sum().sum()" ] }, { "cell_type": "code", "execution_count": 78, "metadata": {}, "outputs": [], "source": [ "#fpr 
practice on drop we will take the copy of the original data:\n", "#.copy() makes ed2 an independent frame; plain ed2=ed would only be a second name for ed\n", "ed2=ed.copy()" ] }, { "cell_type": "code", "execution_count": 79, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
First NameGenderStart DateLast Login TimeSalaryBonus %Senior ManagementTeam
0DouglasMale8/6/199312:42 PM973086.945TrueMarketing
1ThomasMale3/31/19966:53 AM619334.170TrueNaN
2MariaFemale4/23/199311:17 AM13059011.858FalseFinance
3JerryMale3/4/20051:00 PM1387059.340TrueFinance
4LarryMale1/24/19984:47 PM1010041.389TrueClient Services
5DennisMale4/18/19871:35 AM11516310.125FalseLegal
6RubyFemale8/17/19874:20 PM6547610.012TrueProduct
7NaNFemale7/20/201510:43 AM4590611.598NaNFinance
8AngelaFemale11/22/20056:29 AM9557018.523TrueEngineering
9FrancesFemale8/8/20026:51 AM1398527.524TrueBusiness Development
10LouiseFemale8/12/19809:01 AM6324115.132TrueNaN
11JulieFemale10/26/19973:19 PM10250812.637TrueLegal
12BrandonMale12/1/19801:08 AM11280717.492TrueHuman Resources
13GaryMale1/27/200811:40 PM1098315.831FalseSales
14KimberlyFemale1/14/19997:13 AM4142614.543TrueFinance
15LillianFemale6/5/20166:09 AM594141.256FalseProduct
16JeremyMale9/21/20105:56 AM903707.369FalseHuman Resources
17ShawnMale12/7/19867:45 PM1117376.414FalseProduct
18DianaFemale10/23/198110:27 AM13294019.082FalseClient Services
19DonnaFemale7/22/20103:48 AM810141.894FalseProduct
\n", "
" ], "text/plain": [ " First Name Gender Start Date Last Login Time Salary Bonus % \\\n", "0 Douglas Male 8/6/1993 12:42 PM 97308 6.945 \n", "1 Thomas Male 3/31/1996 6:53 AM 61933 4.170 \n", "2 Maria Female 4/23/1993 11:17 AM 130590 11.858 \n", "3 Jerry Male 3/4/2005 1:00 PM 138705 9.340 \n", "4 Larry Male 1/24/1998 4:47 PM 101004 1.389 \n", "5 Dennis Male 4/18/1987 1:35 AM 115163 10.125 \n", "6 Ruby Female 8/17/1987 4:20 PM 65476 10.012 \n", "7 NaN Female 7/20/2015 10:43 AM 45906 11.598 \n", "8 Angela Female 11/22/2005 6:29 AM 95570 18.523 \n", "9 Frances Female 8/8/2002 6:51 AM 139852 7.524 \n", "10 Louise Female 8/12/1980 9:01 AM 63241 15.132 \n", "11 Julie Female 10/26/1997 3:19 PM 102508 12.637 \n", "12 Brandon Male 12/1/1980 1:08 AM 112807 17.492 \n", "13 Gary Male 1/27/2008 11:40 PM 109831 5.831 \n", "14 Kimberly Female 1/14/1999 7:13 AM 41426 14.543 \n", "15 Lillian Female 6/5/2016 6:09 AM 59414 1.256 \n", "16 Jeremy Male 9/21/2010 5:56 AM 90370 7.369 \n", "17 Shawn Male 12/7/1986 7:45 PM 111737 6.414 \n", "18 Diana Female 10/23/1981 10:27 AM 132940 19.082 \n", "19 Donna Female 7/22/2010 3:48 AM 81014 1.894 \n", "\n", " Senior Management Team \n", "0 True Marketing \n", "1 True NaN \n", "2 False Finance \n", "3 True Finance \n", "4 True Client Services \n", "5 False Legal \n", "6 True Product \n", "7 NaN Finance \n", "8 True Engineering \n", "9 True Business Development \n", "10 True NaN \n", "11 True Legal \n", "12 True Human Resources \n", "13 False Sales \n", "14 True Finance \n", "15 False Product \n", "16 False Human Resources \n", "17 False Product \n", "18 False Client Services \n", "19 False Product " ] }, "execution_count": 79, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ed2" ] }, { "cell_type": "code", "execution_count": 90, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "prasent null values: 0\n" ] } ], "source": [ "#removing the totyal columns if they are with the null values:\n", 
"ed3=ed2.dropna(axis=1)\n", "print(\"prasent null values:\",ed3.isnull().sum().sum())" ] }, { "cell_type": "code", "execution_count": 96, "metadata": {}, "outputs": [], "source": [ "#fill every missing value with 10; rebinding to the returned frame avoids mutating a slice in place\n", "ed2=ed2.fillna(10)" ] }, { "cell_type": "code", "execution_count": 97, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "First Name 0\n", "Gender 0\n", "Start Date 0\n", "Last Login Time 0\n", "Salary 0\n", "Bonus % 0\n", "Senior Management 0\n", "Team 0\n", "dtype: int64" ] }, "execution_count": 97, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ed2.isnull().sum()" ] }, { "cell_type": "code", "execution_count": 98, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
First NameGenderStart DateLast Login TimeSalaryBonus %Senior ManagementTeam
0DouglasMale8/6/199312:42 PM973086.945TrueMarketing
1ThomasMale3/31/19966:53 AM619334.170True10
2MariaFemale4/23/199311:17 AM13059011.858FalseFinance
3JerryMale3/4/20051:00 PM1387059.340TrueFinance
4LarryMale1/24/19984:47 PM1010041.389TrueClient Services
5DennisMale4/18/19871:35 AM11516310.125FalseLegal
6RubyFemale8/17/19874:20 PM6547610.012TrueProduct
710Female7/20/201510:43 AM4590611.59810Finance
8AngelaFemale11/22/20056:29 AM9557018.523TrueEngineering
9FrancesFemale8/8/20026:51 AM1398527.524TrueBusiness Development
10LouiseFemale8/12/19809:01 AM6324115.132True10
11JulieFemale10/26/19973:19 PM10250812.637TrueLegal
12BrandonMale12/1/19801:08 AM11280717.492TrueHuman Resources
13GaryMale1/27/200811:40 PM1098315.831FalseSales
14KimberlyFemale1/14/19997:13 AM4142614.543TrueFinance
15LillianFemale6/5/20166:09 AM594141.256FalseProduct
16JeremyMale9/21/20105:56 AM903707.369FalseHuman Resources
17ShawnMale12/7/19867:45 PM1117376.414FalseProduct
18DianaFemale10/23/198110:27 AM13294019.082FalseClient Services
19DonnaFemale7/22/20103:48 AM810141.894FalseProduct
\n", "
" ], "text/plain": [ " First Name Gender Start Date Last Login Time Salary Bonus % \\\n", "0 Douglas Male 8/6/1993 12:42 PM 97308 6.945 \n", "1 Thomas Male 3/31/1996 6:53 AM 61933 4.170 \n", "2 Maria Female 4/23/1993 11:17 AM 130590 11.858 \n", "3 Jerry Male 3/4/2005 1:00 PM 138705 9.340 \n", "4 Larry Male 1/24/1998 4:47 PM 101004 1.389 \n", "5 Dennis Male 4/18/1987 1:35 AM 115163 10.125 \n", "6 Ruby Female 8/17/1987 4:20 PM 65476 10.012 \n", "7 10 Female 7/20/2015 10:43 AM 45906 11.598 \n", "8 Angela Female 11/22/2005 6:29 AM 95570 18.523 \n", "9 Frances Female 8/8/2002 6:51 AM 139852 7.524 \n", "10 Louise Female 8/12/1980 9:01 AM 63241 15.132 \n", "11 Julie Female 10/26/1997 3:19 PM 102508 12.637 \n", "12 Brandon Male 12/1/1980 1:08 AM 112807 17.492 \n", "13 Gary Male 1/27/2008 11:40 PM 109831 5.831 \n", "14 Kimberly Female 1/14/1999 7:13 AM 41426 14.543 \n", "15 Lillian Female 6/5/2016 6:09 AM 59414 1.256 \n", "16 Jeremy Male 9/21/2010 5:56 AM 90370 7.369 \n", "17 Shawn Male 12/7/1986 7:45 PM 111737 6.414 \n", "18 Diana Female 10/23/1981 10:27 AM 132940 19.082 \n", "19 Donna Female 7/22/2010 3:48 AM 81014 1.894 \n", "\n", " Senior Management Team \n", "0 True Marketing \n", "1 True 10 \n", "2 False Finance \n", "3 True Finance \n", "4 True Client Services \n", "5 False Legal \n", "6 True Product \n", "7 10 Finance \n", "8 True Engineering \n", "9 True Business Development \n", "10 True 10 \n", "11 True Legal \n", "12 True Human Resources \n", "13 False Sales \n", "14 True Finance \n", "15 False Product \n", "16 False Human Resources \n", "17 False Product \n", "18 False Client Services \n", "19 False Product " ] }, "execution_count": 98, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ed2" ] }, { "cell_type": "code", "execution_count": 118, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
First NameGenderStart DateLast Login TimeSalaryBonus %Senior ManagementTeam
0DouglasMale8/6/199312:42 PM973086.945TrueMarketing
1ThomasMale3/31/19966:53 AM619334.170TrueNaN
2MariaFemale4/23/199311:17 AM13059011.858FalseFinance
3JerryMale3/4/20051:00 PM1387059.340TrueFinance
4LarryMale1/24/19984:47 PM1010041.389TrueClient Services
5DennisMale4/18/19871:35 AM11516310.125FalseLegal
6RubyFemale8/17/19874:20 PM6547610.012TrueProduct
7NaNFemale7/20/201510:43 AM4590611.598NaNFinance
8AngelaFemale11/22/20056:29 AM9557018.523TrueEngineering
9FrancesFemale8/8/20026:51 AM1398527.524TrueBusiness Development
10LouiseFemale8/12/19809:01 AM6324115.132TrueNaN
11JulieFemale10/26/19973:19 PM10250812.637TrueLegal
12BrandonMale12/1/19801:08 AM11280717.492TrueHuman Resources
13GaryMale1/27/200811:40 PM1098315.831FalseSales
14KimberlyFemale1/14/19997:13 AM4142614.543TrueFinance
15LillianFemale6/5/20166:09 AM594141.256FalseProduct
16JeremyMale9/21/20105:56 AM903707.369FalseHuman Resources
17ShawnMale12/7/19867:45 PM1117376.414FalseProduct
18DianaFemale10/23/198110:27 AM13294019.082FalseClient Services
19DonnaFemale7/22/20103:48 AM810141.894FalseProduct
\n", "
" ], "text/plain": [ " First Name Gender Start Date Last Login Time Salary Bonus % \\\n", "0 Douglas Male 8/6/1993 12:42 PM 97308 6.945 \n", "1 Thomas Male 3/31/1996 6:53 AM 61933 4.170 \n", "2 Maria Female 4/23/1993 11:17 AM 130590 11.858 \n", "3 Jerry Male 3/4/2005 1:00 PM 138705 9.340 \n", "4 Larry Male 1/24/1998 4:47 PM 101004 1.389 \n", "5 Dennis Male 4/18/1987 1:35 AM 115163 10.125 \n", "6 Ruby Female 8/17/1987 4:20 PM 65476 10.012 \n", "7 NaN Female 7/20/2015 10:43 AM 45906 11.598 \n", "8 Angela Female 11/22/2005 6:29 AM 95570 18.523 \n", "9 Frances Female 8/8/2002 6:51 AM 139852 7.524 \n", "10 Louise Female 8/12/1980 9:01 AM 63241 15.132 \n", "11 Julie Female 10/26/1997 3:19 PM 102508 12.637 \n", "12 Brandon Male 12/1/1980 1:08 AM 112807 17.492 \n", "13 Gary Male 1/27/2008 11:40 PM 109831 5.831 \n", "14 Kimberly Female 1/14/1999 7:13 AM 41426 14.543 \n", "15 Lillian Female 6/5/2016 6:09 AM 59414 1.256 \n", "16 Jeremy Male 9/21/2010 5:56 AM 90370 7.369 \n", "17 Shawn Male 12/7/1986 7:45 PM 111737 6.414 \n", "18 Diana Female 10/23/1981 10:27 AM 132940 19.082 \n", "19 Donna Female 7/22/2010 3:48 AM 81014 1.894 \n", "\n", " Senior Management Team \n", "0 True Marketing \n", "1 True NaN \n", "2 False Finance \n", "3 True Finance \n", "4 True Client Services \n", "5 False Legal \n", "6 True Product \n", "7 NaN Finance \n", "8 True Engineering \n", "9 True Business Development \n", "10 True NaN \n", "11 True Legal \n", "12 True Human Resources \n", "13 False Sales \n", "14 True Finance \n", "15 False Product \n", "16 False Human Resources \n", "17 False Product \n", "18 False Client Services \n", "19 False Product " ] }, "execution_count": 118, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ed5=Employ.head(20)\n", "ed5" ] }, { "cell_type": "code", "execution_count": 111, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "First Name 0\n", "Gender 0\n", "Start Date 0\n", "Last Login Time 0\n", "Salary 0\n", "Bonus % 0\n", "Senior Management 
0\n", "Team 0\n", "dtype: int64" ] }, "execution_count": 111, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ed2.isnull().sum()" ] }, { "cell_type": "code", "execution_count": 120, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
datestudents
010/9/202010
111/09/202020
212/09/202030
\n", "
" ], "text/plain": [ " date students\n", "0 10/9/2020 10\n", "1 11/09/2020 20\n", "2 12/09/2020 30" ] }, "execution_count": 120, "metadata": {}, "output_type": "execute_result" } ], "source": [ "date" ] }, { "cell_type": "code", "execution_count": 124, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "date object\n", "students int64\n", "dtype: object" ] }, "execution_count": 124, "metadata": {}, "output_type": "execute_result" } ], "source": [ "date.dtypes" ] }, { "cell_type": "code", "execution_count": 131, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
checkstudents
010/9/202010
111/09/202020
212/09/202030
\n", "
" ], "text/plain": [ " check students\n", "0 10/9/2020 10\n", "1 11/09/2020 20\n", "2 12/09/2020 30" ] }, "execution_count": 131, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Show a copy with the 'date' column renamed to 'check'; rename() returns a new frame, so date itself is unchanged.\n", "date.rename(columns={'date':'check'})" ] }, { "cell_type": "code", "execution_count": 134, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
datestudents
212/09/202030
111/09/202020
010/9/202010
\n", "
" ], "text/plain": [ " date students\n", "2 12/09/2020 30\n", "1 11/09/2020 20\n", "0 10/9/2020 10" ] }, "execution_count": 134, "metadata": {}, "output_type": "execute_result" } ], "source": [ "date.sort_values('students',ascending=False)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 0\n", "0 1\n", "1 2\n", "2 3\n", "3 4\n", "4 5\n", "5 6\n" ] } ], "source": [ "#creating the data frame:\n", "import pandas as pd\n", "data=[1,2,3,4,5,6]\n", "d_frame=pd.DataFrame(data)\n", "print(d_frame)" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 0 1 2\n", "x std1 std2 std3\n", "y azar ameer varun\n" ] } ], "source": [ "import numpy as np\n", "arr=np.array([[\"std1\",\"std2\",\"std3\"],[\"azar\",\"ameer\",\"varun\"]])\n", "d_frame2=pd.DataFrame(arr,index=['x','y'])\n", "print(d_frame2)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0
xFalse
yFalse
\n", "
" ], "text/plain": [ " 0\n", "x False\n", "y False" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "d_frame2.isnull()" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0
count6.000000
mean3.500000
std1.870829
min1.000000
25%2.250000
50%3.500000
75%4.750000
max6.000000
\n", "
" ], "text/plain": [ " 0\n", "count 6.000000\n", "mean 3.500000\n", "std 1.870829\n", "min 1.000000\n", "25% 2.250000\n", "50% 3.500000\n", "75% 4.750000\n", "max 6.000000" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "d_frame.describe()" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 3.5\n", "dtype: float64" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "d_frame.mean()" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 6\n", "dtype: int64" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "d_frame.count()" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 6\n", "dtype: int64" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "d_frame.max()" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 3.5\n", "dtype: float64" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "d_frame.median()" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 1.870829\n", "dtype: float64" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "d_frame.std()" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 3.5\n", "dtype: float64" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "d_frame.apply(np.mean)" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/var/folders/y8/34wjlypd37q4zn8rwhg1vsqc0000gn/T/ipykernel_7089/515644098.py:1: 
VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.\n", " data=np.array([[1,2,3,4,5,6],[1,2,3,4,5,6,7]])\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0
0[1, 2, 3, 4, 5, 6]
1[1, 2, 3, 4, 5, 6, 7]
\n", "
" ], "text/plain": [ " 0\n", "0 [1, 2, 3, 4, 5, 6]\n", "1 [1, 2, 3, 4, 5, 6, 7]" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# The two rows have different lengths, so NumPy cannot build a 2-D array.\n", "# Pass dtype=object explicitly: ragged input without it is deprecated and\n", "# raises ValueError on NumPy 1.24 and later.\n", "data=np.array([[1,2,3,4,5,6],[1,2,3,4,5,6,7]],dtype=object)\n", "data_frame=pd.DataFrame(data)\n", "data_frame" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.12" } }, "nbformat": 4, "nbformat_minor": 4 }