nyomanyudisdeveloper commited on
Commit
8124116
·
verified ·
1 Parent(s): f7efa77

First Commit

Browse files
Files changed (6) hide show
  1. P1G5_Set_1_yudis_aditya.csv +0 -0
  2. app.py +10 -0
  3. eda.py +164 -0
  4. model_svm.pkl +3 -0
  5. prediction.py +132 -0
  6. requirement.txt +7 -0
P1G5_Set_1_yudis_aditya.csv ADDED
The diff for this file is too large to render. See raw diff
 
app.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
import streamlit as st
import eda
import prediction

# Entry script of the Streamlit app: a sidebar selector decides which of the
# two pages (exploratory analysis or prediction form) is rendered.
PAGE_NAMES = ('EDA', 'Prediction')

navigation = st.sidebar.selectbox('Pilih Halaman:', PAGE_NAMES)

# Every choice other than 'EDA' is routed to the prediction page.
render_page = eda.run if navigation == 'EDA' else prediction.run
render_page()
eda.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import matplotlib.pyplot as plt
4
+ import seaborn as sns
5
+ import plotly.express as px
6
+
7
+ # This cell is used to make new column called 'group_age'
8
def filter_group_age(age):
    """Return the age-group label for ``age``.

    The bands are the ones listed on the EDA page (attributed there to
    Statistics Canada): 0-14 children, 15-24 youth, 25-64 adults and
    65 or more seniors.

    Half-open thresholds are used so that fractional ages fall into the band
    of their completed years (14.5 is still "children") instead of dropping
    through the gaps between integer ranges.

    Args:
        age: Age in years (int or float).

    Returns:
        One of "children", "youth", "adults", "seniors", or "unknown" when
        ``age`` is negative or NaN.
    """
    # ``not age >= 0`` is true for negative numbers and for NaN, because every
    # comparison involving NaN evaluates to False.
    if not age >= 0:
        return "unknown"
    if age < 15:
        return "children"
    if age < 25:
        return "youth"
    if age < 65:
        return "adults"
    return "seniors"
19
+
20
+
21
+
22
# Month labels in chronological order, paired position-by-position with the
# dataset columns that hold each month's value.
# NOTE(review): this follows the convention used by the prediction form of this
# project, where the *_1 columns are September 2005 and the *_6 columns are
# April 2005 (pay_0 being the September repayment status) - confirm against
# the dataset documentation.
_MONTH_LABELS = ('Apr', 'May', 'Jun', 'July', 'August', 'Sep')
_PAY_STATUS_COLUMNS = ('pay_6', 'pay_5', 'pay_4', 'pay_3', 'pay_2', 'pay_0')
_BILL_AMOUNT_COLUMNS = (
    'bill_amt_6', 'bill_amt_5', 'bill_amt_4',
    'bill_amt_3', 'bill_amt_2', 'bill_amt_1',
)


def _trend_word(values):
    """Describe how the last entry of ``values`` compares with the first."""
    first, last = values[0], values[-1]
    if last > first:
        return 'increased'
    if last < first:
        return 'decreased'
    return 'unchanged'


def _positional_labels(counts, names):
    """Return ``names`` if it has one entry per group, else the raw group keys.

    Guards the charts against a category missing from the data, which would
    otherwise make the label list and the value list differ in length.
    """
    if len(names) == len(counts):
        return list(names)
    return [str(key) for key in counts.index]


def _render(fig):
    """Draw ``fig`` on the Streamlit page and release it."""
    st.pyplot(fig)
    # pyplot keeps every figure alive until it is closed; close it so figures
    # do not pile up each time Streamlit reruns the script.
    plt.close(fig)


def _pie_chart(counts, labels, title):
    """Render a pie chart of the group sizes in ``counts``."""
    fig, ax = plt.subplots(figsize=(15, 5))
    ax.pie(counts.values, labels=labels, autopct='%1.1f%%')
    ax.set_title(title)
    _render(fig)


def run():
    """Render the exploratory-data-analysis page.

    Reads ``P1G5_Set_1_yudis_aditya.csv`` from the working directory, adds a
    ``group_age`` column and draws, in order: the age histogram, pie charts
    for age group, marital status and gender, the monthly count of clients
    who paid duly, the monthly median bill amount and the count of clients per
    next-month payment outcome. Each chart is followed by a short commentary.
    """
    st.title('Credit Card Payment Exploration Data Analyst(EDA)')
    # NOTE(review): the file name in this URL looks mangled by e-mail
    # obfuscation ("[email protected]") - confirm the original address.
    st.image('https://www.i1.creditdonkey.com/image/1/[email protected]')
    st.write('This page is made by Yudis Aditya')
    st.markdown('---')

    st.write('In this page, I want to show visualization data based on my analyst process about credit card payment. This EDA has information about customer segmentation and customer behaviour')
    st.link_button('Link dataset','https://console.cloud.google.com/bigquery?p=bigquery-public-data&d=ml_datasets&t=credit_card_default&page=table&project=direct-keel-424102-f5&ws=!1m5!1m4!4m3!1sbigquery-public-data!2sml_datasets!3scredit_card_default')

    df = pd.read_csv('P1G5_Set_1_yudis_aditya.csv')
    df['group_age'] = df['age'].apply(filter_group_age)

    # --- Age distribution -------------------------------------------------
    st.write('### Distribution Data Age')
    fig, ax = plt.subplots(figsize=(15, 5))
    ax.hist(df['age'])
    ax.set_title("Distribution Data Age")
    _render(fig)
    st.write("From Graph 'Distribution Data Age' we can see that the most client who use credit card have age around 20 - 40")

    st.write('Based on Standarization Statistic Canada, we can make group of age like this :')
    st.write('- 0 - 14 = children ')
    st.write('- 15 - 24 = youth')
    st.write('- 25 - 64 = adults ')
    st.write('- more than equal 65 = seniors')

    age_group_counts = df.groupby('group_age').size()
    _pie_chart(
        age_group_counts,
        age_group_counts.index,
        "Pie Chart Data based on Group Age",
    )
    st.write('From Graph "Pie Chart Data based on Group Age" we can know that majority significant people who use credit card is in group age adults. and The second is youth but not really significant. And group age "seniors" is rare to use credit card')

    # --- Marital status ---------------------------------------------------
    st.write('### Distribution Data Marital Status')
    # Rows with marital_status == 0 are left out of this chart.
    marital_counts = df[df['marital_status'] != 0].groupby('marital_status').size()
    _pie_chart(
        marital_counts,
        _positional_labels(marital_counts, ['married', 'single', 'others']),
        "Distribution data person based on marital status",
    )
    st.write('From Graph above we can see that person who single is more to use credit card than married. person who has marital_status others has not really significant')

    # --- Gender -----------------------------------------------------------
    st.write('### Distribution Data Gender')
    sex_counts = df.groupby('sex').size()
    _pie_chart(
        sex_counts,
        _positional_labels(sex_counts, ['male', 'female']),
        "Distribution data person based on gender",
    )
    st.write('From Graph above we can see that female is more using credit card than male')

    # --- Clients who paid duly, per month ---------------------------------
    st.write('### Total People who pay duly on April - September in 2005')
    # A repayment status of 0 or below is counted as "paid duly".
    duly_counts = [int((df[column] <= 0).sum()) for column in _PAY_STATUS_COLUMNS]
    fig, ax = plt.subplots(figsize=(15, 5))
    ax.plot(list(_MONTH_LABELS), duly_counts)
    ax.set_title("Total status payment duly in 2005")
    _render(fig)
    # The direction is computed from the data so the sentence cannot
    # contradict the chart.
    st.write(f'From Graph above we can know that Total people who pay duly are {_trend_word(duly_counts)} from April 2005 until September 2005. ')

    # --- Median bill amount, per month ------------------------------------
    st.write('### Median Bill Amount on April - September in 2005')
    bill_medians = [float(df[column].median()) for column in _BILL_AMOUNT_COLUMNS]
    fig, ax = plt.subplots(figsize=(15, 5))
    ax.bar(list(_MONTH_LABELS), bill_medians)
    ax.set_title("Median Bill amount from April - September 2005")
    _render(fig)
    st.write(f'From graph above i use median instead mean because distribution data is not normal. We can see that median bill amount from april until September is {_trend_word(bill_medians)}.')

    # --- Payment outcome for the following month --------------------------
    st.write('### Total people who pay and not pay for next month (October) 2005')
    outcome_counts = df.groupby(['default_payment_next_month']).size()
    fig, ax = plt.subplots(figsize=(15, 5))
    ax.bar(_positional_labels(outcome_counts, ['Pay', 'Not Pay']), outcome_counts.values)
    ax.set_title('Total people who pay and dont pay on October 2005')
    _render(fig)
    st.write('From Graph above we know that more people pay for next month (October) than not pay. It is good result because that indicate client good behaviour')


if __name__ == '__main__':
    run()
model_svm.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f896c187f0a72826dad650c9742111070cfaf700a5a4128d3812b4c0b777961
3
+ size 290673
prediction.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import pickle
5
+
6
# Deserialize the trained model once, when this module is first imported, so
# every form submission reuses the same object.
# NOTE: unpickling runs code embedded in the file - only ship a model_svm.pkl
# that was produced by this project.
with open("model_svm.pkl", "rb") as model_file:
    model_svm = pickle.load(model_file)
8
+
9
def get_education_level_index(education_level_name):
    """Translate an education-level label into its numeric code.

    'Graduate School' maps to 1, 'University' to 2 and 'High School' to 3;
    every other value maps to 4.
    """
    known_levels = (
        ('Graduate School', 1),
        ('University', 2),
        ('High School', 3),
    )
    for label, code in known_levels:
        if education_level_name == label:
            return code
    # Anything not listed above is treated as the catch-all category.
    return 4
18
+
19
def get_marital_status_index(marital_status_name):
    """Translate a marital-status label into its numeric code.

    'Married' maps to 1 and 'Single' to 2; every other value maps to 3.
    """
    for code, label in enumerate(('Married', 'Single'), start=1):
        if marital_status_name == label:
            return code
    # Fallback used for 'Others' and for any unrecognised value.
    return 3
26
+
27
# Repayment-status labels shown in the form, mapped to their numeric codes:
# -1 for a payment made duly, otherwise the number of months of delay.
_PAYMENT_STATUS_CODES = {
    'Pay Duly': -1,
    'Payment delay for one month': 1,
    'Payment delay for two month': 2,
    'Payment delay for three month': 3,
    'Payment delay for four month': 4,
    'Payment delay for five month': 5,
    'Payment delay for six month': 6,
    'Payment delay for seven month': 7,
    'Payment delay for eight month': 8,
    # NOTE(review): nine months gets its own code instead of sharing 8 with
    # the eight-month label - confirm the trained model accepts the value 9.
    'Payment delay for nine month': 9,
}


def get_payment_status_index(payment_status_name):
    """Translate a repayment-status label into its numeric code.

    Args:
        payment_status_name: One of the labels offered by the prediction
            form, e.g. 'Pay Duly' or 'Payment delay for two month'.

    Returns:
        -1 for 'Pay Duly', otherwise the number of months of delay (1-9).

    Raises:
        ValueError: If the label is not one of the known statuses, so a bad
            value fails here rather than reaching the model as ``None``.
    """
    try:
        return _PAYMENT_STATUS_CODES[payment_status_name]
    except (KeyError, TypeError):
        raise ValueError(
            f'Unknown payment status: {payment_status_name!r}'
        ) from None
48
+
49
# Choices are tuples rather than sets so the option order - and therefore the
# entry preselected by ``index=0`` - is the same on every start of the app.
_SEX_OPTIONS = ('Male', 'Female')
_EDUCATION_OPTIONS = ('Graduate School', 'University', 'High School', 'Others')
_MARITAL_OPTIONS = ('Married', 'Single', 'Others')
_PAYMENT_STATUS_OPTIONS = (
    'Pay Duly',
    'Payment delay for one month',
    'Payment delay for two month',
    'Payment delay for three month',
    'Payment delay for four month',
    'Payment delay for five month',
    'Payment delay for six month',
    'Payment delay for seven month',
    'Payment delay for eight month',
    'Payment delay for nine month',
)

# Months covered by the form; position i (1-based) feeds the *_i columns.
_FORM_MONTHS = ('September', 'August', 'July', 'June', 'May', 'April')

# Inputs collected by the form that are removed before calling the model.
_UNUSED_COLUMNS = [
    'sex', 'marital_status', 'age',
    'bill_amt_1', 'bill_amt_2', 'bill_amt_3',
    'bill_amt_4', 'bill_amt_5', 'bill_amt_6',
]

# Columns cast to ``object`` before prediction.
_CATEGORICAL_COLUMNS = [
    'education_level', 'pay_1', 'pay_2', 'pay_3', 'pay_4', 'pay_5', 'pay_6',
]


def run():
    """Render the prediction form and show the model's answer on submit.

    The form collects personal information, six months of repayment status,
    bill amounts and previous payment amounts. On submit the inputs are
    turned into a one-row DataFrame, the unused columns are dropped, the
    categorical columns are cast to ``object`` and ``model_svm.predict`` is
    called. A prediction of 1 is reported as 'yes', anything else as 'no'.
    """
    with st.form("prediction_form"):
        st.write('Personal Information')
        limit_balance = st.number_input('Input limit balance', value=10000.0)
        sex = st.selectbox('Gender', _SEX_OPTIONS, index=0)
        education_level = st.selectbox('Education Level ', _EDUCATION_OPTIONS, index=0)
        marital_status = st.selectbox('Marital Status ', _MARITAL_OPTIONS, index=0)
        age = st.number_input('Age', value=20)

        st.markdown('---')
        st.write('Repayment Status')
        pay_statuses = [
            st.selectbox(f'RePayment Status {month} 2005 ', _PAYMENT_STATUS_OPTIONS, index=0)
            for month in _FORM_MONTHS
        ]

        st.markdown('---')
        st.write('Bill Amount')
        bill_amounts = [
            st.number_input(f'Input Billing Amount {month} 2005 ', value=10000.0)
            for month in _FORM_MONTHS
        ]

        st.markdown('---')
        st.write('Amount Previous payment')
        pay_amounts = [
            st.number_input(f'Amount of previous payment in {month} 2005 ', value=10000.0)
            for month in _FORM_MONTHS
        ]

        submitted = st.form_submit_button("Submit")

    if not submitted:
        return

    # Keys are inserted in the same order as the original feature layout:
    # personal data, pay_1..6, bill_amt_1..6, pay_amt_1..6.
    data_inf = {
        'limit_balance': limit_balance,
        'sex': sex,
        'education_level': get_education_level_index(education_level),
        'marital_status': get_marital_status_index(marital_status),
        'age': age,
    }
    for position, status in enumerate(pay_statuses, start=1):
        data_inf[f'pay_{position}'] = get_payment_status_index(status)
    for position, amount in enumerate(bill_amounts, start=1):
        data_inf[f'bill_amt_{position}'] = amount
    for position, amount in enumerate(pay_amounts, start=1):
        data_inf[f'pay_amt_{position}'] = amount

    df = pd.DataFrame([data_inf])
    # Drop the columns that are not needed after feature selection.
    df = df.drop(columns=_UNUSED_COLUMNS)
    # Convert data type of the categorical columns.
    df[_CATEGORICAL_COLUMNS] = df[_CATEGORICAL_COLUMNS].astype(object)

    # Run the model on the single input row.
    predict_result = model_svm.predict(df)
    predict_result_value = 'yes' if predict_result[0] == 1 else 'no'
    st.write(f'## Default Payment Next Month: {predict_result_value}')


if __name__ == '__main__':
    run()
requirement.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ seaborn
4
+ matplotlib
5
+ numpy
6
+ scikit-learn
7
+ plotly