streamlit app
Browse files
app.py
ADDED
@@ -0,0 +1,136 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import streamlit as st
|
3 |
+
import numpy as np
|
4 |
+
import pickle
|
5 |
+
import catboost
|
6 |
+
from sklearn.impute import SimpleImputer
|
7 |
+
|
8 |
+
|
9 |
+
|
10 |
+
# Load the pickled bundle that holds the trained model and the option lists
# used to populate the select boxes.
# NOTE(review): pickle.load executes arbitrary code embedded in the file, so
# this artifact must only ever come from a trusted source.
with open("model_and_key_components.pkl", "rb") as bundle_file:
    components = pickle.load(bundle_file)

# Pull the two entries the rest of the script relies on out of the bundle.
dt_model, unique_values = components["model"], components["unique_values"]
|
17 |
+
|
18 |
+
|
19 |
+
# Page banner and title.
st.image("https://pbs.twimg.com/media/DywhyJiXgAIUZej?format=jpg&name=medium")
st.title("Income Prediction App")

# Sidebar help text: one markdown entry per input field, rendered in the
# order listed here.
_FIELD_DESCRIPTIONS = (
    "**Age**: Enter the age of the individual (e.g., 25, 42, 57).",
    "**Gender**: Select the gender of the individual (e.g., Male, Female).",
    "**Education**: Choose the highest education level of the individual (e.g., Bachelors Degree, High School Graduate, Masters Degree).",
    "**Worker Class**: Select the class of worker for the individual (e.g., Private, Government, Self-employed).",
    "**Marital Status**: Choose the marital status of the individual (e.g., Married, Never married, Divorced).",
    "**Race**: Select the race of the individual (e.g., White, Black, Asian-Pac-Islander).",
    "**Hispanic Origin**: Choose the Hispanic origin of the individual (e.g., Mexican, Puerto Rican, Cuban).",
    "**Full/Part-Time Employment**: Select the employment status as full-time or part-time (e.g., Full-time schedules, Part-time schedules).",
    "**Wage Per Hour**: Enter the wage per hour of the individual (numeric value, e.g., 20.50).",
    "**Weeks Worked Per Year**: Specify the number of weeks the individual worked in a year (numeric value, e.g., 45).",
    "**Industry Code**: Choose the category code of the industry where the individual works (e.g., Category 1, Category 2).",
    "**Major Industry Code**: Select the major industry code of the individual's work (e.g., Industry A, Industry B).",
    "**Occupation Code**: Choose the category code of the occupation of the individual (e.g., Category X, Category Y).",
    "**Major Occupation Code**: Select the major occupation code of the individual (e.g., Occupation 1, Occupation 2).",
    "**Total Employed**: Specify the number of persons worked for the employer (numeric value, e.g., 3, 5).",
    "**Household Stat**: Choose the detailed household and family status of the individual (e.g., Single, Married-civilian spouse present).",
    "**Household Summary**: Select the detailed household summary (e.g., Child under 18 never married, Spouse of householder).",
    "**Veteran Benefits**: Choose whether the individual receives veteran benefits (Yes or No).",
    "**Tax Filer Status**: Select the tax filer status of the individual (e.g., Single, Joint both 65+).",
    "**Gains**: Specify any gains the individual has (numeric value, e.g., 1500.0).",
    "**Losses**: Specify any losses the individual has (numeric value, e.g., 300.0).",
    "**Dividends from Stocks**: Specify any dividends from stocks for the individual (numeric value, e.g., 120.5).",
    "**Citizenship**: Select the citizenship status of the individual (e.g., Native, Foreign Born- Not a citizen of U S).",
    "**Year of Migration**: Enter the year of migration for the individual (numeric value, e.g., 2005).",
    "**Country of Birth**: Choose the individual's birth country (e.g., United-States, Other).",
    "**Importance of Record**: Enter the weight of the instance (numeric value, e.g., 0.9).",
)

st.sidebar.header("Description of the Required Input Fields")
for _description in _FIELD_DESCRIPTIONS:
    st.sidebar.markdown(_description)
|
50 |
+
|
51 |
+
def _first_option(field):
    """Return the first entry of the option list stored for *field*."""
    return unique_values[field][0]


# Placeholder record for one individual. The key order here fixes the column
# order of the DataFrame built from this dict at prediction time, so keep it
# unchanged. Every value is overwritten by the matching widget below.
input_data = {
    "age": 0,
    "gender": _first_option("gender"),
    "education": _first_option("education"),
    "worker_class": _first_option("worker_class"),
    "marital_status": _first_option("marital_status"),
    "race": _first_option("race"),
    "is_hispanic": _first_option("is_hispanic"),
    "employment_commitment": _first_option("employment_commitment"),
    "employment_stat": _first_option("employment_stat"),
    "wage_per_hour": 0,
    "working_week_per_year": 0,
    "industry_code": 0,
    "industry_code_main": _first_option("industry_code_main"),
    "occupation_code": 0,
    "occupation_code_main": _first_option("occupation_code_main"),
    "total_employed": 0,
    "household_stat": _first_option("household_stat"),
    "household_summary": _first_option("household_summary"),
    "vet_benefit": 0,
    "tax_status": _first_option("tax_status"),
    "gains": 0,
    "losses": 0,
    "stocks_status": 0,
    "citizenship": _first_option("citizenship"),
    "mig_year": 0,
    "country_of_birth_own": "United-States",
    "importance_of_record": 0.0,
}
|
81 |
+
|
82 |
+
# Render the input widgets in three columns. Each widget writes its current
# value into the matching key of input_data; the order of the calls inside a
# column is the order in which the widgets appear on the page.
col1, col2, col3 = st.columns(3)

with col1:
    input_data['age'] = st.number_input("Age", min_value=0, key='age')
    input_data['gender'] = st.selectbox("Gender", unique_values['gender'], key='gender')
    input_data['education'] = st.selectbox("Education", unique_values['education'], key='education')
    input_data['worker_class'] = st.selectbox("Class of Worker", unique_values['worker_class'], key='worker_class')
    input_data['marital_status'] = st.selectbox("Marital Status", unique_values['marital_status'], key='marital_status')
    input_data['race'] = st.selectbox("Race", unique_values['race'], key='race')
    input_data['is_hispanic'] = st.selectbox("Hispanic Origin", unique_values['is_hispanic'], key='is_hispanic')
    input_data['employment_commitment'] = st.selectbox("Full/Part-Time Employment", unique_values['employment_commitment'], key='employment_commitment')
    input_data['employment_stat'] = st.selectbox("Has Own Business Or Is Self Employed", unique_values['employment_stat'], key='employment_stat')
    input_data['wage_per_hour'] = st.number_input("Wage Per Hour", min_value=0, key='wage_per_hour')

with col2:
    input_data['working_week_per_year'] = st.number_input("Weeks Worked Per Year", min_value=0, key='working_week_per_year')
    input_data['industry_code'] = st.selectbox("Category Code of Industry", unique_values['industry_code'], key='industry_code')
    input_data['industry_code_main'] = st.selectbox("Major Industry Code", unique_values['industry_code_main'], key='industry_code_main')
    input_data['occupation_code'] = st.selectbox("Category Code of Occupation", unique_values['occupation_code'], key='occupation_code')
    input_data['occupation_code_main'] = st.selectbox("Major Occupation Code", unique_values['occupation_code_main'], key='occupation_code_main')
    input_data['total_employed'] = st.number_input("Number of Persons Worked for Employer", min_value=0, key='total_employed')
    input_data['household_stat'] = st.selectbox("Detailed Household and Family Status", unique_values['household_stat'], key='household_stat')
    input_data['household_summary'] = st.selectbox("Detailed Household Summary", unique_values['household_summary'], key='household_summary')
    input_data['vet_benefit'] = st.selectbox("Veteran Benefits", unique_values['vet_benefit'], key='vet_benefit')

with col3:
    input_data['tax_status'] = st.selectbox("Tax Filer Status", unique_values['tax_status'], key='tax_status')
    input_data['gains'] = st.number_input("Gains", min_value=0, key='gains')
    input_data['losses'] = st.number_input("Losses", min_value=0, key='losses')
    input_data['stocks_status'] = st.number_input("Dividends from Stocks", min_value=0, key='stocks_status')
    input_data['citizenship'] = st.selectbox("Citizenship", unique_values['citizenship'], key='citizenship')
    input_data['mig_year'] = st.selectbox("Migration Year", unique_values['mig_year'], key='migration_year')
    input_data['country_of_birth_own'] = st.selectbox("Country of Birth", unique_values['country_of_birth_own'], key='country_of_birth_own')
    # The record weight is fractional (its default in input_data is 0.0). An
    # integer min_value makes st.number_input integer-only, so a float bound
    # is used here to allow decimal input.
    input_data['importance_of_record'] = st.number_input("Importance of Record", min_value=0.0, key='importance_of_record')
|
117 |
+
|
118 |
+
# Run the model only when the user presses the button.
if st.button("Predict"):
    # One-row frame; columns follow the key order of input_data.
    features = pd.DataFrame([input_data])

    predicted_labels = dt_model.predict(features)
    class_probabilities = dt_model.predict_proba(features)

    # Predicted class: label 1 is reported as income over $50K.
    st.subheader("Prediction")
    earns_over_50k = predicted_labels[0] == 1
    if earns_over_50k:
        st.success("This individual is predicted to have an income of over $50K.")
    else:
        st.error("This individual is predicted to have an income of under $50K")

    # Probability reported from column index 1 of predict_proba's output.
    st.subheader("Prediction Probability")
    over_50k_probability = class_probabilities[0][1]
    st.write(f"The probability of the individual having an income over $50K is: {over_50k_probability:.2f}")
|