File size: 4,247 Bytes
5bc18d5 995c130 5bc18d5 4cf78a7 995c130 4cf78a7 d192d52 27c6518 d192d52 5bc18d5 27c6518 5bc18d5 1943dad 4cf78a7 5bc18d5 4cf78a7 d192d52 4cf78a7 d192d52 5bc18d5 995c130 e57a437 6d15d6e 995c130 7f15983 0c62072 7f15983 0c62072 7f15983 0c62072 995c130 0c62072 995c130 7541c42 0c62072 6d15d6e 995c130 0c62072 e03f649 7541c42 e03f649 c9e6dbf 2d9b707 c9e6dbf 2d9b707 7541c42 2d9b707 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
# app.py
import streamlit as st
import joblib
import pandas as pd
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import QuantileTransformer, StandardScaler
from sklearn.cluster import KMeans
# Fixed random seed; used as the default random_state of KMeansTransformer.
seed = 42

# Columns (and their order) that feat_eng keeps for the model.
selected_features = [
    # measured directly
    'volatile_acidity',
    'citric_acid',
    'chlorides',
    'total_sulfur_dioxide',
    'density',
    'pH',
    'sulphates',
    'alcohol',
    # derived in feat_eng
    'total_acidity',
    'acidity_to_pH_ratio',
    'free_sulfur_dioxide_to_total_sulfur_dioxide_ratio',
    'alcohol_to_acidity_ratio',
    'residual_sugar_to_citric_acid_ratio',
    'alcohol_to_density_ratio',
    'total_alkalinity',
    'total_minerals',
]
def feat_eng(df):
    """Derive the engineered wine features and return the model's input columns.

    The work is done on a copy, so the DataFrame passed in by the caller is
    not renamed or extended as a side effect.

    Args:
        df: DataFrame of raw measurements. Spaces in column names are
            replaced with underscores before the columns are read; the
            columns used are ``fixed_acidity``, ``volatile_acidity``,
            ``citric_acid``, ``residual_sugar``, ``chlorides``,
            ``free_sulfur_dioxide``, ``total_sulfur_dioxide``, ``density``,
            ``pH``, ``sulphates`` and ``alcohol``.

    Returns:
        A new DataFrame restricted to ``selected_features``, in that order.
        Infinite values are replaced with 0 and rows containing any NaN are
        dropped, so the result may have fewer rows than ``df``.

    Raises:
        KeyError: if one of the required columns is missing.
    """
    # Copy first: the column rename and the feature assignments below would
    # otherwise modify the caller's DataFrame in place.
    df = df.copy()
    df.columns = df.columns.str.replace(' ', '_')

    df['total_acidity'] = df['fixed_acidity'] + df['volatile_acidity'] + df['citric_acid']
    df['acidity_to_pH_ratio'] = df['total_acidity'] / df['pH']
    df['free_sulfur_dioxide_to_total_sulfur_dioxide_ratio'] = df['free_sulfur_dioxide'] / df['total_sulfur_dioxide']
    df['alcohol_to_acidity_ratio'] = df['alcohol'] / df['total_acidity']
    df['residual_sugar_to_citric_acid_ratio'] = df['residual_sugar'] / df['citric_acid']
    df['alcohol_to_density_ratio'] = df['alcohol'] / df['density']
    df['total_alkalinity'] = df['pH'] + df['alcohol']
    df['total_minerals'] = df['chlorides'] + df['sulphates'] + df['residual_sugar']

    # x/0 produces +/-inf, which is mapped to 0; 0/0 produces NaN, which
    # makes dropna() discard the whole row.
    df = df.replace([np.inf, -np.inf], 0)
    df = df.dropna()
    return df[selected_features]
class CustomQuantileTransformer(BaseEstimator, TransformerMixin):
    """Quantile-transform features towards a normal distribution.

    Wraps scikit-learn's ``QuantileTransformer`` (built with
    ``output_distribution='normal'``) and hands the transformed values back
    as a DataFrame labelled with the input's column names.

    NOTE(review): presumably instances of this class live inside the pickled
    pipeline loaded by this app -- confirm before renaming any attribute.
    """

    def __init__(self, random_state=None):
        self.random_state = random_state
        self.quantile_transformer = QuantileTransformer(
            random_state=random_state,
            output_distribution='normal',
        )

    def fit(self, X_train, y=None):
        """Fit the wrapped transformer on ``X_train``; ``y`` is unused."""
        self.quantile_transformer.fit(X_train)
        return self

    def transform(self, X):
        """Transform ``X`` and return a DataFrame with ``X.columns`` as labels.

        ``X`` must expose a ``columns`` attribute (e.g. a DataFrame).
        """
        values = self.quantile_transformer.transform(X)
        return pd.DataFrame(values, columns=X.columns)
class CustomStandardScaler(BaseEstimator, TransformerMixin):
    """Standard-scale features and keep the result as a labelled DataFrame.

    Wraps scikit-learn's ``StandardScaler``; ``transform`` re-attaches the
    input's column names to the scaled values.

    NOTE(review): presumably instances of this class live inside the pickled
    pipeline loaded by this app -- confirm before renaming any attribute.
    """

    def __init__(self):
        self.scaler = StandardScaler()

    def fit(self, X_train, y=None):
        """Fit the wrapped scaler on ``X_train``; ``y`` is unused."""
        self.scaler.fit(X_train)
        return self

    def transform(self, X):
        """Scale ``X`` and return a DataFrame with ``X.columns`` as labels.

        ``X`` must expose a ``columns`` attribute (e.g. a DataFrame).
        """
        scaled = self.scaler.transform(X)
        return pd.DataFrame(scaled, columns=X.columns)
class KMeansTransformer(BaseEstimator, TransformerMixin):
    """Append a K-Means cluster label to the feature table.

    Wraps scikit-learn's ``KMeans``; ``transform`` returns a copy of the
    input with an extra ``'Cluster'`` column holding the predicted label of
    each row.

    NOTE(review): presumably instances of this class live inside the pickled
    pipeline loaded by this app -- confirm before renaming any attribute.
    """

    def __init__(self, n_clusters=3, random_state=seed):
        self.n_clusters = n_clusters
        self.random_state = random_state
        self.kmeans = KMeans(
            n_clusters=n_clusters,
            random_state=random_state,
        )

    def fit(self, X_train, y=None):
        """Fit the wrapped K-Means model on ``X_train``; ``y`` is unused."""
        self.kmeans.fit(X_train)
        return self

    def transform(self, X):
        """Return a DataFrame copy of ``X`` with a ``'Cluster'`` column added."""
        # The input itself is never modified; labels go onto a copy.
        result = pd.DataFrame(X.copy())
        result['Cluster'] = self.kmeans.predict(X)
        return result
# Load the serialized prediction pipeline (path is relative to the working
# directory the app is started from).
# NOTE: joblib.load unpickles arbitrary Python objects -- only ship a
# trusted wine_quality_prediction.pkl alongside this app.
pipe = joblib.load('wine_quality_prediction.pkl')

# Raw measurements requested from the user, in the order the form shows them.
input_features = [
    "fixed_acidity",
    "volatile_acidity",
    "citric_acid",
    "residual_sugar",
    "chlorides",
    "free_sulfur_dioxide",
    "total_sulfur_dioxide",
    "density",
    "pH",
    "sulphates",
    "alcohol",
]

st.title('Wine Quality Predictor Model')
def get_user_input():
    """Render the input form and collect one number per raw feature.

    Returns:
        tuple: ``(frame, submitted)`` where ``frame`` is a one-row DataFrame
        with one column per entry of ``input_features`` and ``submitted`` is
        the value returned by the form's Submit button.
    """
    with st.form(key='my_form'):
        # One numeric widget per feature, created in input_features order.
        values = {
            name: st.number_input(f"Enter value for {name}", value=0.0, step=0.01)
            for name in input_features
        }
        submitted = st.form_submit_button(label='Submit')

    return pd.DataFrame([values]), submitted
user_input, submit_button = get_user_input()

# When the 'Submit' button is pressed, perform the prediction
if submit_button:
    try:
        # Predict wine quality
        prediction = pipe.predict(user_input)
    except ValueError as err:
        # Unusable measurements should give a readable message rather than a
        # raw traceback. The all-zero form defaults are one such case:
        # feat_eng turns them into 0/0 = NaN ratios and drops the row,
        # leaving nothing to predict on.
        # NOTE(review): assumes the pickled pipeline applies feat_eng and
        # raises ValueError on empty/invalid input, as scikit-learn
        # estimators do -- confirm against the pipeline.
        st.error(f"Could not predict wine quality for these values: {err}")
    else:
        if len(prediction) == 0:
            # Guard the [0] lookup below in case the pipeline returns no rows.
            st.error("Could not predict wine quality: the input produced no usable row.")
        else:
            prediction_value = prediction[0]

            # Display the prediction
            st.header("Predicted Quality")
            st.write(prediction_value)
# Link to the Kaggle notebook in which the model was built.
# unsafe_allow_html=True lets the literal <br> tag in the text render as a
# line break; the text is a static literal, not user input.
st.markdown(
"""
See how this model was created on Kaggle:<br>
[🍷 Wine Quality - EDA, Prediction and Deploy](https://www.kaggle.com/code/lusfernandotorres/wine-quality-eda-prediction-and-deploy/notebook)
""", unsafe_allow_html=True
)
|