sync with remote

- app.py +96 -0
- requirements.txt +6 -0
app.py
ADDED
@@ -0,0 +1,96 @@
import streamlit as st
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, classification_report

def generate_random_points_in_square(x_min, x_max, y_min, y_max, n_clusters):
    # Fixed seed so the cluster centers are reproducible across reruns.
    np.random.seed(42)
    return np.random.uniform(low=[x_min, y_min], high=[x_max, y_max], size=(n_clusters, 2))

def generate_data(n_samples, cluster_std, random_state, n_clusters):
    # Sample cluster centers inside [-4, 4] x [-4, 4], then draw Gaussian blobs around them.
    centers = generate_random_points_in_square(-4, 4, -4, 4, n_clusters)
    X, y = make_blobs(n_samples=n_samples, n_features=2, cluster_std=cluster_std,
                      centers=centers, random_state=random_state)
    return X, y

def train_and_evaluate_model(model, X_train, X_test, y_train, y_test):
    # Fit on the training split; score on the held-out test split.
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    cm = confusion_matrix(y_test, y_pred)
    cr = classification_report(y_test, y_pred)
    return model, cm, cr

def visualize_classifier(classifier, X, y, title=''):
    # Predict over a dense grid covering the data, then shade the decision
    # regions and overlay the points.
    min_x, max_x = X[:, 0].min() - 1.0, X[:, 0].max() + 1.0
    min_y, max_y = X[:, 1].min() - 1.0, X[:, 1].max() + 1.0
    mesh_step_size = 0.01
    x_vals, y_vals = np.meshgrid(np.arange(min_x, max_x, mesh_step_size),
                                 np.arange(min_y, max_y, mesh_step_size))
    output = classifier.predict(np.c_[x_vals.ravel(), y_vals.ravel()])
    output = output.reshape(x_vals.shape)
    fig, ax = plt.subplots()
    ax.set_title(title)
    ax.pcolormesh(x_vals, y_vals, output, cmap=plt.cm.gray)
    ax.scatter(X[:, 0], X[:, 1], c=y, s=75, edgecolors='black', linewidth=1, cmap=plt.cm.Paired)
    ax.set_xlim(x_vals.min(), x_vals.max())
    ax.set_ylim(y_vals.min(), y_vals.max())
    ax.set_xticks(np.arange(int(X[:, 0].min() - 1), int(X[:, 0].max() + 1), 1.0))
    ax.set_yticks(np.arange(int(X[:, 1].min() - 1), int(X[:, 1].max() + 1), 1.0))
    st.pyplot(fig)

def main():
    st.title("Machine Learning Model Comparison")
    with st.sidebar:
        st.header("Data Parameters")
        n_samples = st.slider("Number of Samples", 300, 1000, 500)
        cluster_std = st.slider("Cluster Standard Deviation", 0.1, 1.0, 0.5)
        random_state = st.slider("Random State", 0, 100, 42)
        n_clusters = st.slider("Number of Clusters", 2, 6, 2)

    with st.spinner("Generating data and training models..."):
        X, y = generate_data(n_samples, cluster_std, random_state, n_clusters)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=random_state)
        # Scale with statistics from the training split only, to avoid leakage.
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)
        models = {
            "Logistic Regression": LogisticRegression(),
            "Naive Bayes": GaussianNB(),
            "SVM": SVC(),
            "Decision Tree": DecisionTreeClassifier(),
            "Random Forest": RandomForestClassifier(),
            "KNN": KNeighborsClassifier()
        }
        results = {}
        for name, model in models.items():
            trained_model, cm, cr = train_and_evaluate_model(model, X_train, X_test, y_train, y_test)
            results[name] = (trained_model, cm, cr)

    # One tab per model; st.tabs takes a list of labels.
    tabs = st.tabs(list(models.keys()))
    for tab, (name, (trained_model, cm, cr)) in zip(tabs, results.items()):
        with tab:
            st.subheader(name)
            fig, ax = plt.subplots()
            sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
            ax.set_xlabel("Predicted Labels")
            ax.set_ylabel("True Labels")
            ax.set_title("Confusion Matrix")
            st.pyplot(fig)
            st.text("Classification Report")
            # st.text preserves the report's fixed-width column alignment.
            st.text(cr)
            visualize_classifier(trained_model, X_train, y_train, title=f"{name} Decision Boundary")

if __name__ == "__main__":
    main()
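For reference, the decision-region shading in visualize_classifier is the standard meshgrid trick: predict on a dense grid spanning the data, reshape the predictions to the grid, and draw them as a background image. A minimal standalone sketch of the same idea (plain matplotlib, no Streamlit; the data and model below are illustrative choices, not taken from this Space):

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from sklearn.svm import SVC

# Illustrative data and model, chosen only to demonstrate the technique.
X, y = make_blobs(n_samples=200, centers=2, cluster_std=0.6, random_state=0)
clf = SVC().fit(X, y)

# Predict over a dense grid covering the data, then shade by predicted class.
xx, yy = np.meshgrid(np.arange(X[:, 0].min() - 1, X[:, 0].max() + 1, 0.02),
                     np.arange(X[:, 1].min() - 1, X[:, 1].max() + 1, 0.02))
zz = clf.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

plt.pcolormesh(xx, yy, zz, cmap=plt.cm.gray)
plt.scatter(X[:, 0], X[:, 1], c=y, edgecolors='black')
plt.show()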
requirements.txt
ADDED
@@ -0,0 +1,6 @@
streamlit
numpy
pandas
matplotlib
seaborn
scikit-learn
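To try the app locally, the standard Streamlit workflow applies (not part of this commit): install the dependencies with pip install -r requirements.txt, then launch with streamlit run app.py.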