louiecerv committed on
Commit
460bb23
·
1 Parent(s): 091bcfb

sync with remote

Browse files
Files changed (2) hide show
  1. app.py +96 -0
  2. requirements.txt +6 -0
app.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import numpy as np
3
+ import pandas as pd
4
+ import matplotlib.pyplot as plt
5
+ import seaborn as sns
6
+ from sklearn.datasets import make_blobs
7
+ from sklearn.model_selection import train_test_split
8
+ from sklearn.preprocessing import StandardScaler
9
+ from sklearn.linear_model import LogisticRegression
10
+ from sklearn.naive_bayes import GaussianNB
11
+ from sklearn.svm import SVC
12
+ from sklearn.tree import DecisionTreeClassifier
13
+ from sklearn.ensemble import RandomForestClassifier
14
+ from sklearn.neighbors import KNeighborsClassifier
15
+ from sklearn.metrics import confusion_matrix, classification_report
16
+
17
def generate_random_points_in_square(x_min, x_max, y_min, y_max, n_clusters, seed=42):
    """Draw ``n_clusters`` 2-D points uniformly from a rectangle.

    Args:
        x_min: Lower bound of the first coordinate.
        x_max: Upper bound of the first coordinate.
        y_min: Lower bound of the second coordinate.
        y_max: Upper bound of the second coordinate.
        n_clusters: Number of points (rows) to generate.
        seed: Seed for the private random generator. The default of 42
            reproduces the points the function has always returned.

    Returns:
        A NumPy array of shape ``(n_clusters, 2)``; column 0 holds the
        x values and column 1 the y values.
    """
    # A private RandomState yields the same stream as seeding the global
    # generator, but leaves ``np.random``'s global state untouched for callers.
    rng = np.random.RandomState(seed)
    return rng.uniform(low=[x_min, y_min], high=[x_max, y_max], size=(n_clusters, 2))
20
+
21
def generate_data(n_samples, cluster_std, random_state, n_clusters):
    """Create a synthetic 2-feature blob dataset.

    Cluster centres are drawn inside the square [-4, 4] x [-4, 4] by
    ``generate_random_points_in_square``; the samples around them come from
    ``make_blobs``.

    Args:
        n_samples: Number of samples passed to ``make_blobs``.
        cluster_std: Cluster standard deviation passed to ``make_blobs``.
        random_state: Random state passed to ``make_blobs``.
        n_clusters: Number of cluster centres to generate.

    Returns:
        The ``(X, y)`` pair produced by ``make_blobs``.
    """
    blob_centers = generate_random_points_in_square(-4, 4, -4, 4, n_clusters)
    features, labels = make_blobs(
        n_samples=n_samples,
        n_features=2,
        cluster_std=cluster_std,
        centers=blob_centers,
        random_state=random_state,
    )
    return features, labels
26
+
27
def train_and_evaluate_model(model, X_train, X_test, y_train, y_test):
    """Fit ``model`` on the training split and evaluate it on the test split.

    Args:
        model: Estimator exposing ``fit`` and ``predict``.
        X_train: Training features.
        X_test: Test features.
        y_train: Training labels.
        y_test: Test labels.

    Returns:
        A ``(model, confusion_matrix, classification_report)`` tuple, where
        ``model`` is the same object that was passed in, now fitted.
    """
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    matrix = confusion_matrix(y_test, predictions)
    report = classification_report(y_test, predictions)
    return model, matrix, report
33
+
34
def visualize_classifier(classifier, X, y, title='', mesh_step_size=0.01):
    """Plot a classifier's decision regions over 2-D data and show it in Streamlit.

    The classifier is evaluated on a regular grid that extends one unit beyond
    the data on every side; the predictions are drawn as a grayscale mesh with
    the data points scattered on top.

    Args:
        classifier: Fitted estimator exposing ``predict``; it is called on a
            two-column array of grid coordinates.
        X: Array whose columns 0 and 1 are the plotted coordinates.
        y: Values used to colour the scattered points.
        title: Title placed on the axes.
        mesh_step_size: Spacing of the prediction grid. Larger values mean
            fewer grid points and therefore fewer ``predict`` evaluations.
    """
    min_x, max_x = X[:, 0].min() - 1.0, X[:, 0].max() + 1.0
    min_y, max_y = X[:, 1].min() - 1.0, X[:, 1].max() + 1.0

    x_vals, y_vals = np.meshgrid(
        np.arange(min_x, max_x, mesh_step_size),
        np.arange(min_y, max_y, mesh_step_size),
    )
    # Predict every grid point in one call, then restore the grid shape.
    output = classifier.predict(np.c_[x_vals.ravel(), y_vals.ravel()])
    output = output.reshape(x_vals.shape)

    fig, ax = plt.subplots()
    try:
        ax.set_title(title)
        ax.pcolormesh(x_vals, y_vals, output, cmap=plt.cm.gray)
        ax.scatter(X[:, 0], X[:, 1], c=y, s=75, edgecolors='black',
                   linewidth=1, cmap=plt.cm.Paired)
        ax.set_xlim(x_vals.min(), x_vals.max())
        ax.set_ylim(y_vals.min(), y_vals.max())
        ax.set_xticks(np.arange(int(X[:, 0].min() - 1), int(X[:, 0].max() + 1), 1.0))
        ax.set_yticks(np.arange(int(X[:, 1].min() - 1), int(X[:, 1].max() + 1), 1.0))
        st.pyplot(fig)
    finally:
        # pyplot keeps a reference to every figure it creates until it is
        # closed; without this each rerun of the app would leak a figure.
        plt.close(fig)
51
+
52
def main():
    """Run the Streamlit app that compares several classifiers on blob data.

    Reads the data parameters from sidebar sliders, generates a dataset,
    trains each classifier on a standardized train split, and shows one tab
    per model with its confusion matrix, classification report and decision
    boundary.
    """
    st.title("Machine Learning Model Comparison")
    with st.sidebar:
        st.header("Data Parameters")
        n_samples = st.slider("Number of Samples", 300, 1000, 500)
        cluster_std = st.slider("Cluster Standard Deviation", 0.1, 1.0, 0.5)
        random_state = st.slider("Random State", 0, 100, 42)
        n_clusters = st.slider("Number of Clusters", 2, 6, 2)

    with st.spinner("Generating data and training models..."):
        X, y = generate_data(n_samples, cluster_std, random_state, n_clusters)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.3, random_state=random_state
        )
        # The scaler is fitted on the training split only and then applied
        # to the test split.
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)
        # The randomized estimators are seeded from the slider so that the
        # same "Random State" always reproduces the same results.
        models = {
            "Logistic Regression": LogisticRegression(),
            "Naive Bayes": GaussianNB(),
            "SVM": SVC(),
            "Decision Tree": DecisionTreeClassifier(random_state=random_state),
            "Random Forest": RandomForestClassifier(random_state=random_state),
            "KNN": KNeighborsClassifier(),
        }
        # Each value is the (fitted model, confusion matrix, report) tuple
        # returned by train_and_evaluate_model.
        results = {
            name: train_and_evaluate_model(model, X_train, X_test, y_train, y_test)
            for name, model in models.items()
        }

    # st.tabs is given a concrete list of labels rather than a dict view.
    tabs = st.tabs(list(results))
    for tab, (name, (trained_model, cm, cr)) in zip(tabs, results.items()):
        with tab:
            st.subheader(name)
            fig, ax = plt.subplots()
            try:
                sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
                ax.set_xlabel("Predicted Labels")
                ax.set_ylabel("True Labels")
                ax.set_title("Confusion Matrix")
                st.pyplot(fig)
            finally:
                # Release the figure; pyplot otherwise keeps it alive
                # across reruns of the app.
                plt.close(fig)
            st.text("Classification Report")
            # The report is a whitespace-aligned text table; show it
            # preformatted so Markdown parsing cannot break the columns.
            st.code(cr, language=None)
            visualize_classifier(trained_model, X_train, y_train, title=f"{name} Decision Boundary")
94
+
95
# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ streamlit
2
+ numpy
3
+ pandas
4
+ matplotlib
5
+ seaborn
6
+ scikit-learn