Spaces:

louiecerv
/

decision_boundaries

Sleeping

App Files Files Community

decision_boundaries / app.py

louiecerv

enhanced the interface

17cd7b5 28 days ago

raw

history blame contribute delete

6.83 kB

	import streamlit as st
	import numpy as np
	import pandas as pd
	import matplotlib.pyplot as plt
	import seaborn as sns
	from sklearn.datasets import make_blobs
	from sklearn.model_selection import train_test_split
	from sklearn.preprocessing import StandardScaler
	from sklearn.linear_model import LogisticRegression
	from sklearn.naive_bayes import GaussianNB
	from sklearn.svm import SVC
	from sklearn.tree import DecisionTreeClassifier
	from sklearn.ensemble import RandomForestClassifier
	from sklearn.neighbors import KNeighborsClassifier
	from sklearn.metrics import confusion_matrix, classification_report

	def generate_random_points_in_square(x_min, x_max, y_min, y_max, n_clusters):
	    """Draw ``n_clusters`` 2-D points uniformly from an axis-aligned rectangle.

	    NumPy's global random generator is re-seeded with 42 on every call, so
	    the same arguments always produce the same points. The re-seeding is a
	    side effect: code that later draws from the global ``np.random`` state
	    sees the state left behind by this function.

	    Args:
	        x_min: Lower bound for the first coordinate.
	        x_max: Upper bound for the first coordinate.
	        y_min: Lower bound for the second coordinate.
	        y_max: Upper bound for the second coordinate.
	        n_clusters: Number of points to draw.

	    Returns:
	        Array of shape ``(n_clusters, 2)`` holding one point per row.
	    """
	    np.random.seed(42)
	    lower_corner = [x_min, y_min]
	    upper_corner = [x_max, y_max]
	    return np.random.uniform(lower_corner, upper_corner, (n_clusters, 2))

	def generate_data(n_samples, cluster_std, random_state, n_clusters):
	    """Build a synthetic two-feature dataset of Gaussian blobs.

	    Cluster centres are obtained from ``generate_random_points_in_square``
	    over the square ``[-4, 4] x [-4, 4]``; the samples around those centres
	    are then drawn by ``make_blobs``.

	    Args:
	        n_samples: Total number of samples, forwarded to ``make_blobs``.
	        cluster_std: Standard deviation of each blob, forwarded to
	            ``make_blobs``.
	        random_state: Seed forwarded to ``make_blobs``.
	        n_clusters: Number of cluster centres to generate.

	    Returns:
	        The ``(X, y)`` pair produced by ``make_blobs``: the sample
	        coordinates and the cluster label of each sample.
	    """
	    blob_centers = generate_random_points_in_square(-4, 4, -4, 4, n_clusters)
	    features, labels = make_blobs(
	        n_samples=n_samples,
	        centers=blob_centers,
	        n_features=2,
	        cluster_std=cluster_std,
	        random_state=random_state,
	    )
	    return features, labels

	def train_and_evaluate_model(model, X_train, X_test, y_train, y_test):
	    """Fit ``model`` on the training split and score it on the test split.

	    Args:
	        model: Estimator exposing ``fit`` and ``predict``; it is fitted in
	            place.
	        X_train: Training features.
	        X_test: Test features.
	        y_train: Training labels.
	        y_test: Test labels.

	    Returns:
	        Tuple ``(model, cm, cr)``: the fitted estimator, the confusion matrix
	        of the test predictions, and the classification report as a dict
	        (``output_dict=True``).
	    """
	    model.fit(X_train, y_train)
	    predictions = model.predict(X_test)
	    report = classification_report(y_test, predictions, output_dict=True)
	    matrix = confusion_matrix(y_test, predictions)
	    return model, matrix, report

	def visualize_classifier(classifier, X, y, title=''):
	    """Plot a fitted classifier's decision regions with the data on top.

	    The classifier is evaluated on a dense grid spanning the data range
	    padded by 1.0 on every side; the predicted class of each grid node is
	    drawn as a grayscale background and the samples are scattered over it.
	    The finished figure is sent to the Streamlit page.

	    Args:
	        classifier: Fitted estimator exposing ``predict``.
	        X: Two-column array of sample coordinates (columns 0 and 1 are used
	            as the plot's x and y axes).
	        y: Per-sample values used to colour the scatter points.
	        title: Title shown above the plot.
	    """
	    min_x, max_x = X[:, 0].min() - 1.0, X[:, 0].max() + 1.0
	    min_y, max_y = X[:, 1].min() - 1.0, X[:, 1].max() + 1.0

	    # Classify every node of a fine grid to obtain the decision regions.
	    mesh_step_size = 0.01
	    x_vals, y_vals = np.meshgrid(
	        np.arange(min_x, max_x, mesh_step_size),
	        np.arange(min_y, max_y, mesh_step_size),
	    )
	    output = classifier.predict(np.c_[x_vals.ravel(), y_vals.ravel()])
	    output = output.reshape(x_vals.shape)

	    fig, ax = plt.subplots()
	    try:
	        ax.set_title(title)
	        ax.pcolormesh(x_vals, y_vals, output, cmap=plt.cm.gray)
	        ax.scatter(X[:, 0], X[:, 1], c=y, s=75, edgecolors='black', linewidth=1, cmap=plt.cm.Paired)
	        ax.set_xlim(x_vals.min(), x_vals.max())
	        ax.set_ylim(y_vals.min(), y_vals.max())
	        ax.set_xticks(np.arange(int(X[:, 0].min() - 1), int(X[:, 0].max() + 1), 1.0))
	        ax.set_yticks(np.arange(int(X[:, 1].min() - 1), int(X[:, 1].max() + 1), 1.0))
	        st.pyplot(fig)
	    finally:
	        # pyplot keeps every figure it creates alive until it is closed.
	        # The app script is re-executed on each widget interaction, so
	        # without this the process would accumulate one figure per call.
	        plt.close(fig)

	def main():
	    """Render the Streamlit page.

	    Shows the title and an "About" expander, reads the data parameters from
	    the sidebar sliders, generates a blob dataset, trains six classifiers on
	    a standardized 70/30 train/test split, and displays one tab per model
	    with its confusion matrix, classification report and decision boundary.
	    """
	    st.title("📊📈Visualizing Decision Boundaries in Machine Learning Algorithms🧮")

	    about = """
	This interactive application provides a platform for computer science students to gain a deeper understanding of how various machine learning algorithms approach classification tasks. By manipulating data characteristics and observing the resulting decision boundaries, students can develop an intuitive grasp of the strengths and weaknesses of different models.

	Key Features:

	* Configurable Data Generation: Fine-grained control over the number of clusters, sample size, and standard deviation allows for the creation of diverse datasets, including those with complex overlapping clusters. This facilitates the exploration of model performance under varying degrees of data separability.
	* Comparative Model Visualization: Observe and analyze the decision boundaries generated by a range of machine learning algorithms, including Support Vector Machines (SVMs), k-Nearest Neighbors (k-NN), and Decision Trees. Directly compare the efficacy of different approaches in classifying data with varying levels of complexity.
	* Interactive Exploration: Experiment with different parameter settings and observe the impact on the generated decision boundaries. This hands-on approach fosters active learning and reinforces theoretical concepts.

	Learning Objectives:

	* Develop a visual understanding of decision boundaries in machine learning.
	* Gain insights into the influence of data characteristics on model performance.
	* Compare and contrast the classification strategies of different algorithms.
	* Analyze the effectiveness of various models in handling overlapping clusters.
	* Enhance understanding of bias-variance tradeoffs and model complexity.

	This application is an invaluable tool for:

	* Illustrating core machine learning concepts: Provides a concrete visualization of abstract theoretical principles.
	* Reinforcing classroom learning: Complements lectures and textbook material with interactive experimentation.
	* Encouraging independent exploration: Empowers students to investigate the behavior of machine learning algorithms autonomously.

	💡Created by: Louie F. Cervantes, M. Eng. (Information Engineering)
	(c) 2025 West Visayas State University
	"""
	    with st.expander("About this app"):
	        st.markdown(about)

	    with st.sidebar:
	        st.header("Data Parameters")
	        n_samples = st.slider("Number of Samples", 300, 1000, 500)
	        cluster_std = st.slider("Cluster Standard Deviation", 0.1, 3.0, 0.5)
	        random_state = st.slider("Random State", 0, 100, 42)
	        n_clusters = st.slider("Number of Clusters", 2, 6, 2)

	    with st.spinner("Generating data and training models..."):
	        X, y = generate_data(n_samples, cluster_std, random_state, n_clusters)
	        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=random_state)
	        # The scaler is fitted on the training split only and then applied
	        # to the test split, so test data does not influence the scaling.
	        scaler = StandardScaler()
	        X_train = scaler.fit_transform(X_train)
	        X_test = scaler.transform(X_test)
	        models = {
	            "Logistic Regression": LogisticRegression(),
	            "Naive Bayes": GaussianNB(),
	            "SVM": SVC(),
	            "Decision Tree": DecisionTreeClassifier(),
	            "Random Forest": RandomForestClassifier(),
	            "KNN": KNeighborsClassifier(),
	        }
	        # name -> (fitted model, confusion matrix, classification report)
	        results = {}
	        for name, model in models.items():
	            results[name] = train_and_evaluate_model(model, X_train, X_test, y_train, y_test)

	    st.write("Click on the tabs below to view the results of each model:")

	    # st.tabs expects a sequence of labels, so pass a list rather than a
	    # dict view; dict order keeps tabs and results aligned for zip().
	    tabs = st.tabs(list(results))
	    for tab, (name, (trained_model, cm, cr)) in zip(tabs, results.items()):
	        with tab:
	            st.subheader(name)
	            fig, ax = plt.subplots()
	            try:
	                sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
	                ax.set_xlabel("Predicted Labels")
	                ax.set_ylabel("True Labels")
	                ax.set_title("Confusion Matrix")
	                st.pyplot(fig)
	            finally:
	                # Release the figure; pyplot would otherwise keep it alive
	                # across every rerun of the script.
	                plt.close(fig)

	            st.text("Classification Report")
	            report_df = pd.DataFrame(cr).transpose()
	            st.table(report_df)

	            # The models were trained on the scaled training split, so the
	            # boundary is drawn in that same scaled feature space.
	            visualize_classifier(trained_model, X_train, y_train, title=f"{name} Decision Boundary")

	    st.write("© 2025 West Visayas State University")
	# Script entry point: build the page only when this file is executed as
	# the main module, not when it is imported.
	if __name__ == "__main__":
	    main()