Spaces:

sklearn-docs
/

feature-importance-rf

Running on CPU Upgrade

App Files Files Community

feature-importance-rf / app.py

marik0

First attempt

f7996e9 almost 2 years ago

raw

history blame

3.08 kB

	import gradio as gr

	from sklearn.datasets import make_classification
	from sklearn.model_selection import train_test_split
	from sklearn.ensemble import RandomForestClassifier
	from sklearn.inspection import permutation_importance

	import numpy as np
	import pandas as pd
	import matplotlib.pyplot as plt

	def create_dataset():
	    """Generate the synthetic classification data and split it.

	    A 1000-sample, 10-feature binary problem is created with
	    ``make_classification``; 3 features are informative, none are
	    redundant or repeated, and the columns are not shuffled.

	    Returns
	    -------
	    tuple
	        ``(X_train, X_test, y_train, y_test)`` as produced by
	        ``train_test_split`` with stratification on the labels.
	    """
	    generator_options = dict(
	        n_samples=1000,
	        n_features=10,
	        n_informative=3,
	        n_redundant=0,
	        n_repeated=0,
	        n_classes=2,
	        random_state=0,
	        shuffle=False,
	    )
	    features, labels = make_classification(**generator_options)

	    # Stratify on the labels so both splits keep the same class balance;
	    # the fixed seed makes the split reproducible.
	    splits = train_test_split(
	        features,
	        labels,
	        stratify=labels,
	        random_state=42,
	    )
	    return tuple(splits)

	def train_model():
	    """Fit a random forest on the training part of the synthetic dataset.

	    Returns
	    -------
	    tuple
	        ``(forest, feature_names, X_test, y_test)``: the fitted
	        classifier, one ``"feature <i>"`` label per input column, and the
	        held-out split for later evaluation.
	    """
	    train_inputs, held_out_inputs, train_labels, held_out_labels = create_dataset()

	    # One label per column, numbered from 0.
	    n_columns = train_inputs.shape[1]
	    column_labels = [f"feature {idx}" for idx in range(n_columns)]

	    # ``fit`` returns the estimator itself, so construction and training
	    # can be chained.
	    fitted_forest = RandomForestClassifier(random_state=0).fit(
	        train_inputs, train_labels
	    )

	    return fitted_forest, column_labels, held_out_inputs, held_out_labels


	def plot_mean_decrease(clf, feature_names):
	    """Draw a bar chart of the forest's impurity-based feature importances.

	    Parameters
	    ----------
	    clf
	        Fitted ensemble exposing ``feature_importances_`` and
	        ``estimators_``.
	    feature_names
	        Labels used as the bar index, one per feature.

	    Returns
	    -------
	    The matplotlib figure holding the chart.
	    """
	    # Spread of each feature's importance across the individual trees,
	    # used as the error bars.
	    per_tree_importances = [est.feature_importances_ for est in clf.estimators_]
	    spread = np.std(per_tree_importances, axis=0)

	    mdi_series = pd.Series(clf.feature_importances_, index=feature_names)

	    figure, axes = plt.subplots()
	    mdi_series.plot.bar(yerr=spread, ax=axes)
	    axes.set_title("Feature importances using MDI")
	    axes.set_ylabel("Mean decrease in impurity")
	    figure.tight_layout()
	    return figure

	def plot_feature_perm(clf, feature_names, X_test, y_test):
	    """Draw a bar chart of permutation feature importances.

	    Parameters
	    ----------
	    clf
	        Fitted estimator to evaluate.
	    feature_names
	        Labels used as the bar index, one per feature.
	    X_test, y_test
	        Data on which ``permutation_importance`` is computed.

	    Returns
	    -------
	    The matplotlib figure holding the chart.
	    """
	    # 10 repeats per feature with a fixed seed, run on 2 workers.
	    perm = permutation_importance(
	        clf,
	        X_test,
	        y_test,
	        n_repeats=10,
	        random_state=42,
	        n_jobs=2,
	    )
	    mean_drop = pd.Series(perm.importances_mean, index=feature_names)

	    figure, axes = plt.subplots()
	    # Bars show the mean importance; error bars use ``importances_std``.
	    mean_drop.plot.bar(yerr=perm.importances_std, ax=axes)
	    axes.set_title("Feature importances using permutation on full model")
	    axes.set_ylabel("Mean accuracy decrease")
	    figure.tight_layout()
	    return figure



	title = "Feature importances with a forest of trees 🌳"
	description = """This example shows the use of a forest of trees to evaluate the importance of features on an artificial classification task.
	The blue bars are the feature importances of the forest, along with their inter-trees variability represented by the error bars.
	"""

	# Page layout: heading, description, then the two importance charts side by
	# side. Nothing is interactive; both figures are computed once while the
	# page is being built.
	with gr.Blocks() as demo:
	    gr.Markdown(f"## {title}")
	    gr.Markdown(description)

	    # Train a single model and reuse it for both plots.
	    clf, feature_names, X_test, y_test = train_model()

	    with gr.Row():
	        plot = gr.Plot(plot_mean_decrease(clf, feature_names))
	        plot2 = gr.Plot(plot_feature_perm(clf, feature_names, X_test, y_test))

	# Queueing is enabled through ``Blocks.queue()``. The former
	# ``launch(enable_queue=True)`` keyword was deprecated and is rejected by
	# newer Gradio releases, whereas ``queue()`` works on old and new versions.
	demo.queue()
	demo.launch()