Spaces:

mrciomnl
/

buying_pattern_discovery

Running

App Files Files Community

buying_pattern_discovery / app.py

mrciomnl

initial commit

17458bf 15 days ago

raw

history blame contribute delete

3.86 kB

	import streamlit as st
	import pandas as pd
	from mlxtend.frequent_patterns import fpgrowth
	import itertools
	import seaborn as sns
	import matplotlib.pyplot as plt

	# Load the dataset
	@st.cache_data
	def load_data():
	    """Read ``retail_sales_dataset.csv`` into a DataFrame.

	    Returns:
	        The loaded DataFrame, or ``None`` if reading raised any exception
	        (in which case the error is also shown in the Streamlit UI).
	    """
	    # NOTE(review): the None failure result is returned from inside the
	    # cached function, so it is presumably cached like a normal result --
	    # confirm against the st.cache_data documentation.
	    try:
	        dataset = pd.read_csv("retail_sales_dataset.csv")
	    except Exception as exc:
	        st.error(f"Error loading dataset: {exc}")
	        return None
	    else:
	        return dataset

	def preprocess_data(df):
	    """Group purchases by transaction and one-hot encode them.

	    Args:
	        df: DataFrame expected to contain the 'Transaction ID' and
	            'Product Category' columns, or None.

	    Returns:
	        A ``(transactions, basket_encoded)`` tuple. ``transactions`` is a
	        Series indexed by transaction ID whose values are the lists of
	        product categories in each transaction. ``basket_encoded`` is a
	        boolean DataFrame with one row per transaction and one column per
	        distinct category (the layout this file passes to ``fpgrowth``).
	        Returns ``(None, None)`` after showing an error in the UI when
	        ``df`` is None or a required column is missing.
	    """
	    required_columns = ('Transaction ID', 'Product Category')
	    if df is None or any(col not in df.columns for col in required_columns):
	        st.error("Dataset does not contain expected columns: 'Transaction ID' and 'Product Category'.")
	        return None, None

	    transactions = df.groupby(['Transaction ID'])['Product Category'].apply(list)

	    # Sort the distinct categories so the column order is stable from run to
	    # run; iterating a set gives no such guarantee. key=str keeps the sort
	    # from raising if the column mixes value types (e.g. strings and NaN).
	    unique_items = sorted(set(itertools.chain.from_iterable(transactions)), key=str)

	    # Build the whole boolean matrix in one constructor call instead of
	    # creating one pd.Series per transaction. Every cell is filled, so no
	    # fillna pass is needed.
	    basket_encoded = pd.DataFrame(
	        [[item in basket for item in unique_items] for basket in transactions],
	        index=transactions.index,
	        columns=unique_items,
	        dtype=bool,
	    )
	    return transactions, basket_encoded

	# Load the dataset, encode it, and mine the default frequent itemsets.
	# Each stage falls back to None when the previous one produced nothing.
	df = load_data()
	if df is None:
	    transactions, basket_data = None, None
	else:
	    transactions, basket_data = preprocess_data(df)

	if basket_data is None:
	    frequent_itemsets = None
	else:
	    frequent_itemsets = fpgrowth(basket_data, min_support=0.05, use_colnames=True)

	# Page title.
	title_text = "Eclat Algorithm - Buying Pattern Discovery"
	st.title(title_text)

	# Sidebar navigation: `section` holds the label of the page to render.
	section_names = ("Introduction", "Data Exploration", "Results", "Testing")
	st.sidebar.title("Navigation")
	st.sidebar.write("Use the sidebar to navigate through different sections of the app.")
	section = st.sidebar.radio("Go to", section_names)

	# "Introduction" page: static usage instructions.
	if section == "Introduction":
	    st.header("Introduction")
	    st.write("This application analyzes retail sales data to discover hidden buying patterns using the Eclat Algorithm.")
	    st.write("Navigate using the sidebar to explore the dataset, view results, or test the model with your own inputs.")
	    st.write("### How to Use This App:")
	    st.write("1. Explore the dataset in the 'Data Exploration' section.")
	    st.write("2. View frequent product purchase patterns in the 'Results' section.")
	    st.write("3. Test the model by entering products in the 'Testing' section.")

	# "Data Exploration" page: preview, summary statistics and a category chart.
	elif section == "Data Exploration":
	    st.header("Data Exploration")
	    if df is None:
	        # Report the missing dataset instead of rendering an empty page,
	        # matching what the "Results" page does.
	        st.error("No transaction data available. Please check the dataset.")
	    else:
	        st.write("### First 5 Rows of the Dataset")
	        st.write(df.head())
	        st.write("### Data Overview")
	        st.write(df.describe())

	        # Visualizations
	        if 'Product Category' in df.columns:
	            st.write("### Product Category Distribution in Transactions")
	            fig, ax = plt.subplots(figsize=(10, 5))
	            # Draw on the Axes created above rather than on pyplot's
	            # implicit "current" axes.
	            df['Product Category'].value_counts().plot(kind='bar', ax=ax)
	            ax.tick_params(axis='x', labelrotation=90)
	            ax.set_ylabel("Count")
	            ax.set_title("Distribution of Product Categories")
	            st.pyplot(fig)
	            # The script reruns on every interaction; close the figure so
	            # pyplot does not keep one more open figure per rerun.
	            plt.close(fig)
	        else:
	            st.warning("Column 'Product Category' not found; skipping the category distribution chart.")

	# "Results" page: mine and display the frequent itemsets.
	if section == "Results":
	    st.header("Results")
	    if basket_data is None:
	        st.error("No transaction data available. Please check the dataset.")
	    else:
	        st.write("### Frequent Itemsets")
	        # Rebinds the module-level frequent_itemsets, here with a 0.02
	        # minimum-support threshold.
	        frequent_itemsets = fpgrowth(basket_data, min_support=0.02, use_colnames=True)
	        st.dataframe(frequent_itemsets)

	# "Testing" page: list the frequent itemsets that contain every product
	# the user selects.
	if section == "Testing":
	    if basket_data is None or frequent_itemsets is None:
	        st.error("No transaction data available. Please check the dataset structure.")
	    else:
	        st.header("Test the Model")
	        unique_items = list(basket_data.columns)
	        user_input = st.multiselect("Select products to see associated patterns:", unique_items)

	        if not user_input:
	            st.write("Please select at least one product to see associations.")
	        else:
	            user_set = set(user_input)
	            # Boolean mask: True where the itemset includes the whole selection.
	            contains_selection = frequent_itemsets["itemsets"].apply(
	                lambda itemset: user_set <= itemset
	            )
	            matching_sets = frequent_itemsets[contains_selection]
	            st.write("### Matching Itemsets")
	            st.dataframe(matching_sets)