mrciomnl's picture
initial commit
17458bf
import streamlit as st
import pandas as pd
from mlxtend.frequent_patterns import fpgrowth
import itertools
import seaborn as sns
import matplotlib.pyplot as plt
# Load the dataset
@st.cache_data
def load_data():
    """Read the retail sales CSV into a DataFrame.

    Returns:
        The DataFrame parsed from ``retail_sales_dataset.csv``, or ``None``
        when reading raises; in that case the exception text is shown to
        the user through ``st.error``.
    """
    try:
        dataset = pd.read_csv("retail_sales_dataset.csv")
    except Exception as exc:
        # Surface the failure in the UI instead of crashing the app.
        st.error(f"Error loading dataset: {exc}")
        return None
    return dataset
def preprocess_data(df):
    """Group purchases by transaction and one-hot encode them.

    Args:
        df: DataFrame expected to contain the columns 'Transaction ID' and
            'Product Category'. ``None`` is accepted and treated as invalid.

    Returns:
        A ``(transactions, basket_encoded)`` pair:

        * ``transactions`` - Series indexed by transaction ID whose values
          are the lists of product categories bought in that transaction.
        * ``basket_encoded`` - boolean DataFrame with one row per
          transaction and one column per distinct category; a cell is True
          when the transaction contains that category.

        ``(None, None)`` is returned, after reporting the problem with
        ``st.error``, when ``df`` is ``None`` or lacks a required column.
    """
    required_columns = ('Transaction ID', 'Product Category')
    if df is None or any(column not in df.columns for column in required_columns):
        st.error("Dataset does not contain expected columns: 'Transaction ID' and 'Product Category'.")
        return None, None

    transactions = df.groupby(['Transaction ID'])['Product Category'].apply(list)

    # Sort the distinct items so the column order is stable between runs;
    # plain set iteration order varies with string hash randomization.
    # key=str keeps the sort from failing on mixed-type values.
    unique_items = sorted(set(itertools.chain.from_iterable(transactions)), key=str)

    # Build the boolean matrix in one step rather than expanding a dict per
    # transaction through apply(pd.Series), which creates a Series per row.
    rows = []
    for basket in transactions:
        members = set(basket)
        rows.append([item in members for item in unique_items])
    basket_encoded = pd.DataFrame(
        rows,
        index=transactions.index,
        columns=unique_items,
        dtype=bool,
    )
    return transactions, basket_encoded
# Load the dataset, then derive the transaction lists and the basket matrix.
df = load_data()
if df is None:
    transactions, basket_data = None, None
else:
    transactions, basket_data = preprocess_data(df)

# Mine frequent itemsets only when a basket matrix is available.
if basket_data is None:
    frequent_itemsets = None
else:
    frequent_itemsets = fpgrowth(basket_data, min_support=0.05, use_colnames=True)
# Page title.
# NOTE(review): the title names Eclat, while the itemsets in this file are
# mined with mlxtend's `fpgrowth` - confirm the wording is intended.
title_text = "Eclat Algorithm - Buying Pattern Discovery"
st.title(title_text)

# Sidebar navigation: the selected label decides which page is rendered.
nav = st.sidebar
nav.title("Navigation")
nav.write("Use the sidebar to navigate through different sections of the app.")
section = nav.radio(
    "Go to",
    ("Introduction", "Data Exploration", "Results", "Testing"),
)
# Introduction page: static usage instructions.
if section == "Introduction":
    st.header("Introduction")
    st.write("This application analyzes retail sales data to discover hidden buying patterns using the Eclat Algorithm.")
    st.write("Navigate using the sidebar to explore the dataset, view results, or test the model with your own inputs.")
    st.write("### How to Use This App:")
    st.write("1. Explore the dataset in the 'Data Exploration' section.")
    st.write("2. View frequent product purchase patterns in the 'Results' section.")
    st.write("3. Test the model by entering products in the 'Testing' section.")
# Data Exploration page: preview, summary statistics and a category bar chart.
elif section == "Data Exploration":
    st.header("Data Exploration")
    if df is None:
        # Report the missing dataset instead of rendering an empty page,
        # as the Results and Testing pages do.
        st.error("No transaction data available. Please check the dataset.")
    else:
        st.write("### First 5 Rows of the Dataset")
        st.write(df.head())
        st.write("### Data Overview")
        st.write(df.describe())
        # Visualizations
        st.write("### Product Category Distribution in Transactions")
        if 'Product Category' in df.columns:
            # Draw through the explicit Axes object rather than pyplot's
            # implicit "current axes" state.
            fig, ax = plt.subplots(figsize=(10, 5))
            df['Product Category'].value_counts().plot(kind='bar', ax=ax, rot=90)
            ax.set_ylabel("Count")
            ax.set_title("Distribution of Product Categories")
            st.pyplot(fig)
            # The script is re-executed on every interaction; close the
            # figure so open figures do not accumulate across reruns.
            plt.close(fig)
        else:
            # Avoid a KeyError when the loaded file lacks the column.
            st.error("Dataset does not contain expected column: 'Product Category'.")
# Results page: table of the mined frequent itemsets.
if section == "Results":
    st.header("Results")
    if basket_data is None:
        st.error("No transaction data available. Please check the dataset.")
    else:
        st.write("### Frequent Itemsets")
        # NOTE(review): this rebinds the module-level `frequent_itemsets`
        # with min_support=0.02, whereas the value computed right after
        # preprocessing uses 0.05 - confirm which threshold is intended.
        frequent_itemsets = fpgrowth(basket_data, min_support=0.02, use_colnames=True)
        st.dataframe(frequent_itemsets)
# Testing page: list the frequent itemsets that contain every selected product.
if section == "Testing":
    if basket_data is None or frequent_itemsets is None:
        st.error("No transaction data available. Please check the dataset structure.")
    else:
        st.header("Test the Model")
        # The basket matrix has one column per product, so its column labels
        # are the selectable options.
        unique_items = list(basket_data.columns)
        user_input = st.multiselect("Select products to see associated patterns:", unique_items)
        if not user_input:
            st.write("Please select at least one product to see associations.")
        else:
            user_set = set(user_input)
            # Keep the rows whose itemset is a superset of the selection.
            contains_selection = frequent_itemsets["itemsets"].apply(
                lambda itemset: user_set.issubset(itemset)
            )
            matching_sets = frequent_itemsets[contains_selection]
            st.write("### Matching Itemsets")
            st.dataframe(matching_sets)