|
import streamlit as st |
|
import pandas as pd |
|
from mlxtend.frequent_patterns import fpgrowth |
|
import itertools |
|
import seaborn as sns |
|
import matplotlib.pyplot as plt |
|
|
|
|
|
@st.cache_data
def _read_dataset(path):
    """Read the CSV at *path* into a DataFrame (cached by Streamlit per path)."""
    return pd.read_csv(path)


def load_data(path="retail_sales_dataset.csv"):
    """Load the retail sales dataset, reporting any failure in the UI.

    Only the successful read is cached (in ``_read_dataset``). The error
    handling lives outside the cached function so that a failed attempt does
    not store ``None`` in the cache; the next rerun retries the read.

    Args:
        path: CSV file to read. Defaults to ``retail_sales_dataset.csv``,
            resolved relative to the current working directory.

    Returns:
        The loaded ``pandas.DataFrame``, or ``None`` if the file could not be
        read or parsed (an error message is shown via ``st.error``).
    """
    try:
        return _read_dataset(path)
    except Exception as e:  # UI boundary: surface any read/parse failure
        st.error(f"Error loading dataset: {e}")
        return None
|
|
|
def preprocess_data(df):
    """Group rows into per-transaction baskets and one-hot encode them.

    The encoded frame is what the script passes to ``fpgrowth`` for
    frequent-itemset mining.

    Args:
        df: DataFrame with 'Transaction ID' and 'Product Category' columns,
            or ``None``.

    Returns:
        A ``(transactions, basket_encoded)`` tuple. ``transactions`` is a
        Series indexed by transaction ID whose values are lists of product
        categories. ``basket_encoded`` is a boolean DataFrame with the same
        index and one column per distinct category, ``True`` where the
        category occurs in that transaction. Returns ``(None, None)`` and
        shows an error when ``df`` is ``None`` or a required column is
        missing.
    """
    required_columns = ("Transaction ID", "Product Category")
    if df is None or any(col not in df.columns for col in required_columns):
        st.error("Dataset does not contain expected columns: 'Transaction ID' and 'Product Category'.")
        return None, None

    transactions = df.groupby(['Transaction ID'])['Product Category'].apply(list)

    # Sort the distinct items so the column order is the same on every run;
    # plain set iteration order over strings varies between interpreter runs.
    # key=str keeps the sort from failing on mixed-type category values.
    items = sorted(set(itertools.chain.from_iterable(transactions)), key=str)

    # Build the boolean matrix directly rather than expanding one Series per
    # row; every cell is filled, so no fillna step is needed.
    rows = [[item in basket for item in items] for basket in map(set, transactions)]
    basket_encoded = pd.DataFrame(rows, index=transactions.index, columns=items, dtype=bool)

    return transactions, basket_encoded
|
|
|
|
|
# Load the dataset and derive the per-transaction baskets. Both results stay
# None when loading or preprocessing fails, so later sections can check them.
df = load_data()
if df is None:
    transactions, basket_data = None, None
else:
    transactions, basket_data = preprocess_data(df)

# Mine frequent itemsets only when an encoded basket matrix is available.
# NOTE(review): the Results section recomputes with min_support=0.02, while
# the Testing section matches against this 0.05 result - confirm that the
# two thresholds are meant to differ.
if basket_data is None:
    frequent_itemsets = None
else:
    frequent_itemsets = fpgrowth(basket_data, min_support=0.05, use_colnames=True)
|
|
|
# Page title.
title_text = "Eclat Algorithm - Buying Pattern Discovery"
st.title(title_text)

# Sidebar navigation: the selected radio option decides which section below
# is rendered on this run.
with st.sidebar:
    st.title("Navigation")
    st.write("Use the sidebar to navigate through different sections of the app.")
    section = st.radio("Go to", ("Introduction", "Data Exploration", "Results", "Testing"))
|
|
|
# Introduction: static description of the app and how to use it.
if section == "Introduction":
    st.header("Introduction")
    for intro_line in (
        "This application analyzes retail sales data to discover hidden buying patterns using the Eclat Algorithm.",
        "Navigate using the sidebar to explore the dataset, view results, or test the model with your own inputs.",
        "### How to Use This App:",
        "1. Explore the dataset in the 'Data Exploration' section.",
        "2. View frequent product purchase patterns in the 'Results' section.",
        "3. Test the model by entering products in the 'Testing' section.",
    ):
        st.write(intro_line)

# Data Exploration: preview, summary statistics and a category bar chart.
elif section == "Data Exploration" and df is not None:
    st.header("Data Exploration")
    st.write("### First 5 Rows of the Dataset")
    st.write(df.head())
    st.write("### Data Overview")
    st.write(df.describe())

    st.write("### Product Category Distribution in Transactions")
    if "Product Category" in df.columns:
        # Draw on the explicit Axes instead of pyplot's implicit "current"
        # axes, so the chart does not depend on global matplotlib state.
        fig, ax = plt.subplots(figsize=(10, 5))
        df["Product Category"].value_counts().plot(kind="bar", ax=ax, rot=90)
        ax.set_ylabel("Count")
        ax.set_title("Distribution of Product Categories")
        st.pyplot(fig)
        # The script reruns on every interaction; close the figure so pyplot
        # does not keep one more open figure per rerun.
        plt.close(fig)
    else:
        # Avoid a KeyError when the dataset lacks the column being charted.
        st.error("Dataset does not contain a 'Product Category' column.")

# Dataset failed to load: say so instead of rendering an empty page, matching
# what the Results and Testing sections do.
elif section == "Data Exploration":
    st.header("Data Exploration")
    st.error("No transaction data available. Please check the dataset.")
|
|
|
# Results: show the frequent itemsets mined from the encoded baskets.
if section == "Results":
    st.header("Results")
    if basket_data is None:
        st.error("No transaction data available. Please check the dataset.")
    else:
        st.write("### Frequent Itemsets")
        # Recomputed here with a 0.02 support threshold; this rebinds the
        # module-level frequent_itemsets for the remainder of the run.
        frequent_itemsets = fpgrowth(basket_data, use_colnames=True, min_support=0.02)
        st.dataframe(frequent_itemsets)
|
|
|
# Testing: let the user pick products and list the frequent itemsets that
# contain every selected product.
if section == "Testing":
    if basket_data is not None and frequent_itemsets is not None:
        st.header("Test the Model")
        unique_items = list(basket_data.columns)
        user_input = st.multiselect("Select products to see associated patterns:", unique_items)

        if user_input:
            user_set = set(user_input)
            # Cast to bool and index with .loc: when frequent_itemsets is
            # empty, apply() yields an empty object-dtype Series, which plain
            # [] indexing does not treat as a row mask.
            contains_selection = frequent_itemsets["itemsets"].apply(user_set.issubset).astype(bool)
            matching_sets = frequent_itemsets.loc[contains_selection]

            st.write("### Matching Itemsets")
            if matching_sets.empty:
                # Explain the empty result rather than showing a blank table.
                st.info("No frequent itemsets contain all of the selected products.")
            else:
                st.dataframe(matching_sets)
        else:
            st.write("Please select at least one product to see associations.")
    else:
        st.error("No transaction data available. Please check the dataset structure.")
|
|
|
|