"""Streamlit app that mines frequent product-category itemsets from retail sales.

The script loads ``retail_sales_dataset.csv``, one-hot encodes the product
categories bought in each transaction, mines frequent itemsets, and exposes
the results through four sidebar sections.

NOTE: the UI text refers to the "Eclat Algorithm", but the mining below is done
with mlxtend's FP-Growth implementation. Both are exact frequent-itemset
miners, so they yield the same itemsets for a given support threshold.
"""

import itertools
from typing import Optional, Tuple

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import streamlit as st
from mlxtend.frequent_patterns import fpgrowth

DATASET_PATH = "retail_sales_dataset.csv"

# Support threshold used for the itemsets that the "Testing" section searches.
DEFAULT_MIN_SUPPORT = 0.05
# Support threshold used for the table shown in the "Results" section.
# NOTE(review): this differs from DEFAULT_MIN_SUPPORT, so "Results" can list
# itemsets that "Testing" will never match - confirm whether that is intended.
RESULTS_MIN_SUPPORT = 0.02

NO_DATA_MESSAGE = "No transaction data available. Please check the dataset."


@st.cache_data
def _read_dataset(path: str) -> pd.DataFrame:
    """Read the CSV at *path*.

    Errors are deliberately allowed to propagate: a call that raises stores
    nothing in the cache, so a failed load is retried on the next rerun.
    """
    return pd.read_csv(path)


# Load the dataset
def load_data() -> Optional[pd.DataFrame]:
    """Return the retail sales dataset, or ``None`` if it cannot be loaded.

    On failure the error is shown in the app via ``st.error``. Only successful
    reads are cached; previously the ``None`` produced by a failed read was
    cached too, so the app kept failing even after the file was fixed.
    """
    try:
        return _read_dataset(DATASET_PATH)
    except Exception as e:  # top-level UI boundary: report, do not crash
        st.error(f"Error loading dataset: {e}")
        return None


def preprocess_data(
    df: Optional[pd.DataFrame],
) -> Tuple[Optional[pd.Series], Optional[pd.DataFrame]]:
    """Prepare transaction data for frequent-itemset mining.

    Args:
        df: Raw dataset. It must contain the columns ``'Transaction ID'`` and
            ``'Product Category'``.

    Returns:
        ``(transactions, basket_encoded)`` where ``transactions`` is a Series
        indexed by transaction ID holding the list of categories bought in
        that transaction, and ``basket_encoded`` is a boolean DataFrame with
        one row per transaction and one column per distinct category.
        ``(None, None)`` is returned (and an error shown) when ``df`` is
        ``None`` or lacks the required columns.
    """
    required_columns = ("Transaction ID", "Product Category")
    if df is None or any(col not in df.columns for col in required_columns):
        st.error("Dataset does not contain expected columns: 'Transaction ID' and 'Product Category'.")
        return None, None

    transactions = df.groupby(["Transaction ID"])["Product Category"].apply(list)

    # Sort so the column order is stable between runs (set iteration order is
    # not); key=str keeps the sort total even if the labels are of mixed types.
    columns = sorted(set(itertools.chain.from_iterable(transactions)), key=str)

    # Build the boolean matrix in one pass instead of expanding a dict per row
    # through ``Series.apply(pd.Series)``, which is far slower.
    rows = [
        [item in present for item in columns]
        for present in map(set, transactions)
    ]
    basket_encoded = pd.DataFrame(
        rows, index=transactions.index, columns=columns, dtype=bool
    )
    return transactions, basket_encoded


def _mine_itemsets(
    basket: Optional[pd.DataFrame], min_support: float
) -> Optional[pd.DataFrame]:
    """Run FP-Growth on *basket*; return ``None`` when there is nothing to mine."""
    if basket is None or basket.empty:
        return None
    return fpgrowth(basket, min_support=min_support, use_colnames=True)


# Load and preprocess data
df = load_data()
transactions, basket_data = (None, None)
if df is not None:
    transactions, basket_data = preprocess_data(df)

frequent_itemsets = _mine_itemsets(basket_data, DEFAULT_MIN_SUPPORT)

title_text = "Eclat Algorithm - Buying Pattern Discovery"
st.title(title_text)

st.sidebar.title("Navigation")
st.sidebar.write("Use the sidebar to navigate through different sections of the app.")
section = st.sidebar.radio("Go to", ("Introduction", "Data Exploration", "Results", "Testing"))

if section == "Introduction":
    st.header("Introduction")
    st.write("This application analyzes retail sales data to discover hidden buying patterns using the Eclat Algorithm.")
    st.write("Navigate using the sidebar to explore the dataset, view results, or test the model with your own inputs.")
    st.write("### How to Use This App:")
    st.write("1. Explore the dataset in the 'Data Exploration' section.")
    st.write("2. View frequent product purchase patterns in the 'Results' section.")
    st.write("3. Test the model by entering products in the 'Testing' section.")

elif section == "Data Exploration":
    if df is not None:
        st.header("Data Exploration")
        st.write("### First 5 Rows of the Dataset")
        st.write(df.head())
        st.write("### Data Overview")
        st.write(df.describe())

        # Visualizations
        # Skipped when the column is absent; preprocess_data has already
        # reported the missing column in that case.
        if "Product Category" in df.columns:
            st.write("### Product Category Distribution in Transactions")
            fig, ax = plt.subplots(figsize=(10, 5))
            # Draw on the explicit Axes rather than pyplot's global
            # "current axes" state.
            df["Product Category"].value_counts().plot(kind="bar", ax=ax)
            ax.tick_params(axis="x", rotation=90)
            ax.set_ylabel("Count")
            ax.set_title("Distribution of Product Categories")
            st.pyplot(fig)
            # pyplot keeps every figure alive until closed; without this a new
            # figure would accumulate on each rerun of the script.
            plt.close(fig)
    else:
        # Previously this section rendered nothing at all when loading failed.
        st.error(NO_DATA_MESSAGE)

elif section == "Results":
    st.header("Results")
    results_itemsets = _mine_itemsets(basket_data, RESULTS_MIN_SUPPORT)
    if results_itemsets is not None:
        st.write("### Frequent Itemsets")
        frequent_itemsets = results_itemsets
        st.dataframe(frequent_itemsets)
    else:
        st.error(NO_DATA_MESSAGE)

elif section == "Testing":
    if basket_data is not None and frequent_itemsets is not None:
        st.header("Test the Model")
        unique_items = list(basket_data.columns)
        user_input = st.multiselect("Select products to see associated patterns:", unique_items)
        if user_input:
            user_set = set(user_input)
            # astype(bool) keeps the mask boolean even when no itemsets were
            # found (an empty object-dtype Series is not a valid row mask).
            mask = frequent_itemsets["itemsets"].apply(user_set.issubset).astype(bool)
            matching_sets = frequent_itemsets[mask]
            st.write("### Matching Itemsets")
            st.dataframe(matching_sets)
        else:
            st.write("Please select at least one product to see associations.")
    else:
        st.error("No transaction data available. Please check the dataset structure.")