import streamlit as st
import pandas as pd
from functools import lru_cache

from src.preprocessing.clean_data import cached_clean_csv
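
# A minimal sketch of the assumed `cached_clean_csv` contract, inferred from
# how it is called below (the real implementation lives in
# src/preprocessing/clean_data.py and may differ; the decorator and type
# hints here are assumptions, not the actual source):
#
#     @st.cache_data  # assumed caching decorator
#     def cached_clean_csv(df_json: str, skip_cleaning: bool) -> tuple[pd.DataFrame, str]:
#         """Return (cleaned_df, insights) for records-oriented JSON input."""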


# Cache static content to avoid recomputation
@lru_cache(maxsize=1)
def get_static_content():
    """Cache static page content so it is not rebuilt on every rerun."""
    welcome_header = """
# Experience AI like never before

### Performance, Analysis, Insights Made Simple.
"""
    features_header = "## ✨ Key Features"
    feature_cards = [
        """

#### 📊 Data Analysis
""",
        """

#### 🤖 Machine Learning
""",
        """

#### 🔍 AI Insights
"""
    ]
    getting_started = """
## 🚀 Getting Started

1. **Upload Your Dataset**: Use the sidebar to upload your CSV file
2. **Explore Data**: View statistics and visualizations in the Overview tab
3. **Train Models**: Select algorithms and tune parameters
4. **Get Insights**: Receive AI-powered recommendations
"""
    dataset_requirements = """
* File format: CSV
* Maximum size: 200MB
* Supported column types:
    * Numeric (int, float)
    * Categorical (string, boolean)
    * Temporal (date, datetime)
* Clean data preferred, but not required
"""
    example_datasets = """
Try these example datasets to explore the app:

* [Iris Dataset](https://archive.ics.uci.edu/ml/datasets/iris)
* [Boston Housing](https://www.kaggle.com/c/boston-housing)
* [Wine Quality](https://archive.ics.uci.edu/ml/datasets/wine+quality)
"""
    return (
        welcome_header,
        features_header,
        feature_cards,
        getting_started,
        dataset_requirements,
        example_datasets,
    )


def show_welcome_page():
    """Display the welcome page: features, instructions, and dataset upload."""
    # Load cached static content
    (
        welcome_header,
        features_header,
        feature_cards,
        getting_started,
        dataset_requirements,
        example_datasets,
    ) = get_static_content()

    # Render static content
    st.markdown(welcome_header, unsafe_allow_html=True)
    st.markdown(features_header, unsafe_allow_html=True)

    # Feature cards in three equal columns
    col1, col2, col3 = st.columns(3, gap="medium")
    with col1:
        st.markdown(feature_cards[0], unsafe_allow_html=True)
    with col2:
        st.markdown(feature_cards[1], unsafe_allow_html=True)
    with col3:
        st.markdown(feature_cards[2], unsafe_allow_html=True)
", unsafe_allow_html=True) # Spacing # Getting Started and Expanders st.markdown(getting_started, unsafe_allow_html=True) with st.expander("📋 Dataset Requirements"): st.markdown(dataset_requirements) with st.expander("đŸŽ¯ Example Datasets"): st.markdown(example_datasets) # New File Uploader Section st.markdown("### 📤 Upload Your Dataset (Currently Using Default Dataset)") # Add a checkbox to indicate if the dataset is already cleaned skip_cleaning = st.checkbox("My dataset is already cleaned (skip cleaning)") uploaded_file = st.file_uploader("Upload CSV file", type=["csv"]) if uploaded_file is not None: try: # Validate file size file_details = {"FileName": uploaded_file.name, "FileType": uploaded_file.type, "FileSize": uploaded_file.size} if uploaded_file.size > 200 * 1024 * 1024: # 200MB limit st.error("❌ File size exceeds 200MB limit. Please upload a smaller file.") return # Attempt to read the CSV try: df = pd.read_csv(uploaded_file) if df.empty: st.error("❌ The uploaded file is empty. Please upload a file with data.") return st.success("✅ Dataset uploaded successfully!") except pd.errors.EmptyDataError: st.error("❌ The uploaded file is empty. Please upload a file with data.") return except pd.errors.ParserError: st.error("❌ Unable to parse the CSV file. Please ensure it's properly formatted.") return # Convert dataframe to JSON for caching df_json = df.to_json(orient='records') # Use the cached cleaning function with proper error handling with st.spinner("🧠 AI is analyzing and cleaning the data..." if not skip_cleaning else "Processing dataset..."): try: cleaned_df, insights = cached_clean_csv(df_json, skip_cleaning) except Exception as cleaning_error: st.error(f"❌ Error during data cleaning: {str(cleaning_error)}") # Fallback to using the original dataframe st.warning("âš ī¸ Using original dataset without cleaning due to errors.") cleaned_df = df insights = "Cleaning failed, using original data." # Save results to session state st.session_state.df = cleaned_df st.session_state.insights = insights st.session_state.data_cleaned = True st.session_state.dataset_loaded = True # Store a flag to indicate this is a user-uploaded dataset st.session_state.is_user_uploaded = True # Store the original dataframe JSON and skip_cleaning preference # This helps prevent redundant cleaning st.session_state.original_df_json = df_json st.session_state.skip_cleaning = skip_cleaning # Reset visualization and model training related session state if "column_types" in st.session_state: del st.session_state.column_types if "corr_matrix" in st.session_state: del st.session_state.corr_matrix if "df_hash" in st.session_state: del st.session_state.df_hash if "test_results_calculated" in st.session_state: st.session_state.test_results_calculated = False if skip_cleaning: st.success("✅ Using uploaded dataset as-is (skipped cleaning).") else: st.success("✅ Data cleaned successfully!") except Exception as e: st.error(f"❌ Error processing dataset: {str(e)}") st.info("â„šī¸ Please check that your file is a valid CSV and try again.")