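"""Welcome page for the Streamlit app.

Renders cached static content (hero header, feature cards, getting-started help)
and handles CSV upload, with optional AI-assisted cleaning, into session state.
"""
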
from functools import lru_cache

import pandas as pd
import streamlit as st

from src.preprocessing.clean_data import cached_clean_csv
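
# Note: `cached_clean_csv` is assumed to take the dataframe serialized to a JSON
# string plus a skip-cleaning flag, and to return (cleaned_df, insights); the JSON
# form keeps the argument hashable for caching. Inferred from the call site below,
# not from the src.preprocessing.clean_data module itself.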

# Cache static content to avoid recomputation
@lru_cache(maxsize=1)
def get_static_content():
    """Cache static HTML content to avoid regeneration."""
    welcome_header = """
        <div class="welcome-header" style="text-align: left; margin-bottom: 2rem;">
            <h1>Experience AI like never before</h1>
            <p class="subtitle">
                Performance, Analysis, and Insights Made Simple.
            </p>
        </div>
    """
    features_header = "## ✨ Key Features"
    feature_cards = [
        """
        <div class="feature-card">
            <h3>📊 Data Analysis</h3>
            <ul>
                <li>Automated data cleaning</li>
                <li>Interactive visualizations</li>
                <li>Statistical insights</li>
                <li>Correlation analysis</li>
            </ul>
        </div>
        """,
        """
        <div class="feature-card">
            <h3>🤖 Machine Learning</h3>
            <ul>
                <li>Multiple ML algorithms</li>
                <li>Automated model selection</li>
                <li>Hyperparameter tuning</li>
                <li>Performance metrics</li>
            </ul>
        </div>
        """,
        """
        <div class="feature-card">
            <h3>🔍 AI Insights</h3>
            <ul>
                <li>Data quality checks</li>
                <li>Feature importance</li>
                <li>Model explanations</li>
                <li>Smart recommendations</li>
            </ul>
        </div>
        """
    ]
    getting_started = """
    ## 🚀 Getting Started
    1. **Upload Your Dataset**: Use the sidebar to upload your CSV file
    2. **Explore Data**: View statistics and visualizations in the Overview tab
    3. **Train Models**: Select algorithms and tune parameters
    4. **Get Insights**: Receive AI-powered recommendations
    """
    dataset_requirements = """
    * File format: CSV
    * Maximum size: 200MB
    * Supported column types:
        * Numeric (int, float)
        * Categorical (string, boolean)
        * Temporal (date, datetime)
    * Clean data preferred, but not required
    """
    example_datasets = """
    Try these example datasets to explore the app:
    * [Iris Dataset](https://archive.ics.uci.edu/ml/datasets/iris)
    * [Boston Housing](https://www.kaggle.com/c/boston-housing)
    * [Wine Quality](https://archive.ics.uci.edu/ml/datasets/wine+quality)
    """
    return welcome_header, features_header, feature_cards, getting_started, dataset_requirements, example_datasets
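
# functools.lru_cache suffices here because the content is constant for the life
# of the process; @st.cache_data would be the Streamlit-native alternative.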

def show_welcome_page():
    """Display welcome page with features and instructions efficiently."""
    # Load cached static content
    welcome_header, features_header, feature_cards, getting_started, dataset_requirements, example_datasets = get_static_content()

    # Render static content
    st.markdown(welcome_header, unsafe_allow_html=True)
    st.markdown(features_header)

    # Render the three feature cards side by side
    for col, card in zip(st.columns(3, gap="medium"), feature_cards):
        with col:
            st.markdown(card, unsafe_allow_html=True)

    st.markdown("<br>", unsafe_allow_html=True)  # Spacing

    # Getting Started and Expanders
    st.markdown(getting_started, unsafe_allow_html=True)
    with st.expander("📋 Dataset Requirements"):
        st.markdown(dataset_requirements)
    
    with st.expander("🎯 Example Datasets"):
        st.markdown(example_datasets)


 
    # New file uploader section
    st.markdown("### 📤 Upload Your Dataset (Currently Using Default Dataset)")

    # Add a checkbox to indicate if the dataset is already cleaned
    skip_cleaning = st.checkbox("My dataset is already cleaned (skip cleaning)")
    
    uploaded_file = st.file_uploader("Upload CSV file", type=["csv"])
    
    if uploaded_file is not None:
        try:
            # Validate the file size before parsing (200 MB upload limit)
            if uploaded_file.size > 200 * 1024 * 1024:
                st.error("❌ File size exceeds 200MB limit. Please upload a smaller file.")
                return
                
            # Attempt to read the CSV
            try:
                df = pd.read_csv(uploaded_file)
                if df.empty:
                    st.error("❌ The uploaded file is empty. Please upload a file with data.")
                    return
                    
                st.success("✅ Dataset uploaded successfully!")
            except pd.errors.EmptyDataError:
                st.error("❌ The uploaded file is empty. Please upload a file with data.")
                return
            except pd.errors.ParserError:
                st.error("❌ Unable to parse the CSV file. Please ensure it's properly formatted.")
                return

            # Convert dataframe to JSON for caching
            df_json = df.to_json(orient='records')
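            # (orient='records' drops the index; the JSON string gives the cache a
            # stable, hashable key at the cost of one temporary in-memory copy)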
            
            # Use the cached cleaning function with proper error handling
            with st.spinner("🧠 AI is analyzing and cleaning the data..." if not skip_cleaning else "Processing dataset..."):
                try:
                    cleaned_df, insights = cached_clean_csv(df_json, skip_cleaning)
                except Exception as cleaning_error:
                    st.error(f"❌ Error during data cleaning: {str(cleaning_error)}")
                    # Fallback to using the original dataframe
                    st.warning("⚠️ Using original dataset without cleaning due to errors.")
                    cleaned_df = df
                    insights = "Cleaning failed, using original data."
            
            # Save results to session state
            st.session_state.df = cleaned_df
            st.session_state.insights = insights
            st.session_state.data_cleaned = True
            st.session_state.dataset_loaded = True
            
            # Store a flag to indicate this is a user-uploaded dataset
            st.session_state.is_user_uploaded = True
            
            # Store the original dataframe JSON and skip_cleaning preference
            # This helps prevent redundant cleaning
            st.session_state.original_df_json = df_json
            st.session_state.skip_cleaning = skip_cleaning
            
            # Reset visualization and model-training state so stale results from a
            # previous dataset are not reused
            for stale_key in ("column_types", "corr_matrix", "df_hash"):
                st.session_state.pop(stale_key, None)
            if "test_results_calculated" in st.session_state:
                st.session_state.test_results_calculated = False
            
            if skip_cleaning:
                st.success("✅ Using uploaded dataset as-is (skipped cleaning).")
            else:
                st.success("✅ Data cleaned successfully!")
                
        except Exception as e:
            st.error(f"❌ Error processing dataset: {str(e)}")
            st.info("ℹ️ Please check that your file is a valid CSV and try again.")
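

# A minimal sketch of how this page is presumably invoked from the main app script
# (hypothetical entry point; the actual router lives outside this file):
#
#     if not st.session_state.get("dataset_loaded", False):
#         show_welcome_page()
#     else:
#         ...  # render the analysis tabs against st.session_state.df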