import streamlit as st
from src.preprocessing.clean_data import cached_clean_csv
import pandas as pd
from functools import lru_cache
# Cache static content to avoid recomputation
@lru_cache(maxsize=1)
def get_static_content():
    """Cache static HTML and markdown content to avoid regeneration."""
    welcome_header = """
<div class="welcome-header" style="text-align: left; margin-bottom: 2rem;">
    <h1>Experience AI like never before</h1>
    <p class="subtitle">
        Performance, Analysis, Insights Made Simple.
    </p>
</div>
"""

    features_header = "## ✨ Key Features"

    feature_cards = [
        """
<div class="feature-card">
    <h3>Data Analysis</h3>
    <ul>
        <li>Automated data cleaning</li>
        <li>Interactive visualizations</li>
        <li>Statistical insights</li>
        <li>Correlation analysis</li>
    </ul>
</div>
""",
        """
<div class="feature-card">
    <h3>Machine Learning</h3>
    <ul>
        <li>Multiple ML algorithms</li>
        <li>Automated model selection</li>
        <li>Hyperparameter tuning</li>
        <li>Performance metrics</li>
    </ul>
</div>
""",
        """
<div class="feature-card">
    <h3>AI Insights</h3>
    <ul>
        <li>Data quality checks</li>
        <li>Feature importance</li>
        <li>Model explanations</li>
        <li>Smart recommendations</li>
    </ul>
</div>
"""
    ]

    getting_started = """
## Getting Started

1. **Upload Your Dataset**: Use the sidebar to upload your CSV file
2. **Explore Data**: View statistics and visualizations in the Overview tab
3. **Train Models**: Select algorithms and tune parameters
4. **Get Insights**: Receive AI-powered recommendations
"""

    dataset_requirements = """
* File format: CSV
* Maximum size: 200MB
* Supported column types:
    * Numeric (int, float)
    * Categorical (string, boolean)
    * Temporal (date, datetime)
* Clean data preferred, but not required
"""

    example_datasets = """
Try these example datasets to explore the app:

* [Iris Dataset](https://archive.ics.uci.edu/ml/datasets/iris)
* [Boston Housing](https://www.kaggle.com/c/boston-housing)
* [Wine Quality](https://archive.ics.uci.edu/ml/datasets/wine+quality)
"""

    return welcome_header, features_header, feature_cards, getting_started, dataset_requirements, example_datasets
def show_welcome_page():
"""Display welcome page with features and instructions efficiently."""
# Load cached static content
welcome_header, features_header, feature_cards, getting_started, dataset_requirements, example_datasets = get_static_content()
# Render static content
st.markdown(welcome_header, unsafe_allow_html=True)
st.markdown(features_header, unsafe_allow_html=True)
# Feature columns with minimal overhead
col1, col2, col3 = st.columns(3, gap="medium")
with col1:
st.markdown(feature_cards[0], unsafe_allow_html=True)
with col2:
st.markdown(feature_cards[1], unsafe_allow_html=True)
with col3:
st.markdown(feature_cards[2], unsafe_allow_html=True)
st.markdown("<br>", unsafe_allow_html=True) # Spacing
# Getting Started and Expanders
st.markdown(getting_started, unsafe_allow_html=True)
with st.expander("π Dataset Requirements"):
st.markdown(dataset_requirements)
with st.expander("π― Example Datasets"):
st.markdown(example_datasets)
    # File uploader section
    st.markdown("### Upload Your Dataset (Currently Using Default Dataset)")

    # Checkbox to indicate the dataset is already cleaned
    skip_cleaning = st.checkbox("My dataset is already cleaned (skip cleaning)")

    uploaded_file = st.file_uploader("Upload CSV file", type=["csv"])

    if uploaded_file is not None:
        try:
            # Validate file size
            file_details = {"FileName": uploaded_file.name, "FileType": uploaded_file.type, "FileSize": uploaded_file.size}
            if uploaded_file.size > 200 * 1024 * 1024:  # 200MB limit
                st.error("❌ File size exceeds 200MB limit. Please upload a smaller file.")
                return

            # Attempt to read the CSV
            try:
                df = pd.read_csv(uploaded_file)
                if df.empty:
                    st.error("❌ The uploaded file is empty. Please upload a file with data.")
                    return
                st.success("✅ Dataset uploaded successfully!")
            except pd.errors.EmptyDataError:
                st.error("❌ The uploaded file is empty. Please upload a file with data.")
                return
            except pd.errors.ParserError:
                st.error("❌ Unable to parse the CSV file. Please ensure it's properly formatted.")
                return

            # Convert the dataframe to JSON for caching
            df_json = df.to_json(orient='records')

            # Use the cached cleaning function with proper error handling
            with st.spinner("🧠 AI is analyzing and cleaning the data..." if not skip_cleaning else "Processing dataset..."):
                try:
                    cleaned_df, insights = cached_clean_csv(df_json, skip_cleaning)
                except Exception as cleaning_error:
                    st.error(f"❌ Error during data cleaning: {str(cleaning_error)}")
                    # Fall back to the original dataframe
                    st.warning("⚠️ Using original dataset without cleaning due to errors.")
                    cleaned_df = df
                    insights = "Cleaning failed, using original data."

            # Save results to session state
            st.session_state.df = cleaned_df
            st.session_state.insights = insights
            st.session_state.data_cleaned = True
            st.session_state.dataset_loaded = True
            # Flag that this is a user-uploaded dataset
            st.session_state.is_user_uploaded = True
            # Store the original dataframe JSON and the skip_cleaning preference
            # to prevent redundant cleaning on reruns
            st.session_state.original_df_json = df_json
            st.session_state.skip_cleaning = skip_cleaning

            # Reset visualization- and model-training-related session state
            if "column_types" in st.session_state:
                del st.session_state.column_types
            if "corr_matrix" in st.session_state:
                del st.session_state.corr_matrix
            if "df_hash" in st.session_state:
                del st.session_state.df_hash
            if "test_results_calculated" in st.session_state:
                st.session_state.test_results_calculated = False

            if skip_cleaning:
                st.success("✅ Using uploaded dataset as-is (skipped cleaning).")
            else:
                st.success("✅ Data cleaned successfully!")

        except Exception as e:
            st.error(f"❌ Error processing dataset: {str(e)}")
            st.info("ℹ️ Please check that your file is a valid CSV and try again.")
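
# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of the original module): how an
# app entry point might render this page. The file name "app.py", the import
# path, and the page-config values are assumptions; adjust them to match the
# actual project layout.
#
#     # app.py
#     import streamlit as st
#     from welcome import show_welcome_page  # adjust the import path
#
#     st.set_page_config(page_title="AI Data Assistant", layout="wide")
#     show_welcome_page()
#
#     # After a successful upload, downstream tabs can read the results:
#     if st.session_state.get("dataset_loaded"):
#         df = st.session_state.df
#         insights = st.session_state.insights
# ---------------------------------------------------------------------------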