# Hugging Face Space (status when this page was captured: Sleeping)
import gradio as gr | |
import pandas as pd | |
import seaborn as sns | |
import matplotlib.pyplot as plt | |
from transformers import pipeline | |
import tempfile | |
import os | |
# Load and clean the Titanic dataset
def load_and_clean_data(
    url="https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv",
):
    """Load the Titanic passenger table and apply basic cleaning.

    Args:
        url: Location of the CSV. Anything ``pandas.read_csv`` accepts works
            (URL, local path, or an open file-like object). Defaults to the
            Data Science Dojo copy of the dataset.

    Returns:
        A DataFrame in which missing ``Age`` values are replaced by the
        column median, missing ``Embarked`` values by the most frequent
        value, and the ``Cabin``, ``Ticket`` and ``Name`` columns are removed.
    """
    df = pd.read_csv(url)

    # Assign the filled column back rather than calling
    # ``df[col].fillna(..., inplace=True)``: the in-place form mutates a
    # temporary Series, which pandas warns about and which leaves ``df``
    # untouched when Copy-on-Write is enabled.
    df['Age'] = df['Age'].fillna(df['Age'].median())

    # ``mode()`` is empty when the column holds no non-null value at all; in
    # that case there is nothing sensible to fill with, so leave it as is.
    common_ports = df['Embarked'].mode()
    if not common_ports.empty:
        df['Embarked'] = df['Embarked'].fillna(common_ports.iloc[0])

    return df.drop(columns=['Cabin', 'Ticket', 'Name'])
# Generate the EDA plots
def generate_plot_images(df):
    """Render each EDA chart to its own PNG file and return the file paths.

    Args:
        df: DataFrame passed unchanged to every plotting helper.

    Returns:
        A list of PNG file paths, one per chart, in the order: survival
        count, survival by gender. The files are created with
        ``delete=False`` and are not removed by this function.
    """
    temp_files = []
    for plot_func in (plot_survival_count, plot_survival_by_gender):
        fig, ax = plt.subplots()
        try:
            plot_func(df, ax)
            # The temporary file is only used to reserve a unique path; close
            # its handle straight away so it is not left open while (and
            # after) matplotlib writes to the same path.
            with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
                png_path = tmp.name
            fig.savefig(png_path)
        finally:
            # pyplot keeps every figure registered until it is closed, so
            # close it even when plotting or saving raised.
            plt.close(fig)
        temp_files.append(png_path)
    return temp_files
def plot_survival_count(df, ax):
    """Draw a count plot of the ``Survived`` column onto ``ax``.

    Args:
        df: DataFrame providing the ``Survived`` column.
        ax: Matplotlib axes that receives the chart.
    """
    sns.countplot(data=df, x='Survived', ax=ax)
def plot_survival_by_gender(df, ax):
    """Draw a count plot of ``Sex``, split by ``Survived``, onto ``ax``.

    Args:
        df: DataFrame providing the ``Sex`` and ``Survived`` columns.
        ax: Matplotlib axes that receives the chart.
    """
    sns.countplot(data=df, x='Sex', hue='Survived', ax=ax)
# Summarizer pipeline
# Created at module level, so the pipeline object is constructed once when
# this file is imported and then reused by every call to summarize_eda().
summarizer = pipeline(
    task="summarization",
    model="sshleifer/distilbart-cnn-12-6",
)
def summarize_eda(df):
    """Ask the summarization pipeline for a short recap of the cleaning/EDA.

    Args:
        df: Cleaned DataFrame; its row count and most frequent ``Embarked``
            value are interpolated into the text given to the model.

    Returns:
        The ``summary_text`` of the first pipeline result, or a string
        starting with ``"Summarization failed: "`` followed by the exception
        text if anything inside the function raises.
    """
    try:
        row_count = len(df)
        top_port = df['Embarked'].mode()[0]
        # Same text as a triple-quoted literal would give: it opens and ends
        # with a newline and has one sentence group per line.
        text = (
            "\n"
            f"Titanic dataset has {row_count} rows. Missing values in Age and Embarked were filled.\n"
            "Columns Cabin, Ticket, and Name were dropped. Females had higher survival rates.\n"
            f"Most passengers embarked from {top_port}.\n"
        )
        results = summarizer(text, max_length=100, min_length=30, do_sample=False)
        return results[0]['summary_text']
    except Exception as exc:
        # Best effort: the UI shows the failure text instead of crashing.
        return "Summarization failed: " + str(exc)
# Markdown rendering with a plain-text fallback
def safe_markdown(df):
    """Render the first rows of ``df`` as text, preferring Markdown.

    Args:
        df: DataFrame to preview.

    Returns:
        ``df.head()`` formatted by ``to_markdown()``; if that call raises,
        the same rows formatted by ``to_string()`` instead.
    """
    preview = df.head()
    try:
        rendered = preview.to_markdown()
    except Exception:
        # Any failure of the Markdown renderer falls back to plain text.
        rendered = preview.to_string()
    return rendered
# Main app logic
def run_titanic_eda():
    """Run the load, plot and summarize steps and bundle the results.

    Returns:
        A 4-tuple ``(table_text, first_plot_path, second_plot_path, summary)``.
        If any step raises, the tuple is
        ``("Data load failed", None, None, "Error: <exception text>")``.
    """
    try:
        cleaned = load_and_clean_data()
        image_paths = generate_plot_images(cleaned)
        llm_summary = summarize_eda(cleaned)
        outputs = (
            safe_markdown(cleaned),
            image_paths[0],
            image_paths[1],
            llm_summary,
        )
    except Exception as exc:
        # Every failure, whatever the step, is reported through this tuple.
        outputs = ("Data load failed", None, None, "Error: " + str(exc))
    return outputs
# Gradio interface
# The four output components are listed in the same order as the values in
# the tuple returned by run_titanic_eda. The function takes no arguments, so
# the interface declares no inputs.
demo = gr.Interface(
    fn=run_titanic_eda,
    inputs=[],
    outputs=[
        gr.Markdown(label="Cleaned Data Sample"),
        gr.Image(label="Survival Count"),
        gr.Image(label="Survival by Gender"),
        gr.Textbox(label="LLM Summary of EDA"),
    ],
    # The title and description previously held mis-decoded UTF-8 ("π’" and
    # "β"). They are restored to the ship emoji and an em dash, written as
    # escapes so the source no longer depends on how the file is decoded.
    title="\U0001F6A2 Titanic EDA + LLM Summary",
    description="Cleaned dataset, EDA plots, and LLM-generated summary \u2014 all in one page.",
)

# Start the web UI only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()