# Page header captured with this file: mahesh1209 - "Update app.py" - commit cc78537 (verified)
import gradio as gr
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from transformers import pipeline
import tempfile
import os
_TITANIC_CSV_URL = "https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv"


# Load and clean Titanic dataset
def load_and_clean_data(source=_TITANIC_CSV_URL):
    """Read the Titanic CSV and return a cleaned DataFrame.

    Cleaning steps:
      * missing ``Age`` values are replaced with the column median;
      * missing ``Embarked`` values are replaced with the most frequent value;
      * the ``Cabin``, ``Ticket`` and ``Name`` columns are dropped.

    Args:
        source: Anything ``pandas.read_csv`` accepts (URL, path or file-like
            object). Defaults to the public Titanic CSV URL.

    Returns:
        The cleaned ``pandas.DataFrame``.

    Raises:
        KeyError: If an expected column is missing from the CSV.
        Whatever ``pandas.read_csv`` raises when the source cannot be read.
    """
    df = pd.read_csv(source)

    # Assign the result back instead of calling fillna(inplace=True) on a
    # column selection: the in-place form acts on an intermediate object and
    # is not guaranteed to update `df` (deprecated chained assignment).
    df['Age'] = df['Age'].fillna(df['Age'].median())

    # mode() is empty when every value is missing; nothing to fill with then.
    embarked_mode = df['Embarked'].mode()
    if not embarked_mode.empty:
        df['Embarked'] = df['Embarked'].fillna(embarked_mode.iloc[0])

    return df.drop(columns=['Cabin', 'Ticket', 'Name'])
# Generate EDA plots
def generate_plot_images(df):
    """Render each EDA plot to its own PNG file and return the file paths.

    One figure is created per plotting function (survival count, then
    survival by gender), saved to a uniquely named temporary ``.png`` file
    and closed again.

    Args:
        df: DataFrame handed unchanged to each plotting function.

    Returns:
        A list of PNG file paths, in the same order as the plots. The files
        are created with ``delete=False``, so they are not removed here.
    """
    temp_files = []
    for plot_func in [plot_survival_count, plot_survival_by_gender]:
        fig, ax = plt.subplots()
        try:
            plot_func(df, ax)
            # Reserve a unique path, then close the handle right away so no
            # descriptor is leaked and the file is not held open while
            # matplotlib writes to the same path.
            with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
                image_path = tmp.name
            fig.savefig(image_path)
        finally:
            # Release the figure even when plotting or saving fails.
            plt.close(fig)
        temp_files.append(image_path)
    return temp_files
def plot_survival_count(df, ax):
    """Draw a count plot of the ``Survived`` column of *df* onto *ax*."""
    sns.countplot(
        data=df,
        x='Survived',
        ax=ax,
    )
def plot_survival_by_gender(df, ax):
    """Draw a count plot of ``Sex`` onto *ax*, with bars split by ``Survived``."""
    sns.countplot(
        data=df,
        x='Sex',
        hue='Survived',
        ax=ax,
    )
# Summarizer pipeline: created once at module level so every call to
# summarize_eda() reuses the same pipeline object.
summarizer = pipeline(
    "summarization",
    model="sshleifer/distilbart-cnn-12-6",
)
def summarize_eda(df):
    """Return a short model-generated summary of the cleaned Titanic data.

    A fixed description of the cleaning steps, filled in with the row count
    and the most frequent ``Embarked`` value of *df*, is passed to the
    module-level ``summarizer`` pipeline.

    Args:
        df: The cleaned DataFrame; must have an ``Embarked`` column.

    Returns:
        The generated summary text, or ``"Summarization failed: <reason>"``
        if building the prompt or running the pipeline raises.
    """
    try:
        # The pieces below reproduce the prompt text exactly, including the
        # leading and trailing newlines.
        prompt = (
            f"\nTitanic dataset has {len(df)} rows. Missing values in Age and Embarked were filled.\n"
            "Columns Cabin, Ticket, and Name were dropped. Females had higher survival rates.\n"
            f"Most passengers embarked from {df['Embarked'].mode()[0]}.\n"
        )
        results = summarizer(prompt, max_length=100, min_length=30, do_sample=False)
        return results[0]['summary_text']
    except Exception as err:
        # Best-effort: report the failure as text instead of raising.
        return f"Summarization failed: {err!s}"
# Markdown fallback logic
def safe_markdown(df):
    """Return the first rows of *df* as text that renders well as Markdown.

    The preferred form is a Markdown table from ``DataFrame.to_markdown()``.
    If that call fails (it relies on the optional ``tabulate`` package), the
    plain-text table from ``to_string()`` is returned inside a fenced code
    block, so a Markdown renderer keeps the column alignment instead of
    collapsing the whitespace into one paragraph.

    Args:
        df: DataFrame to preview.

    Returns:
        A string containing either a Markdown table or a fenced plain-text
        table of ``df.head()``.
    """
    preview = df.head()
    try:
        return preview.to_markdown()
    except Exception:
        # Deliberate best-effort fallback: any failure to build the Markdown
        # table degrades to a monospaced plain-text rendering.
        return f"```\n{preview.to_string()}\n```"
# Main app logic
def run_titanic_eda():
    """Run the whole pipeline and return the four values shown in the UI.

    Returns:
        A 4-tuple ``(table_text, first_plot_path, second_plot_path, summary)``.
        If any step raises, the tuple is
        ``("Data load failed", None, None, "Error: <reason>")`` instead.
    """
    try:
        cleaned = load_and_clean_data()
        image_paths = generate_plot_images(cleaned)
        llm_summary = summarize_eda(cleaned)
        preview = safe_markdown(cleaned)
        outputs = (preview, image_paths[0], image_paths[1], llm_summary)
    except Exception as err:
        # Top-level boundary for the UI: surface the error text rather than crash.
        outputs = ("Data load failed", None, None, f"Error: {err!s}")
    return outputs
# Gradio interface: no inputs; the four output components are listed in the
# same order as the values returned by run_titanic_eda().
demo = gr.Interface(
    fn=run_titanic_eda,
    inputs=[],
    outputs=[
        gr.Markdown(label="Cleaned Data Sample"),
        gr.Image(label="Survival Count"),
        gr.Image(label="Survival by Gender"),
        gr.Textbox(label="LLM Summary of EDA"),
    ],
    title="🚒 Titanic EDA + LLM Summary",
    # The dash was mis-encoded ("β€”") in the description; restored to an em dash.
    description="Cleaned dataset, EDA plots, and LLM-generated summary — all in one page.",
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()