"""Gradio app: clean the Titanic dataset, draw two EDA plots, and summarize with an LLM."""

import os
import tempfile

import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from transformers import pipeline


def load_and_clean_data():
    """Download the Titanic CSV and return a cleaned DataFrame.

    Missing ``Age`` values are replaced with the column median, missing
    ``Embarked`` values with the column mode, and the ``Cabin``, ``Ticket``
    and ``Name`` columns are dropped.

    Returns:
        pandas.DataFrame: the cleaned dataset.
    """
    url = "https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv"
    df = pd.read_csv(url)
    # Assign the filled column back instead of `df[col].fillna(..., inplace=True)`:
    # the chained in-place form operates on a temporary object under pandas
    # Copy-on-Write and leaves `df` unchanged.
    df["Age"] = df["Age"].fillna(df["Age"].median())
    df["Embarked"] = df["Embarked"].fillna(df["Embarked"].mode()[0])
    return df.drop(columns=["Cabin", "Ticket", "Name"])


def plot_survival_count(df, ax):
    """Draw a count plot of the ``Survived`` column on ``ax``."""
    sns.countplot(x="Survived", data=df, ax=ax)


def plot_survival_by_gender(df, ax):
    """Draw a count plot of ``Sex`` split by ``Survived`` on ``ax``."""
    sns.countplot(x="Sex", hue="Survived", data=df, ax=ax)


def generate_plot_images(df):
    """Render each EDA plot to its own PNG temp file.

    Args:
        df: cleaned Titanic DataFrame passed to every plot function.

    Returns:
        list: PNG file paths, in order: survival count, survival by gender.
        The files are created with ``delete=False`` and are not removed here,
        because the caller hands the paths to the UI after this returns.
    """
    temp_files = []
    for plot_func in (plot_survival_count, plot_survival_by_gender):
        fig, ax = plt.subplots()
        try:
            plot_func(df, ax)
            # Reserve a unique path and close the handle right away so the
            # descriptor is not leaked and savefig can reopen the path.
            with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
                path = tmp.name
            try:
                fig.savefig(path)
            except Exception:
                # Do not leave an empty placeholder file behind on failure.
                os.remove(path)
                raise
        finally:
            # Always release the figure, even when plotting or saving fails.
            plt.close(fig)
        temp_files.append(path)
    return temp_files


# Summarization pipeline, created once at import time and reused per request.
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")


def summarize_eda(df):
    """Return a model-generated summary of a short description of the EDA.

    Args:
        df: cleaned Titanic DataFrame; its row count and the most common
            ``Embarked`` value are interpolated into the prompt.

    Returns:
        str: the summary text, or ``"Summarization failed: <error>"`` if
        building the prompt or running the pipeline raises.
    """
    try:
        top_port = df["Embarked"].mode()[0]
        # NOTE: only the row count and embarkation port come from `df`;
        # the remaining sentences are fixed text.
        text = (
            f" Titanic dataset has {len(df)} rows."
            " Missing values in Age and Embarked were filled."
            " Columns Cabin, Ticket, and Name were dropped."
            " Females had higher survival rates."
            f" Most passengers embarked from {top_port}. \n"
        )
        result = summarizer(text, max_length=100, min_length=30, do_sample=False)
        return result[0]["summary_text"]
    except Exception as e:
        return f"Summarization failed: {e}"


def safe_markdown(df):
    """Return the first rows of ``df`` as Markdown, or as plain text on failure."""
    preview = df.head()
    try:
        return preview.to_markdown()
    except Exception:
        # Best-effort fallback: any failure of to_markdown() yields plain text.
        return preview.to_string()


def run_titanic_eda():
    """Run the full pipeline for the Gradio interface.

    Returns:
        tuple: ``(data_sample, survival_plot_path, gender_plot_path, summary)``.
        On any failure: ``("Data load failed", None, None, "Error: <error>")``.
    """
    try:
        df = load_and_clean_data()
        plots = generate_plot_images(df)
        summary = summarize_eda(df)
        return safe_markdown(df), plots[0], plots[1], summary
    except Exception as e:
        # UI boundary: report the failure in the outputs instead of raising.
        return "Data load failed", None, None, f"Error: {e}"


# Gradio interface: no inputs, four outputs matching run_titanic_eda's tuple.
demo = gr.Interface(
    fn=run_titanic_eda,
    inputs=[],
    outputs=[
        gr.Markdown(label="Cleaned Data Sample"),
        gr.Image(label="Survival Count"),
        gr.Image(label="Survival by Gender"),
        gr.Textbox(label="LLM Summary of EDA"),
    ],
    title="🚢 Titanic EDA + LLM Summary",
    description="Cleaned dataset, EDA plots, and LLM-generated summary — all in one page.",
)

if __name__ == "__main__":
    demo.launch()