import gradio as gr import pandas as pd import numpy as np from numpy.random import default_rng import io # For BytesIO to handle file in memory import matplotlib # Explicitly set backend for environments where default might be an issue matplotlib.use('Agg') # Use a non-interactive backend import matplotlib.pyplot as plt import scipy.stats as stats # 1. Data Generation Function (customizable via UI filters) def generate_custom_model_points( mp_count_val, seed_val, age_min_val, age_max_val, sa_min_val, sa_max_val, policy_terms_selection_val, include_sex_val, policy_count_fixed_val ): """ Generates seriatim model points based on user-defined parameters. """ rng = default_rng(int(seed_val)) mp_count_val = int(mp_count_val) age_min_val = int(age_min_val) age_max_val = int(age_max_val) sa_min_val = float(sa_min_val) sa_max_val = float(sa_max_val) policy_id_col = np.arange(1, mp_count_val + 1) age_at_entry = rng.integers(low=age_min_val, high=age_max_val + 1, size=mp_count_val) if include_sex_val: sex_options = ["M", "F"] sex_col = np.fromiter(map(lambda i: sex_options[i], rng.integers(low=0, high=len(sex_options), size=mp_count_val)), np.dtype('= int(age_mx): gr.Warning("Minimum Age must be less than Maximum Age.") return current_df, df_state.value, pd.DataFrame({'Error': ["Minimum Age must be less than Maximum Age."]}), empty_plot, pd.DataFrame({'Error': ["Minimum Age must be less than Maximum Age."]}), pd.DataFrame({'Error': ["Minimum Age must be less than Maximum Age."]}) if float(sa_m) >= float(sa_mx): gr.Warning("Minimum Sum Assured must be less than Maximum Sum Assured.") return current_df, df_state.value, pd.DataFrame({'Error': ["Minimum Sum Assured must be less than Maximum Sum Assured."]}), empty_plot, pd.DataFrame({'Error': ["Minimum Sum Assured must be less than Maximum Sum Assured."]}), pd.DataFrame({'Error': ["Minimum Sum Assured must be less than Maximum Sum Assured."]}) if not p_terms: gr.Warning("At least one Policy Term must be selected. Using defaults.") p_terms = [10, 15, 20] gr.Info("Generating model points... Please wait.") try: df = generate_custom_model_points(mp_c, s, age_m, age_mx, sa_m, sa_mx, p_terms, incl_sex, pc_fixed) gr.Info(f"{len(df)} model points generated successfully!") desc_stats_df = no_data_df_num if not df.empty: numerical_cols = ['age_at_entry', 'sum_assured', 'duration_mth', 'policy_count'] existing_numerical_cols = [col for col in numerical_cols if col in df.columns] if existing_numerical_cols: desc_stats = df[existing_numerical_cols].describe().transpose() if 'count' in desc_stats.columns: desc_stats['count'] = desc_stats['count'].astype(int) desc_stats_df = desc_stats.reset_index().rename(columns={'index': 'Statistic'}) else: desc_stats_df = pd.DataFrame({'Message': ["No numerical columns found for summary."]}) elif df.empty: desc_stats_df = pd.DataFrame({'Message': ["DataFrame is empty, no numerical data to describe."]}) fig = empty_plot if 'sum_assured' in df.columns and not df['sum_assured'].empty: fig, ax = plt.subplots(figsize=(8, 5)) ax.hist(df['sum_assured'], bins=50, density=True, alpha=0.7, color='skyblue', edgecolor='black', label='Actual Distribution') mu, std_dev = stats.norm.fit(df['sum_assured']) xmin_hist, xmax_hist = ax.get_xlim() x_norm = np.linspace(xmin_hist, xmax_hist, 100) p_norm = stats.norm.pdf(x_norm, mu, std_dev) ax.plot(x_norm, p_norm, 'r--', linewidth=2, label=f'Fitted Normal (μ={mu:,.0f}, σ={std_dev:,.0f})') ax.set_title(f"Sum Assured Distribution (N={len(df)})", fontsize=14) ax.set_xlabel("Sum Assured ($)", fontsize=12) ax.set_ylabel("Density", fontsize=12) ax.legend() ax.grid(axis='y', linestyle='--', alpha=0.7) plt.tight_layout() sex_counts_df = no_data_df_cat term_counts_df = no_data_df_cat if not df.empty: if 'sex' in df.columns: sex_counts = df['sex'].value_counts().reset_index() sex_counts.columns = ['Sex', 'Count'] sex_counts['Percentage'] = (sex_counts['Count'] / sex_counts['Count'].sum() * 100).round(2) sex_counts_df = sex_counts else: sex_counts_df = pd.DataFrame({'Message': ["'sex' column not found or data is empty."]}) if 'policy_term' in df.columns: term_counts = df['policy_term'].value_counts().sort_index().reset_index() term_counts.columns = ['Policy Term (Years)', 'Count'] term_counts['Percentage'] = (term_counts['Count'] / term_counts['Count'].sum() * 100).round(2) term_counts_df = term_counts else: term_counts_df = pd.DataFrame({'Message': ["'policy_term' column not found or data is empty."]}) elif df.empty: sex_counts_df = pd.DataFrame({'Message': ["DataFrame is empty, no categorical data for 'sex'."]}) term_counts_df = pd.DataFrame({'Message': ["DataFrame is empty, no categorical data for 'policy_term'."]}) return df, df, desc_stats_df, fig, sex_counts_df, term_counts_df except Exception as e: gr.Error(f"An error occurred during generation: {str(e)}") error_df = pd.DataFrame({'Error': [str(e)]}) return current_df, df_state.value, error_df, empty_plot, error_df, error_df def handle_download_button_click(current_df_to_download): if current_df_to_download is None or not isinstance(current_df_to_download, pd.DataFrame) or current_df_to_download.empty: gr.Warning("No data available to download. Generate model points first.") return gr.DownloadButton.update(interactive=False) excel_output = io.BytesIO() current_df_to_download.to_excel(excel_output, sheet_name='ModelPoints', engine='xlsxwriter', index=False) excel_output.seek(0) # Returning a BytesIO object directly for file download # Provide a filename for the browser return gr.File(file_path=excel_output, file_name="model_points.xlsx", visible=True) inputs_list = [ mp_count_input, seed_input, age_min_input, age_max_input, sum_assured_min_input, sum_assured_max_input, policy_terms_input, include_sex_input, policy_count_fixed_input ] generate_btn.click( fn=handle_generate_button_click, inputs=inputs_list, outputs=[ model_points_display, df_state, summary_stats_display, distribution_plot_display, sex_summary_display, policy_term_summary_display ] ) download_excel_btn.click( fn=handle_download_button_click, inputs=[df_state], outputs=[download_excel_btn] ) if __name__ == "__main__": demo.launch(debug=True)