# Scraped page header (not part of the program): alidenewade's picture / Update app.py / 7990da4 verified
import gradio as gr
import pandas as pd
import numpy as np
from numpy.random import default_rng
import io # For BytesIO to handle file in memory
import matplotlib # Explicitly set backend for environments where default might be an issue
matplotlib.use('Agg') # Use a non-interactive backend
import matplotlib.pyplot as plt
import scipy.stats as stats
# 1. Data Generation Function (customizable via UI filters)
def generate_custom_model_points(
    mp_count_val, seed_val, age_min_val, age_max_val,
    sa_min_val, sa_max_val, policy_terms_selection_val,
    include_sex_val, policy_count_fixed_val
):
    """
    Generate seriatim model points based on user-defined parameters.

    Args:
        mp_count_val: Number of rows to generate (converted with ``int``).
        seed_val: Seed for ``numpy.random.default_rng`` (converted with ``int``).
        age_min_val: Lowest age at entry, inclusive.
        age_max_val: Highest age at entry, inclusive.
        sa_min_val: Lower bound of the sum assured before rounding.
        sa_max_val: Upper bound of the sum assured before rounding.
        policy_terms_selection_val: Sequence (list, tuple or NumPy array) of
            policy terms in years to sample from. ``None`` or an empty
            selection falls back to ``[10, 15, 20]``.
        include_sex_val: When truthy the ``sex`` column is drawn from
            ``"M"``/``"F"``; otherwise every row gets ``"U"``.
        policy_count_fixed_val: When truthy ``policy_count`` is 1 for every
            row; otherwise it is drawn from 1..100 inclusive.

    Returns:
        A ``pandas.DataFrame`` with the columns ``policy_id``,
        ``age_at_entry``, ``sex``, ``policy_term``, ``policy_count``,
        ``sum_assured`` and ``duration_mth``.

    Raises:
        ValueError: Propagated from NumPy, e.g. when ``age_min_val`` exceeds
            ``age_max_val`` or the seed is negative.
    """
    default_terms = [10, 15, 20]

    rng = default_rng(int(seed_val))
    n_points = int(mp_count_val)
    age_low = int(age_min_val)
    age_high = int(age_max_val)
    sa_low = float(sa_min_val)
    sa_high = float(sa_max_val)

    # NOTE: the order of the rng calls below is part of the contract -- a
    # given seed must keep producing the same table.
    policy_id_col = np.arange(1, n_points + 1)
    age_at_entry = rng.integers(low=age_low, high=age_high + 1, size=n_points)

    if include_sex_val:
        sex_options = np.array(["M", "F"])
        # One vectorised lookup instead of a Python-level map over every row.
        sex_col = sex_options[rng.integers(low=0, high=len(sex_options), size=n_points)]
    else:
        sex_col = np.full(n_points, "U")

    # np.size() gives an emptiness test that also works for NumPy arrays,
    # whose truth value is ambiguous when they hold more than one element.
    if policy_terms_selection_val is None or np.size(policy_terms_selection_val) == 0:
        policy_terms_selection_val = default_terms
    policy_term_options = np.asarray(policy_terms_selection_val).astype(int)
    policy_term_col = rng.choice(policy_term_options, size=n_points)

    # Uniform draw on [sa_low, sa_high), rounded to the nearest thousand.
    sum_assured_col = np.round((sa_high - sa_low) * rng.random(size=n_points) + sa_low, -3)

    # Elapsed duration in months: at least 1 and at most one month short of
    # the full term (with a floor of 1 on that upper bound).
    max_duration_val = np.maximum(1, policy_term_col * 12 - 1)
    duration_mth_col = (rng.random(size=n_points) * max_duration_val).astype(int) + 1
    duration_mth_col = np.clip(duration_mth_col, 1, max_duration_val)

    if policy_count_fixed_val:
        policy_count_col_val = np.ones(n_points, dtype=int)
    else:
        policy_count_col_val = rng.integers(low=1, high=101, size=n_points)

    return pd.DataFrame({
        "policy_id": policy_id_col,
        "age_at_entry": age_at_entry,
        "sex": sex_col,
        "policy_term": policy_term_col,
        "policy_count": policy_count_col_val,
        "sum_assured": sum_assured_col,
        "duration_mth": duration_mth_col,
    })
# 2. Gradio App Definition
with gr.Blocks() as demo:
    # NOTE(review): the nesting of the layout below was reconstructed from a
    # source whose indentation had been flattened -- confirm it matches the
    # intended page structure.
    gr.Markdown("# Actuarial Model Points Generator")
    gr.Markdown(
        "Configure the parameters below to generate a custom set of seriatim model points. "
        "The generated table can be viewed and downloaded. "
        "Additional summary statistics and visualizations are provided below."
    )

    # Holds the most recently generated DataFrame. Handlers must receive it
    # as an input: `df_state.value` is only the component's initial value.
    df_state = gr.State()

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### Generation Parameters")
            mp_count_input = gr.Slider(minimum=100, maximum=50000, value=1000, step=100, label="Number of Model Points")
            seed_input = gr.Number(value=12345, precision=0, label="Random Seed")

            gr.Markdown("#### Age Parameters")
            age_min_input = gr.Slider(minimum=18, maximum=40, value=20, step=1, label="Minimum Age at Entry")
            age_max_input = gr.Slider(minimum=41, maximum=80, value=59, step=1, label="Maximum Age at Entry")

            gr.Markdown("#### Sum Assured Parameters ($)")
            sum_assured_min_input = gr.Number(value=10000, label="Minimum Sum Assured")
            sum_assured_max_input = gr.Number(value=1000000, label="Maximum Sum Assured")

            gr.Markdown("#### Policy Options")
            policy_terms_input = gr.CheckboxGroup(choices=[5, 10, 15, 20, 25, 30], value=[10, 15, 20], label="Available Policy Terms (Years)")
            include_sex_input = gr.Checkbox(value=True, label="Include Sex (M/F)")
            policy_count_fixed_input = gr.Checkbox(value=True, label="Fixed Policy Count = 1")

            generate_btn = gr.Button("Generate Model Points", variant="primary")

        with gr.Column(scale=2):
            model_points_display = gr.Dataframe(label="Generated Model Points", wrap=True)
            download_excel_btn = gr.DownloadButton(label="Download Excel", variant="secondary")

    gr.Markdown("---")
    gr.Markdown("## 📊 Data Summary & Analysis")
    with gr.Tabs():
        with gr.TabItem("Numerical Summary"):
            summary_stats_display = gr.Dataframe(label="Descriptive Statistics", wrap=True)
        with gr.TabItem("Distribution Plot"):
            gr.Markdown("Distribution of **Sum Assured**. Data is generated uniformly; a normal curve is fitted for illustration.")
            distribution_plot_display = gr.Plot()
        with gr.TabItem("Categorical Summary"):
            sex_summary_display = gr.Dataframe(label="Sex Distribution", wrap=True)
            policy_term_summary_display = gr.Dataframe(label="Policy Term Distribution", wrap=True)

    # 3. Event Handlers
    def _message_frame(column, text):
        """Return a one-cell DataFrame used to show a message in a table widget."""
        return pd.DataFrame({column: [text]})

    def _rejected(message, previous_df):
        """Build the seven handler outputs for a request that produced no new data.

        The previously generated table (if any) stays on screen and in the
        session state, the summary widgets show `message`, the plot is
        cleared and the download button is left untouched.
        """
        shown = previous_df if isinstance(previous_df, pd.DataFrame) else pd.DataFrame()
        error_df = _message_frame('Error', message)
        return shown, previous_df, error_df, None, error_df, error_df, gr.update()

    def _numeric_summary(df):
        """Return `describe()` statistics for the numerical columns, one row per column."""
        numerical_cols = ['age_at_entry', 'sum_assured', 'duration_mth', 'policy_count']
        present = [col for col in numerical_cols if col in df.columns]
        if not present:
            return _message_frame('Message', "No numerical columns found for summary.")
        desc_stats = df[present].describe().transpose()
        if 'count' in desc_stats.columns:
            desc_stats['count'] = desc_stats['count'].astype(int)
        return desc_stats.reset_index().rename(columns={'index': 'Statistic'})

    def _sum_assured_figure(df):
        """Return a histogram of `sum_assured` with a fitted normal curve, or None."""
        if 'sum_assured' not in df.columns or df['sum_assured'].empty:
            return None
        # A Figure built directly is never registered with pyplot, so it is
        # garbage-collected normally instead of accumulating on every request.
        from matplotlib.figure import Figure

        fig = Figure(figsize=(8, 5))
        ax = fig.subplots()
        ax.hist(df['sum_assured'], bins=50, density=True, alpha=0.7, color='skyblue', edgecolor='black', label='Actual Distribution')
        mu, std_dev = stats.norm.fit(df['sum_assured'])
        xmin_hist, xmax_hist = ax.get_xlim()
        x_norm = np.linspace(xmin_hist, xmax_hist, 100)
        p_norm = stats.norm.pdf(x_norm, mu, std_dev)
        ax.plot(x_norm, p_norm, 'r--', linewidth=2, label=f'Fitted Normal (μ={mu:,.0f}, σ={std_dev:,.0f})')
        ax.set_title(f"Sum Assured Distribution (N={len(df)})", fontsize=14)
        ax.set_xlabel("Sum Assured ($)", fontsize=12)
        ax.set_ylabel("Density", fontsize=12)
        ax.legend()
        ax.grid(axis='y', linestyle='--', alpha=0.7)
        fig.tight_layout()
        return fig

    def _category_counts(df, column, label, sort_by_value=False):
        """Return counts and percentages of the distinct values in `column`."""
        if column not in df.columns:
            return _message_frame('Message', f"'{column}' column not found or data is empty.")
        counts = df[column].value_counts()
        if sort_by_value:
            counts = counts.sort_index()
        counts = counts.reset_index()
        counts.columns = [label, 'Count']
        counts['Percentage'] = (counts['Count'] / counts['Count'].sum() * 100).round(2)
        return counts

    def _export_excel(df):
        """Write `df` to a fresh temporary .xlsx file and return its path.

        A DownloadButton serves a file path, not an in-memory buffer. Each
        call creates a new temporary directory that is not cleaned up here.
        """
        import tempfile
        from pathlib import Path

        target = Path(tempfile.mkdtemp(prefix="model_points_")) / "model_points.xlsx"
        df.to_excel(target, sheet_name='ModelPoints', engine='xlsxwriter', index=False)
        return str(target)

    def handle_generate_button_click(
        mp_c, s, age_m, age_mx, sa_m, sa_mx, p_terms, incl_sex, pc_fixed,
        previous_df=None
    ):
        """Validate the inputs, generate model points and build every summary.

        Args:
            mp_c, s, age_m, age_mx, sa_m, sa_mx, p_terms, incl_sex, pc_fixed:
                Values of the generation widgets, forwarded to
                `generate_custom_model_points` in this order.
            previous_df: Current content of `df_state` for this session; it
                is kept when the request is rejected or generation fails.

        Returns:
            A 7-tuple matching the `outputs` of `generate_btn.click`: table,
            state, numerical summary, plot, sex summary, policy-term summary
            and the download button value (path of the Excel file).
        """
        # A cleared gr.Number arrives as None and would crash int()/float().
        if any(value is None for value in (mp_c, s, age_m, age_mx, sa_m, sa_mx)):
            message = "All numeric parameters must be provided."
            gr.Warning(message)
            return _rejected(message, previous_df)

        if int(age_m) >= int(age_mx):
            message = "Minimum Age must be less than Maximum Age."
            gr.Warning(message)
            return _rejected(message, previous_df)

        if float(sa_m) >= float(sa_mx):
            message = "Minimum Sum Assured must be less than Maximum Sum Assured."
            gr.Warning(message)
            return _rejected(message, previous_df)

        if not p_terms:
            gr.Warning("At least one Policy Term must be selected. Using defaults.")
            p_terms = [10, 15, 20]

        gr.Info("Generating model points... Please wait.")
        try:
            df = generate_custom_model_points(mp_c, s, age_m, age_mx, sa_m, sa_mx, p_terms, incl_sex, pc_fixed)
            if df.empty:
                desc_stats_df = _message_frame('Message', "DataFrame is empty, no numerical data to describe.")
                sex_counts_df = _message_frame('Message', "DataFrame is empty, no categorical data for 'sex'.")
                term_counts_df = _message_frame('Message', "DataFrame is empty, no categorical data for 'policy_term'.")
            else:
                desc_stats_df = _numeric_summary(df)
                sex_counts_df = _category_counts(df, 'sex', 'Sex')
                term_counts_df = _category_counts(df, 'policy_term', 'Policy Term (Years)', sort_by_value=True)
            fig = _sum_assured_figure(df)
        except Exception as e:
            # gr.Error only shows up when raised, which would also discard the
            # outputs below; gr.Warning displays the message and lets the
            # handler keep the previous data on screen.
            gr.Warning(f"An error occurred during generation: {str(e)}")
            return _rejected(str(e), previous_df)

        gr.Info(f"{len(df)} model points generated successfully!")

        # The download button serves whatever value it holds when clicked, so
        # the file has to exist before the user presses it.
        try:
            excel_path = _export_excel(df)
        except Exception as e:
            gr.Warning(f"Excel export failed: {str(e)}")
            excel_path = None

        return df, df, desc_stats_df, fig, sex_counts_df, term_counts_df, excel_path

    def handle_download_button_click(current_df_to_download):
        """Warn when the download button is pressed before any data exists.

        The file itself is attached to the button by
        `handle_generate_button_click`; this handler has no outputs.
        """
        if not isinstance(current_df_to_download, pd.DataFrame) or current_df_to_download.empty:
            gr.Warning("No data available to download. Generate model points first.")

    inputs_list = [
        mp_count_input, seed_input, age_min_input, age_max_input,
        sum_assured_min_input, sum_assured_max_input, policy_terms_input,
        include_sex_input, policy_count_fixed_input
    ]

    generate_btn.click(
        fn=handle_generate_button_click,
        # df_state is passed in so the handler sees this session's data.
        inputs=inputs_list + [df_state],
        outputs=[
            model_points_display, df_state, summary_stats_display,
            distribution_plot_display, sex_summary_display, policy_term_summary_display,
            download_excel_btn
        ]
    )

    download_excel_btn.click(
        fn=handle_download_button_click,
        inputs=[df_state],
        outputs=None
    )

if __name__ == "__main__":
    demo.launch(debug=True)