Spaces:

alidenewade
/

actuarial-model-point-generator

Sleeping

App Files Files Community

alidenewade committed on May 24

Commit

d38b638

verified ·

1 Parent(s): 25e7c74

Update app.py

Browse files

Files changed (1) hide show

app.py +289 -17

app.py CHANGED Viewed

@@ -3,6 +3,16 @@ import pandas as pd
 import numpy as np
 from numpy.random import default_rng
 import io  # For BytesIO to handle file in memory
 # 1. Data Generation Function (customizable via UI filters)
 def generate_custom_model_points(
@@ -54,7 +64,6 @@ def generate_custom_model_points(
     # And ensure it's at least 1
     duration_mth_col = np.maximum(1, duration_mth_col)
     # Policy Count
     if policy_count_fixed_val:
         policy_count_col_val = np.ones(mp_count_val, dtype=int)
@@ -76,12 +85,208 @@ def generate_custom_model_points(
     return model_point_df
-# 2. Gradio App Definition
-with gr.Blocks() as demo:
-    gr.Markdown("# Actuarial Model Points Generator")
     gr.Markdown(
         "Configure the parameters below to generate a custom set of seriatim model points. "
-        "The generated table can be viewed and downloaded as an Excel file."
     )
     df_state = gr.State()  # To hold the generated DataFrame
@@ -126,36 +331,98 @@ with gr.Blocks() as demo:
                 value=True, label="Fixed Policy Count = 1 (Uncheck for variable count 1-100)"
             )
-            generate_btn = gr.Button("Generate Model Points", variant="primary")
         with gr.Column(scale=2):
             model_points_display = gr.Dataframe(label="Generated Model Points")
             download_excel_btn = gr.DownloadButton(
-                label="Download Excel",
-                value="model_points.xlsx", # Default filename
                 variant="secondary"
             )
-    # 3. Event Handlers
     def handle_generate_button_click(
         mp_c, s, age_m, age_mx, sa_m, sa_mx, p_terms, incl_sex, pc_fixed
     ):
         if int(age_m) >= int(age_mx):
             gr.Warning("Minimum Age must be less than Maximum Age.")
-            return df_state.value, df_state.value # Keep current table and state
         if float(sa_m) >= float(sa_mx):
             gr.Warning("Minimum Sum Assured must be less than Maximum Sum Assured.")
-            return df_state.value, df_state.value
         if not p_terms:
             gr.Warning("No Policy Terms selected. Using defaults: [10, 15, 20].")
-            # Generation function will handle default if p_terms is empty list
-        gr.Info("Generating model points... Please wait.")
         df = generate_custom_model_points(
             mp_c, s, age_m, age_mx, sa_m, sa_mx, p_terms, incl_sex, pc_fixed
         )
-        gr.Info(f"{len(df)} model points generated successfully!")
-        return df, df
     def handle_download_button_click(current_df_to_download):
         if current_df_to_download is None or current_df_to_download.empty:
@@ -177,10 +444,16 @@ with gr.Blocks() as demo:
         include_sex_input, policy_count_fixed_input
     ]
     generate_btn.click(
         fn=handle_generate_button_click,
         inputs=inputs_list,
-        outputs=[model_points_display, df_state]
     )
     download_excel_btn.click(
@@ -189,6 +462,5 @@ with gr.Blocks() as demo:
         outputs=[download_excel_btn]
     )
 if __name__ == "__main__":
     demo.launch()

 import numpy as np
 from numpy.random import default_rng
 import io  # For BytesIO to handle file in memory
+import matplotlib.pyplot as plt
+import seaborn as sns
+from scipy import stats
+import plotly.express as px
+import plotly.graph_objects as go
+from plotly.subplots import make_subplots
+# Set style for matplotlib
+plt.style.use('default')
+sns.set_palette("husl")
 # 1. Data Generation Function (customizable via UI filters)
 def generate_custom_model_points(
     # And ensure it's at least 1
     duration_mth_col = np.maximum(1, duration_mth_col)
     # Policy Count
     if policy_count_fixed_val:
         policy_count_col_val = np.ones(mp_count_val, dtype=int)
     return model_point_df
+# 2. Analytics Functions
def generate_summary_statistics(df):
    """Build a descriptive-statistics table for the numeric model-point columns.

    The table is ``DataFrame.describe()`` rounded to 2 decimals, with two
    extra rows appended: ``mode`` (the first modal value of each column) and
    ``variance`` (``Series.var()``), both also rounded to 2 decimals.

    Args:
        df: Model-point DataFrame, or ``None``.

    Returns:
        A DataFrame indexed by statistic name with one column per expected
        numeric field that is present in ``df``. An empty DataFrame is
        returned when ``df`` is ``None``/empty or has none of those fields.
    """
    if df is None or df.empty:
        return pd.DataFrame()

    expected_cols = (
        'age_at_entry', 'policy_term', 'policy_count', 'sum_assured', 'duration_mth',
    )
    # Only summarise the fields that actually exist so a trimmed-down frame
    # yields a smaller table instead of a KeyError.
    numerical_cols = [col for col in expected_cols if col in df.columns]
    if not numerical_cols:
        return pd.DataFrame()

    numeric = df[numerical_cols]
    summary_stats = numeric.describe().round(2)

    def _first_mode(series):
        # Series.mode() is empty for an all-NaN column; fall back to NaN
        # rather than failing on the positional lookup.
        modes = series.mode()
        return modes.iloc[0] if not modes.empty else float('nan')

    additional_stats = pd.DataFrame(
        {col: [_first_mode(numeric[col]), numeric[col].var()] for col in numerical_cols},
        index=['mode', 'variance'],
    ).round(2)

    return pd.concat([summary_stats, additional_stats])
def _histogram_with_normal_curve(values, *, bins, color, title, xlabel,
                                 stat_format, tick_formatter=None):
    """Draw a density histogram of *values* with a normal pdf drawn on top.

    The normal curve uses the sample mean and standard deviation of *values*
    and is evaluated on 100 evenly spaced points between their min and max.

    Args:
        values: pandas Series of numbers to plot.
        bins: Number of histogram bins.
        color: Histogram face colour.
        title: Axes title.
        xlabel: X-axis label.
        stat_format: ``str.format`` template used to render the mean and the
            standard deviation inside the legend label.
        tick_formatter: Optional ``(value, position) -> str`` callable used
            for the x-axis tick labels.

    Returns:
        The Matplotlib figure holding the plot.
    """
    fig = plt.figure(figsize=(10, 6))
    ax = fig.add_subplot(111)
    ax.hist(values, bins=bins, density=True, alpha=0.7, color=color, edgecolor='black')

    mean = values.mean()
    std = values.std()
    grid = np.linspace(values.min(), values.max(), 100)
    label = f'Normal Curve (μ={stat_format.format(mean)}, σ={stat_format.format(std)})'
    ax.plot(grid, stats.norm.pdf(grid, mean, std), 'r-', linewidth=2, label=label)

    ax.set_title(title, fontsize=14, fontweight='bold')
    ax.set_xlabel(xlabel)
    ax.set_ylabel('Density')
    ax.legend()
    ax.grid(True, alpha=0.3)
    if tick_formatter is not None:
        ax.xaxis.set_major_formatter(plt.FuncFormatter(tick_formatter))
    fig.tight_layout()
    return fig


def create_distribution_plots(df):
    """Create distribution plots with normal curve overlay.

    Args:
        df: Model-point DataFrame with ``age_at_entry``, ``sum_assured`` and
            ``duration_mth`` columns, or ``None``.

    Returns:
        ``(fig_age, fig_sa, fig_duration)`` Matplotlib figures, or
        ``(None, None, None)`` when ``df`` is ``None`` or empty.
    """
    if df is None or df.empty:
        return None, None, None

    # NOTE(review): figures are created through pyplot and never closed here;
    # confirm the caller releases them, otherwise a long-running app keeps
    # every generated figure alive.
    fig_age = _histogram_with_normal_curve(
        df['age_at_entry'],
        bins=20,
        color='skyblue',
        title='Age at Entry Distribution with Normal Curve Overlay',
        xlabel='Age at Entry',
        stat_format='{:.1f}',
    )

    fig_sa = _histogram_with_normal_curve(
        df['sum_assured'],
        bins=30,
        color='lightgreen',
        title='Sum Assured Distribution with Normal Curve Overlay',
        xlabel='Sum Assured ($)',
        # The label holds two dollar signs; unescaped, Matplotlib would treat
        # the text between them as a math expression. Escaping keeps both
        # literal.
        stat_format=r'\${:,.0f}',
        tick_formatter=lambda x, p: f'${x:,.0f}',
    )

    fig_duration = _histogram_with_normal_curve(
        df['duration_mth'],
        bins=25,
        color='lightcoral',
        title='Policy Duration (Months) Distribution with Normal Curve Overlay',
        xlabel='Duration (Months)',
        stat_format='{:.1f}',
    )

    return fig_age, fig_sa, fig_duration
def create_categorical_analysis(df):
    """Summarise the categorical fields and plot the policy-term counts.

    The summary table has one heading row per variable (only ``Variable``
    filled in) followed by one row per category with its count and its share
    of all rows in ``df``.

    Args:
        df: Model-point DataFrame with a ``policy_term`` column and,
            optionally, a ``sex`` column; or ``None``.

    Returns:
        ``(categorical_summary, fig_terms)``: the summary DataFrame with
        columns ``Variable``, ``Category``, ``Count``, ``Percentage`` and a
        Matplotlib bar chart of policy-term counts. Returns
        ``(pd.DataFrame(), None)`` when ``df`` is ``None`` or empty.
    """
    if df is None or df.empty:
        return pd.DataFrame(), None

    total_rows = len(df)

    # Policy terms are listed in ascending term order; sexes by frequency.
    term_counts = df['policy_term'].value_counts().sort_index()

    sections = []
    # presumably the sex column is absent when the UI's include-sex option is
    # off — verify against generate_custom_model_points. Skip the section
    # rather than fail when it is missing.
    if 'sex' in df.columns:
        sex_counts = df['sex'].value_counts()
        sections.append(('Sex Distribution', list(sex_counts.index), sex_counts))
    sections.append((
        'Policy Term Distribution',
        [f'{term} years' for term in term_counts.index],
        term_counts,
    ))

    # Build the table row by row so every column always has the same length:
    # one heading row, then exactly one row per category.
    records = []
    for heading, labels, counts in sections:
        percentages = (counts / total_rows * 100).round(2)
        records.append((heading, '', '', ''))
        records.extend(
            ('', label, count, f'{pct}%')
            for label, count, pct in zip(labels, counts, percentages)
        )
    categorical_summary = pd.DataFrame(
        records, columns=['Variable', 'Category', 'Count', 'Percentage']
    )

    # Create bar plot for policy terms
    fig_terms = plt.figure(figsize=(10, 6))
    ax = fig_terms.add_subplot(111)
    bars = ax.bar(
        [str(term) for term in term_counts.index],
        term_counts.to_numpy(),
        color='gold',
        edgecolor='black',
        alpha=0.8,
    )
    ax.set_title('Policy Term Distribution', fontsize=14, fontweight='bold')
    ax.set_xlabel('Policy Term (Years)')
    ax.set_ylabel('Count')
    ax.grid(True, alpha=0.3, axis='y')

    # Add count labels on bars
    for bar in bars:
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width() / 2., height,
                f'{int(height)}', ha='center', va='bottom')

    fig_terms.tight_layout()
    return categorical_summary, fig_terms
def create_correlation_analysis(df):
    """Compute and plot pairwise correlations of the numeric fields.

    Columns that are missing from ``df`` or that hold a single distinct value
    are left out: a constant column (for example ``policy_count`` when every
    row has a count of 1) has no defined correlation and would otherwise show
    up as a row and column of NaN in both the heatmap and the table.

    Args:
        df: Model-point DataFrame, or ``None``.

    Returns:
        ``(fig_corr, corr_summary)``: a Matplotlib figure with the
        lower-triangle heatmap and the correlation matrix rounded to 3
        decimals. Returns ``(None, pd.DataFrame())`` when ``df`` is ``None``
        or empty, or when fewer than two usable columns remain.
    """
    if df is None or df.empty:
        return None, pd.DataFrame()

    candidate_cols = ['age_at_entry', 'policy_term', 'policy_count', 'sum_assured', 'duration_mth']
    numerical_cols = [
        col for col in candidate_cols
        if col in df.columns and df[col].nunique(dropna=True) > 1
    ]
    if len(numerical_cols) < 2:
        # Nothing to correlate against.
        return None, pd.DataFrame()

    corr_matrix = df[numerical_cols].corr()

    # Create correlation heatmap
    fig_corr = plt.figure(figsize=(10, 8))
    ax = fig_corr.add_subplot(111)
    # Hide the diagonal and the upper triangle; the matrix is symmetric, so
    # the lower triangle carries all the information.
    mask = np.triu(np.ones_like(corr_matrix, dtype=bool))
    sns.heatmap(corr_matrix, mask=mask, annot=True, cmap='RdYlBu_r', center=0,
                square=True, fmt='.3f', cbar_kws={"shrink": .8}, ax=ax)
    ax.set_title('Correlation Matrix of Numerical Variables', fontsize=14, fontweight='bold')
    fig_corr.tight_layout()

    # Create correlation summary table
    corr_summary = corr_matrix.round(3)
    return fig_corr, corr_summary
def generate_business_insights(df):
    """Generate business insights and key metrics as a Markdown string.

    Args:
        df: Model-point DataFrame with ``age_at_entry``, ``policy_term``,
            ``sum_assured`` and ``duration_mth`` columns, or ``None``.

    Returns:
        Markdown text with portfolio totals/averages, the share of rows in
        three entry-age bands (<=30, 31-50, >50), the most frequent policy
        term and the observed duration and sum-assured ranges. Returns ``""``
        when ``df`` is ``None`` or empty.
    """
    if df is None or df.empty:
        return ""

    ages = df['age_at_entry']
    terms = df['policy_term']
    sums_assured = df['sum_assured']
    durations = df['duration_mth']

    avg_duration = durations.mean()

    # Most common policy term and the share of rows that use it.
    most_common_term = terms.mode()[0]
    term_percentage = (terms == most_common_term).mean() * 100

    # Age bands: the mean of a boolean mask is the fraction of matching rows.
    young_pct = (ages <= 30).mean() * 100
    middle_pct = ((ages > 30) & (ages <= 50)).mean() * 100
    mature_pct = (ages > 50).mean() * 100

    # Assemble the text line by line so no line carries leading whitespace:
    # four leading spaces mark an indented code block in Markdown, which
    # would stop the headings and bullets from rendering as such.
    lines = [
        "## 📊 Business Insights & Key Metrics",
        "",
        "### Portfolio Overview",
        f"- **Total Policies Generated**: {len(df):,}",
        f"- **Total Sum Assured**: ${sums_assured.sum():,.0f}",
        f"- **Average Sum Assured**: ${sums_assured.mean():,.0f}",
        f"- **Average Issue Age**: {ages.mean():.1f} years",
        f"- **Average Policy Duration**: {avg_duration:.1f} months ({avg_duration / 12:.1f} years)",
        "",
        "### Demographics",
        f"- **Young Policyholders (≤30)**: {young_pct:.1f}%",
        f"- **Middle-aged (31-50)**: {middle_pct:.1f}%",
        f"- **Mature (>50)**: {mature_pct:.1f}%",
        "",
        "### Product Mix",
        f"- **Most Popular Term**: {most_common_term} years ({term_percentage:.1f}% of policies)",
        f"- **Policy Duration Range**: {durations.min()} - {durations.max()} months",
        f"- **Sum Assured Range**: ${sums_assured.min():,.0f} - ${sums_assured.max():,.0f}",
    ]
    return "\n".join(lines)
+# 3. Gradio App Definition
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 🎯 Actuarial Model Points Generator with Analytics")
     gr.Markdown(
         "Configure the parameters below to generate a custom set of seriatim model points. "
+        "The generated table can be viewed and downloaded as an Excel file, complete with comprehensive analytics and insights."
     )
     df_state = gr.State()  # To hold the generated DataFrame
                 value=True, label="Fixed Policy Count = 1 (Uncheck for variable count 1-100)"
             )
+            generate_btn = gr.Button("Generate Model Points", variant="primary", size="lg")
         with gr.Column(scale=2):
             model_points_display = gr.Dataframe(label="Generated Model Points")
             download_excel_btn = gr.DownloadButton(
+                label="📥 Download Excel",
+                value="model_points.xlsx",
                 variant="secondary"
             )
+    # Analytics Section
+    gr.Markdown("---")
+    with gr.Row():
+        with gr.Column():
+            business_insights = gr.Markdown("Generate model points to see business insights...")
+    with gr.Row():
+        with gr.Column():
+            gr.Markdown("### 📈 Summary Statistics")
+            summary_stats_display = gr.Dataframe(label="Descriptive Statistics")
+        with gr.Column():
+            gr.Markdown("### 🏷️ Categorical Analysis")
+            categorical_display = gr.Dataframe(label="Category Distributions")
+    with gr.Row():
+        with gr.Column():
+            gr.Markdown("### 📊 Age Distribution")
+            age_plot = gr.Plot(label="Age Distribution with Normal Curve")
+        with gr.Column():
+            gr.Markdown("### 💰 Sum Assured Distribution")
+            sa_plot = gr.Plot(label="Sum Assured Distribution with Normal Curve")
+    with gr.Row():
+        with gr.Column():
+            gr.Markdown("### ⏱️ Duration Distribution")
+            duration_plot = gr.Plot(label="Duration Distribution with Normal Curve")
+        with gr.Column():
+            gr.Markdown("### 📋 Policy Term Distribution")
+            terms_plot = gr.Plot(label="Policy Terms")
+    with gr.Row():
+        with gr.Column():
+            gr.Markdown("### 🔗 Correlation Analysis")
+            correlation_plot = gr.Plot(label="Correlation Heatmap")
+        with gr.Column():
+            gr.Markdown("### 📋 Correlation Matrix")
+            correlation_matrix_display = gr.Dataframe(label="Correlation Coefficients")
+    # 4. Event Handlers
     def handle_generate_button_click(
         mp_c, s, age_m, age_mx, sa_m, sa_mx, p_terms, incl_sex, pc_fixed
     ):
         """Validate the form, generate model points and build every analytic.

         The arguments are forwarded positionally, unchanged, to
         ``generate_custom_model_points``. ``age_m``/``age_mx`` and
         ``sa_m``/``sa_mx`` are the lower/upper bounds checked below, and
         ``p_terms`` is the policy-term selection.

         Returns:
             One value per component in ``outputs_list``, in the same order:
             the table, the stored DataFrame, the insights Markdown, the
             summary and categorical tables, the four distribution/term
             figures, the correlation figure and the correlation table.
             When validation fails, every output receives a no-op update so
             the UI keeps what it was showing.
         """
         # An empty gr.update() leaves a component as it is. One is returned
         # per wired output so the count always matches, instead of a
         # hard-coded number of copies of df_state.value (which is the
         # component's initial value, not the DataFrame held for this session).
         # outputs_list is defined further down the same Blocks context and is
         # only looked up when the handler runs.
         unchanged = [gr.update() for _ in outputs_list]

         if int(age_m) >= int(age_mx):
             gr.Warning("Minimum Age must be less than Maximum Age.")
             return unchanged
         if float(sa_m) >= float(sa_mx):
             gr.Warning("Minimum Sum Assured must be less than Maximum Sum Assured.")
             return unchanged
         if not p_terms:
             # Only a notice: generation still proceeds with the empty selection.
             gr.Warning("No Policy Terms selected. Using defaults: [10, 15, 20].")

         gr.Info("Generating model points and analytics... Please wait.")

         # Generate data
         df = generate_custom_model_points(
             mp_c, s, age_m, age_mx, sa_m, sa_mx, p_terms, incl_sex, pc_fixed
         )

         # Generate analytics
         insights = generate_business_insights(df)
         summary_stats = generate_summary_statistics(df)
         categorical_summary, terms_fig = create_categorical_analysis(df)
         age_fig, sa_fig, duration_fig = create_distribution_plots(df)
         corr_fig, corr_matrix = create_correlation_analysis(df)

         gr.Info(f"✅ {len(df)} model points generated successfully with complete analytics!")

         return (
             df,  # model_points_display
             df,  # df_state
             insights,  # business_insights
             summary_stats,  # summary_stats_display
             categorical_summary,  # categorical_display
             age_fig,  # age_plot
             sa_fig,  # sa_plot
             duration_fig,  # duration_plot
             terms_fig,  # terms_plot
             corr_fig,  # correlation_plot
             corr_matrix  # correlation_matrix_display
         )
     def handle_download_button_click(current_df_to_download):
         if current_df_to_download is None or current_df_to_download.empty:
         include_sex_input, policy_count_fixed_input
     ]
+    outputs_list = [
+        model_points_display, df_state, business_insights, summary_stats_display,
+        categorical_display, age_plot, sa_plot, duration_plot, terms_plot,
+        correlation_plot, correlation_matrix_display
+    ]
     generate_btn.click(
         fn=handle_generate_button_click,
         inputs=inputs_list,
+        outputs=outputs_list
     )
     download_excel_btn.click(
         outputs=[download_excel_btn]
     )
 if __name__ == "__main__":
     demo.launch()