Spaces:

alidenewade
/

actuarial-model-point-generator

Sleeping

File size: 12,132 Bytes

42cf35e
 
 
 
4a7f563
fa05c25
 
d38b638
789a23e
42cf35e
abb7e3f
 
 
 
 
 
42cf35e
abb7e3f
42cf35e
abb7e3f
 
 
 
 
 
 
bf3c28f
abb7e3f
 
 
 
 
 
fa05c25
abb7e3f
fa05c25
0b55761
abb7e3f
0b55761
 
 
 
abb7e3f
 
 
0b55761
abb7e3f
da6aaca
abb7e3f
da6aaca
 
fa05c25
abb7e3f
 
 
 
fa05c25
b4e00d8
4a7f563
fa05c25
 
 
4a7f563
bf3c28f
4a7f563
 
789a23e
0b55761
ee6b750
4a7f563
abb7e3f
fa05c25
 
42cf35e
b4e00d8
fa05c25
b4e00d8
42cf35e
abb7e3f
 
da6aaca
fa05c25
abb7e3f
fa05c25
 
abb7e3f
fa05c25
 
abb7e3f
fa05c25
 
 
ee6b750
abb7e3f
 
7990da4
 
fa05c25
789a23e
 
 
0b55761
789a23e
fa05c25
da6aaca
789a23e
0b55761
 
 
789a23e
 
abb7e3f
 
 
0b55761
 
 
 
 
fa05c25
 
abb7e3f
 
0b55761
abb7e3f
 
0b55761
 
 
7990da4
0b55761
fa05c25
 
 
 
 
0b55761
7990da4
 
 
 
 
 
 
 
 
 
0b55761
 
 
 
fa05c25
da6aaca
fa05c25
0b55761
fa05c25
 
0b55761
 
fa05c25
 
 
 
 
 
0b55761
 
 
fa05c25
 
 
 
 
7990da4
0b55761
 
 
 
fa05c25
 
 
7990da4
0b55761
 
 
 
 
 
 
 
fa05c25
 
 
0b55761
 
789a23e
4a7f563
 
275ee78
fa05c25
7990da4
0b55761
4a7f563
275ee78
 
7990da4
 
0b55761
 
789a23e
abb7e3f
 
 
 
 
bf3c28f
42cf35e
4a7f563
abb7e3f
789a23e
fa05c25
0b55761
789a23e
42cf35e
0b55761
4a7f563
275ee78
abb7e3f
7990da4
42cf35e
4a7f563
42cf35e
789a23e

import gradio as gr
import pandas as pd
import numpy as np
from numpy.random import default_rng
import io  # For BytesIO to handle file in memory
import matplotlib # Explicitly set backend for environments where default might be an issue
matplotlib.use('Agg') # Use a non-interactive backend
import matplotlib.pyplot as plt
import scipy.stats as stats

# 1. Data Generation Function (customizable via UI filters)
def generate_custom_model_points(
    mp_count_val, seed_val, age_min_val, age_max_val,
    sa_min_val, sa_max_val, policy_terms_selection_val,
    include_sex_val, policy_count_fixed_val
):
    """
    Generates seriatim model points based on user-defined parameters.
    """
    rng = default_rng(int(seed_val))
    mp_count_val = int(mp_count_val)
    age_min_val = int(age_min_val)
    age_max_val = int(age_max_val)
    sa_min_val = float(sa_min_val)
    sa_max_val = float(sa_max_val)

    policy_id_col = np.arange(1, mp_count_val + 1)
    age_at_entry = rng.integers(low=age_min_val, high=age_max_val + 1, size=mp_count_val)

    if include_sex_val:
        sex_options = ["M", "F"]
        sex_col = np.fromiter(map(lambda i: sex_options[i], rng.integers(low=0, high=len(sex_options), size=mp_count_val)), np.dtype('<U1'))
    else:
        sex_col = np.full(mp_count_val, "U")

    if not policy_terms_selection_val:
        policy_terms_selection_val = [10, 15, 20] # Default if empty
    policy_term_options = np.array(policy_terms_selection_val).astype(int)
    if len(policy_term_options) == 0: # Handle case where user deselects all
        policy_term_options = np.array([10, 15, 20]) # Fallback to default
        gr.Warning("No policy terms selected. Using default terms [10, 15, 20].")

    policy_term_col = rng.choice(policy_term_options, size=mp_count_val)

    sum_assured_col = np.round((sa_max_val - sa_min_val) * rng.random(size=mp_count_val) + sa_min_val, -3)

    max_duration_val = policy_term_col * 12 - 1
    max_duration_val = np.maximum(1, max_duration_val)
    duration_mth_col = (rng.random(size=mp_count_val) * max_duration_val).astype(int) + 1
    duration_mth_col = np.minimum(duration_mth_col, max_duration_val)
    duration_mth_col = np.maximum(1, duration_mth_col)


    if policy_count_fixed_val:
        policy_count_col_val = np.ones(mp_count_val, dtype=int)
    else:
        policy_count_col_val = rng.integers(low=1, high=101, size=mp_count_val)

    data_dict = {
        "policy_id": policy_id_col, "age_at_entry": age_at_entry, "sex": sex_col,
        "policy_term": policy_term_col, "policy_count": policy_count_col_val,
        "sum_assured": sum_assured_col, "duration_mth": duration_mth_col
    }
    model_point_df = pd.DataFrame(data_dict)
    return model_point_df

# 2. Gradio App Definition
with gr.Blocks() as demo:
    gr.Markdown("# Actuarial Model Points Generator")
    gr.Markdown(
        "Configure the parameters below to generate a custom set of seriatim model points. "
        "The generated table can be viewed and downloaded. "
        "Additional summary statistics and visualizations are provided below."
    )

    df_state = gr.State()

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### Generation Parameters")
            mp_count_input = gr.Slider(minimum=100, maximum=50000, value=1000, step=100, label="Number of Model Points")
            seed_input = gr.Number(value=12345, precision=0, label="Random Seed")
            gr.Markdown("#### Age Parameters")
            age_min_input = gr.Slider(minimum=18, maximum=40, value=20, step=1, label="Minimum Age at Entry")
            age_max_input = gr.Slider(minimum=41, maximum=80, value=59, step=1, label="Maximum Age at Entry")
            gr.Markdown("#### Sum Assured Parameters ($)")
            sum_assured_min_input = gr.Number(value=10000, label="Minimum Sum Assured")
            sum_assured_max_input = gr.Number(value=1000000, label="Maximum Sum Assured")
            gr.Markdown("#### Policy Options")
            policy_terms_input = gr.CheckboxGroup(choices=[5, 10, 15, 20, 25, 30], value=[10, 15, 20], label="Available Policy Terms (Years)")
            include_sex_input = gr.Checkbox(value=True, label="Include Sex (M/F)")
            policy_count_fixed_input = gr.Checkbox(value=True, label="Fixed Policy Count = 1")
            generate_btn = gr.Button("Generate Model Points", variant="primary")

        with gr.Column(scale=2):
            model_points_display = gr.Dataframe(label="Generated Model Points", wrap=True) # CORRECTED: Removed height=400
            download_excel_btn = gr.DownloadButton(label="Download Excel", variant="secondary")
            gr.Markdown("---")
            gr.Markdown("## 📊 Data Summary & Analysis")
            with gr.Tabs():
                with gr.TabItem("Numerical Summary"):
                    summary_stats_display = gr.Dataframe(label="Descriptive Statistics", wrap=True)
                with gr.TabItem("Distribution Plot"):
                    gr.Markdown("Distribution of **Sum Assured**. Data is generated uniformly; a normal curve is fitted for illustration.")
                    distribution_plot_display = gr.Plot()
                with gr.TabItem("Categorical Summary"):
                    sex_summary_display = gr.Dataframe(label="Sex Distribution", wrap=True)
                    policy_term_summary_display = gr.Dataframe(label="Policy Term Distribution", wrap=True)


    # 3. Event Handlers
    def handle_generate_button_click(
        mp_c, s, age_m, age_mx, sa_m, sa_mx, p_terms, incl_sex, pc_fixed
    ):
        empty_df = pd.DataFrame()
        empty_plot = None
        no_data_df_num = pd.DataFrame({'Message': ["No data generated or summary failed."]})
        no_data_df_cat = pd.DataFrame({'Message': ["No data generated or summary failed."]})

        current_df = df_state.value if df_state.value is not None else pd.DataFrame()

        if int(age_m) >= int(age_mx):
            gr.Warning("Minimum Age must be less than Maximum Age.")
            return current_df, df_state.value, pd.DataFrame({'Error': ["Minimum Age must be less than Maximum Age."]}), empty_plot, pd.DataFrame({'Error': ["Minimum Age must be less than Maximum Age."]}), pd.DataFrame({'Error': ["Minimum Age must be less than Maximum Age."]})
        if float(sa_m) >= float(sa_mx):
            gr.Warning("Minimum Sum Assured must be less than Maximum Sum Assured.")
            return current_df, df_state.value, pd.DataFrame({'Error': ["Minimum Sum Assured must be less than Maximum Sum Assured."]}), empty_plot, pd.DataFrame({'Error': ["Minimum Sum Assured must be less than Maximum Sum Assured."]}), pd.DataFrame({'Error': ["Minimum Sum Assured must be less than Maximum Sum Assured."]})
        if not p_terms:
            gr.Warning("At least one Policy Term must be selected. Using defaults.")
            p_terms = [10, 15, 20]

        gr.Info("Generating model points... Please wait.")
        try:
            df = generate_custom_model_points(mp_c, s, age_m, age_mx, sa_m, sa_mx, p_terms, incl_sex, pc_fixed)
            gr.Info(f"{len(df)} model points generated successfully!")

            desc_stats_df = no_data_df_num
            if not df.empty:
                numerical_cols = ['age_at_entry', 'sum_assured', 'duration_mth', 'policy_count']
                existing_numerical_cols = [col for col in numerical_cols if col in df.columns]
                if existing_numerical_cols:
                    desc_stats = df[existing_numerical_cols].describe().transpose()
                    if 'count' in desc_stats.columns:
                        desc_stats['count'] = desc_stats['count'].astype(int)
                    desc_stats_df = desc_stats.reset_index().rename(columns={'index': 'Statistic'})
                else:
                    desc_stats_df = pd.DataFrame({'Message': ["No numerical columns found for summary."]})
            elif df.empty:
                 desc_stats_df = pd.DataFrame({'Message': ["DataFrame is empty, no numerical data to describe."]})

            fig = empty_plot
            if 'sum_assured' in df.columns and not df['sum_assured'].empty:
                fig, ax = plt.subplots(figsize=(8, 5))
                ax.hist(df['sum_assured'], bins=50, density=True, alpha=0.7, color='skyblue', edgecolor='black', label='Actual Distribution')
                mu, std_dev = stats.norm.fit(df['sum_assured'])
                xmin_hist, xmax_hist = ax.get_xlim()
                x_norm = np.linspace(xmin_hist, xmax_hist, 100)
                p_norm = stats.norm.pdf(x_norm, mu, std_dev)
                ax.plot(x_norm, p_norm, 'r--', linewidth=2, label=f'Fitted Normal (μ={mu:,.0f}, σ={std_dev:,.0f})')
                ax.set_title(f"Sum Assured Distribution (N={len(df)})", fontsize=14)
                ax.set_xlabel("Sum Assured ($)", fontsize=12)
                ax.set_ylabel("Density", fontsize=12)
                ax.legend()
                ax.grid(axis='y', linestyle='--', alpha=0.7)
                plt.tight_layout()

            sex_counts_df = no_data_df_cat
            term_counts_df = no_data_df_cat

            if not df.empty:
                if 'sex' in df.columns:
                    sex_counts = df['sex'].value_counts().reset_index()
                    sex_counts.columns = ['Sex', 'Count']
                    sex_counts['Percentage'] = (sex_counts['Count'] / sex_counts['Count'].sum() * 100).round(2)
                    sex_counts_df = sex_counts
                else:
                    sex_counts_df = pd.DataFrame({'Message': ["'sex' column not found or data is empty."]})

                if 'policy_term' in df.columns:
                    term_counts = df['policy_term'].value_counts().sort_index().reset_index()
                    term_counts.columns = ['Policy Term (Years)', 'Count']
                    term_counts['Percentage'] = (term_counts['Count'] / term_counts['Count'].sum() * 100).round(2)
                    term_counts_df = term_counts
                else:
                    term_counts_df = pd.DataFrame({'Message': ["'policy_term' column not found or data is empty."]})
            elif df.empty:
                 sex_counts_df = pd.DataFrame({'Message': ["DataFrame is empty, no categorical data for 'sex'."]})
                 term_counts_df = pd.DataFrame({'Message': ["DataFrame is empty, no categorical data for 'policy_term'."]})

            return df, df, desc_stats_df, fig, sex_counts_df, term_counts_df

        except Exception as e:
            gr.Error(f"An error occurred during generation: {str(e)}")
            error_df = pd.DataFrame({'Error': [str(e)]})
            return current_df, df_state.value, error_df, empty_plot, error_df, error_df


    def handle_download_button_click(current_df_to_download):
        if current_df_to_download is None or not isinstance(current_df_to_download, pd.DataFrame) or current_df_to_download.empty:
            gr.Warning("No data available to download. Generate model points first.")
            return gr.DownloadButton.update(interactive=False)

        excel_output = io.BytesIO()
        current_df_to_download.to_excel(excel_output, sheet_name='ModelPoints', engine='xlsxwriter', index=False)
        excel_output.seek(0)
        # Returning a BytesIO object directly for file download
        # Provide a filename for the browser
        return gr.File(file_path=excel_output, file_name="model_points.xlsx", visible=True)


    inputs_list = [
        mp_count_input, seed_input, age_min_input, age_max_input,
        sum_assured_min_input, sum_assured_max_input, policy_terms_input,
        include_sex_input, policy_count_fixed_input
    ]

    generate_btn.click(
        fn=handle_generate_button_click,
        inputs=inputs_list,
        outputs=[
            model_points_display, df_state, summary_stats_display,
            distribution_plot_display, sex_summary_display, policy_term_summary_display
        ]
    )

    download_excel_btn.click(
        fn=handle_download_button_click,
        inputs=[df_state],
        outputs=[download_excel_btn]
    )

if __name__ == "__main__":
    demo.launch(debug=True)