import io
import os
import tempfile

import gradio as gr
import matplotlib
import numpy as np
import pandas as pd
import scipy.stats as stats
from numpy.random import default_rng

# Select the non-interactive Agg backend before pyplot is imported.
matplotlib.use('Agg')

import matplotlib.pyplot as plt  # noqa: E402


def generate_custom_model_points(
    mp_count_val, seed_val, age_min_val, age_max_val,
    sa_min_val, sa_max_val, policy_terms_selection_val,
    include_sex_val, policy_count_fixed_val
):
    """Generate seriatim model points from user-defined parameters.

    Args:
        mp_count_val: Number of model points (rows); coerced with ``int``.
        seed_val: Seed for the NumPy random generator; coerced with ``int``.
        age_min_val: Lowest age at entry (inclusive); coerced with ``int``.
        age_max_val: Highest age at entry (inclusive); coerced with ``int``.
        sa_min_val: Lower bound of the sum assured; coerced with ``float``.
        sa_max_val: Upper bound of the sum assured; coerced with ``float``.
        policy_terms_selection_val: Policy terms (years) to sample from. A
            missing or empty selection falls back to ``[10, 15, 20]``.
        include_sex_val: When truthy, sex is drawn from "M"/"F"; otherwise
            every row gets "U".
        policy_count_fixed_val: When truthy, every policy count is 1;
            otherwise counts are drawn from 1..100 inclusive.

    Returns:
        A DataFrame with the columns ``policy_id``, ``age_at_entry``, ``sex``,
        ``policy_term``, ``policy_count``, ``sum_assured`` and
        ``duration_mth``, one row per model point.
    """
    rng = default_rng(int(seed_val))
    mp_count_val = int(mp_count_val)
    age_min_val = int(age_min_val)
    age_max_val = int(age_max_val)
    sa_min_val = float(sa_min_val)
    sa_max_val = float(sa_max_val)

    # NOTE: the order of the rng calls below determines the output for a given
    # seed; keep it stable so that seeded runs stay reproducible.
    policy_id_col = np.arange(1, mp_count_val + 1)
    age_at_entry = rng.integers(low=age_min_val, high=age_max_val + 1, size=mp_count_val)

    if include_sex_val:
        sex_options = np.array(["M", "F"])
        sex_index = rng.integers(low=0, high=len(sex_options), size=mp_count_val)
        sex_col = sex_options[sex_index]
    else:
        sex_col = np.full(mp_count_val, "U")

    # An explicit length test (rather than truthiness) also accepts NumPy
    # arrays, whose truth value is ambiguous.
    if policy_terms_selection_val is None or len(policy_terms_selection_val) == 0:
        policy_terms_selection_val = [10, 15, 20]
    policy_term_options = np.array(policy_terms_selection_val).astype(int)
    policy_term_col = rng.choice(policy_term_options, size=mp_count_val)

    # Round to the nearest 1,000, then clip: rounding alone can move a value
    # outside the requested bounds when the bounds are not multiples of 1,000.
    raw_sum_assured = (sa_max_val - sa_min_val) * rng.random(size=mp_count_val) + sa_min_val
    sum_assured_col = np.clip(np.round(raw_sum_assured, -3), sa_min_val, sa_max_val)

    # Elapsed duration in months: at least 1 and at most one month short of
    # the full policy term.
    max_duration_val = np.maximum(1, policy_term_col * 12 - 1)
    duration_mth_col = (rng.random(size=mp_count_val) * max_duration_val).astype(int) + 1
    duration_mth_col = np.clip(duration_mth_col, 1, max_duration_val)

    if policy_count_fixed_val:
        policy_count_col_val = np.ones(mp_count_val, dtype=int)
    else:
        policy_count_col_val = rng.integers(low=1, high=101, size=mp_count_val)

    return pd.DataFrame({
        "policy_id": policy_id_col,
        "age_at_entry": age_at_entry,
        "sex": sex_col,
        "policy_term": policy_term_col,
        "policy_count": policy_count_col_val,
        "sum_assured": sum_assured_col,
        "duration_mth": duration_mth_col,
    })
def _has_rows(frame):
    """Return True when *frame* is a pandas DataFrame with at least one row."""
    return isinstance(frame, pd.DataFrame) and not frame.empty


def _message_frame(column, text):
    """Wrap *text* in a one-cell DataFrame so a table widget can display it."""
    return pd.DataFrame({column: [text]})


def _numerical_summary(df):
    """Return ``describe()`` statistics for the numeric model-point columns."""
    if df.empty:
        return _message_frame('Message', "DataFrame is empty, no numerical data to describe.")
    candidates = ['age_at_entry', 'sum_assured', 'duration_mth', 'policy_count']
    present = [col for col in candidates if col in df.columns]
    if not present:
        return _message_frame('Message', "No numerical columns found for summary.")
    desc_stats = df[present].describe().transpose()
    if 'count' in desc_stats.columns:
        # describe() reports counts as floats; show them as whole numbers.
        desc_stats['count'] = desc_stats['count'].astype(int)
    return desc_stats.reset_index().rename(columns={'index': 'Statistic'})


def _sum_assured_figure(df):
    """Return a histogram of ``sum_assured`` with a fitted normal curve, or None."""
    if 'sum_assured' not in df.columns or df['sum_assured'].empty:
        return None
    fig, ax = plt.subplots(figsize=(8, 5))
    ax.hist(df['sum_assured'], bins=50, density=True, alpha=0.7, color='skyblue',
            edgecolor='black', label='Actual Distribution')
    mu, std_dev = stats.norm.fit(df['sum_assured'])
    xmin_hist, xmax_hist = ax.get_xlim()
    x_norm = np.linspace(xmin_hist, xmax_hist, 100)
    p_norm = stats.norm.pdf(x_norm, mu, std_dev)
    ax.plot(x_norm, p_norm, 'r--', linewidth=2,
            label=f'Fitted Normal (μ={mu:,.0f}, σ={std_dev:,.0f})')
    ax.set_title(f"Sum Assured Distribution (N={len(df)})", fontsize=14)
    ax.set_xlabel("Sum Assured ($)", fontsize=12)
    ax.set_ylabel("Density", fontsize=12)
    ax.legend()
    ax.grid(axis='y', linestyle='--', alpha=0.7)
    fig.tight_layout()
    # Detach the figure from pyplot's registry so repeated clicks do not pile
    # up open figures; the Figure object itself stays renderable.
    plt.close(fig)
    return fig


def _value_share_table(counts, label):
    """Turn a ``value_counts()`` Series into a label / Count / Percentage table."""
    table = counts.reset_index()
    table.columns = [label, 'Count']
    table['Percentage'] = (table['Count'] / table['Count'].sum() * 100).round(2)
    return table


def _categorical_summaries(df):
    """Return the (sex table, policy-term table) pair for *df*."""
    if df.empty:
        return (
            _message_frame('Message', "DataFrame is empty, no categorical data for 'sex'."),
            _message_frame('Message', "DataFrame is empty, no categorical data for 'policy_term'."),
        )
    if 'sex' in df.columns:
        sex_counts_df = _value_share_table(df['sex'].value_counts(), 'Sex')
    else:
        sex_counts_df = _message_frame('Message', "'sex' column not found or data is empty.")
    if 'policy_term' in df.columns:
        term_counts_df = _value_share_table(
            df['policy_term'].value_counts().sort_index(), 'Policy Term (Years)'
        )
    else:
        term_counts_df = _message_frame('Message', "'policy_term' column not found or data is empty.")
    return sex_counts_df, term_counts_df


def _export_to_excel(df):
    """Write *df* to a new temporary workbook and return the file path."""
    # The download button serves a file by path, so the workbook has to exist
    # on disk. A dedicated directory keeps the file name stable.
    target = os.path.join(tempfile.mkdtemp(prefix="model_points_"), "model_points.xlsx")
    df.to_excel(target, sheet_name='ModelPoints', engine='xlsxwriter', index=False)
    return target


def handle_generate_button_click(
    mp_c, s, age_m, age_mx, sa_m, sa_mx, p_terms, incl_sex, pc_fixed,
    previous_df=None
):
    """Validate the form, generate model points and build every summary view.

    Args:
        mp_c, s, age_m, age_mx, sa_m, sa_mx, p_terms, incl_sex, pc_fixed:
            Form values, forwarded in this order to
            ``generate_custom_model_points``.
        previous_df: Table currently held in the session state (or None). It
            is handed back unchanged when validation or generation fails, so
            a failed run does not discard earlier results.

    Returns:
        A 6-tuple matching the wired outputs: table for display, table for the
        session state, numerical summary, distribution figure (or None), sex
        summary, policy-term summary.
    """
    current_df = previous_df if previous_df is not None else pd.DataFrame()

    def rejected(text):
        # Keep the previous table/state; show the reason in all summary tables.
        error_df = _message_frame('Error', text)
        return current_df, previous_df, error_df, None, error_df, error_df

    # This is the UI boundary: any failure (including a cleared number field
    # arriving as None) is reported to the user instead of crashing the event.
    try:
        if int(age_m) >= int(age_mx):
            gr.Warning("Minimum Age must be less than Maximum Age.")
            return rejected("Minimum Age must be less than Maximum Age.")
        if float(sa_m) >= float(sa_mx):
            gr.Warning("Minimum Sum Assured must be less than Maximum Sum Assured.")
            return rejected("Minimum Sum Assured must be less than Maximum Sum Assured.")
        if not p_terms:
            gr.Warning("At least one Policy Term must be selected. Using defaults.")
            p_terms = [10, 15, 20]

        gr.Info("Generating model points... Please wait.")
        df = generate_custom_model_points(
            mp_c, s, age_m, age_mx, sa_m, sa_mx, p_terms, incl_sex, pc_fixed
        )
        gr.Info(f"{len(df)} model points generated successfully!")

        desc_stats_df = _numerical_summary(df)
        fig = _sum_assured_figure(df)
        sex_counts_df, term_counts_df = _categorical_summaries(df)
        return df, df, desc_stats_df, fig, sex_counts_df, term_counts_df
    except Exception as e:
        # gr.Error only has an effect when raised; a warning toast lets the
        # handler still return the error tables below.
        gr.Warning(f"An error occurred during generation: {str(e)}")
        return rejected(str(e))


def prepare_excel_download(current_df_to_download):
    """Return the path of a fresh workbook for the download button, or None.

    Runs after each generation so that the button already points at a file
    when the user clicks it.
    """
    if not _has_rows(current_df_to_download):
        return None
    return _export_to_excel(current_df_to_download)


def handle_download_button_click(current_df_to_download):
    """Warn the user when the download button is pressed before data exists."""
    if not _has_rows(current_df_to_download):
        gr.Warning("No data available to download. Generate model points first.")


with gr.Blocks() as demo:
    gr.Markdown("# Actuarial Model Points Generator")
    gr.Markdown(
        "Configure the parameters below to generate a custom set of seriatim model points. "
        "The generated table can be viewed and downloaded. "
        "Additional summary statistics and visualizations are provided below."
    )

    # Per-session holder for the most recently generated table.
    df_state = gr.State()

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### Generation Parameters")
            mp_count_input = gr.Slider(minimum=100, maximum=50000, value=1000, step=100, label="Number of Model Points")
            seed_input = gr.Number(value=12345, precision=0, label="Random Seed")
            gr.Markdown("#### Age Parameters")
            age_min_input = gr.Slider(minimum=18, maximum=40, value=20, step=1, label="Minimum Age at Entry")
            age_max_input = gr.Slider(minimum=41, maximum=80, value=59, step=1, label="Maximum Age at Entry")
            gr.Markdown("#### Sum Assured Parameters ($)")
            sum_assured_min_input = gr.Number(value=10000, label="Minimum Sum Assured")
            sum_assured_max_input = gr.Number(value=1000000, label="Maximum Sum Assured")
            gr.Markdown("#### Policy Options")
            policy_terms_input = gr.CheckboxGroup(choices=[5, 10, 15, 20, 25, 30], value=[10, 15, 20], label="Available Policy Terms (Years)")
            include_sex_input = gr.Checkbox(value=True, label="Include Sex (M/F)")
            policy_count_fixed_input = gr.Checkbox(value=True, label="Fixed Policy Count = 1")
            generate_btn = gr.Button("Generate Model Points", variant="primary")

        # NOTE(review): the source lost its indentation; the summary section is
        # assumed to live inside this right-hand column - confirm.
        with gr.Column(scale=2):
            model_points_display = gr.Dataframe(label="Generated Model Points", wrap=True)
            download_excel_btn = gr.DownloadButton(label="Download Excel", variant="secondary")
            gr.Markdown("---")
            gr.Markdown("## 📊 Data Summary & Analysis")
            with gr.Tabs():
                with gr.TabItem("Numerical Summary"):
                    summary_stats_display = gr.Dataframe(label="Descriptive Statistics", wrap=True)
                with gr.TabItem("Distribution Plot"):
                    gr.Markdown("Distribution of **Sum Assured**. Data is generated uniformly; a normal curve is fitted for illustration.")
                    distribution_plot_display = gr.Plot()
                with gr.TabItem("Categorical Summary"):
                    sex_summary_display = gr.Dataframe(label="Sex Distribution", wrap=True)
                    policy_term_summary_display = gr.Dataframe(label="Policy Term Distribution", wrap=True)

    inputs_list = [
        mp_count_input, seed_input, age_min_input, age_max_input,
        sum_assured_min_input, sum_assured_max_input, policy_terms_input,
        include_sex_input, policy_count_fixed_input
    ]

    generate_btn.click(
        fn=handle_generate_button_click,
        # The state is passed as an input because reading `df_state.value`
        # inside a handler yields the initial value, not the session's table.
        inputs=[*inputs_list, df_state],
        outputs=[
            model_points_display, df_state, summary_stats_display,
            distribution_plot_display, sex_summary_display, policy_term_summary_display
        ]
    ).then(
        # Refresh the downloadable workbook once the state holds the new table.
        fn=prepare_excel_download,
        inputs=[df_state],
        outputs=[download_excel_btn]
    )

    download_excel_btn.click(
        fn=handle_download_button_click,
        inputs=[df_state],
        outputs=None
    )


# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(debug=True)