File size: 12,132 Bytes
42cf35e 4a7f563 fa05c25 d38b638 789a23e 42cf35e abb7e3f 42cf35e abb7e3f 42cf35e abb7e3f bf3c28f abb7e3f fa05c25 abb7e3f fa05c25 0b55761 abb7e3f 0b55761 abb7e3f 0b55761 abb7e3f da6aaca abb7e3f da6aaca fa05c25 abb7e3f fa05c25 b4e00d8 4a7f563 fa05c25 4a7f563 bf3c28f 4a7f563 789a23e 0b55761 ee6b750 4a7f563 abb7e3f fa05c25 42cf35e b4e00d8 fa05c25 b4e00d8 42cf35e abb7e3f da6aaca fa05c25 abb7e3f fa05c25 abb7e3f fa05c25 abb7e3f fa05c25 ee6b750 abb7e3f 7990da4 fa05c25 789a23e 0b55761 789a23e fa05c25 da6aaca 789a23e 0b55761 789a23e abb7e3f 0b55761 fa05c25 abb7e3f 0b55761 abb7e3f 0b55761 7990da4 0b55761 fa05c25 0b55761 7990da4 0b55761 fa05c25 da6aaca fa05c25 0b55761 fa05c25 0b55761 fa05c25 0b55761 fa05c25 7990da4 0b55761 fa05c25 7990da4 0b55761 fa05c25 0b55761 789a23e 4a7f563 275ee78 fa05c25 7990da4 0b55761 4a7f563 275ee78 7990da4 0b55761 789a23e abb7e3f bf3c28f 42cf35e 4a7f563 abb7e3f 789a23e fa05c25 0b55761 789a23e 42cf35e 0b55761 4a7f563 275ee78 abb7e3f 7990da4 42cf35e 4a7f563 42cf35e 789a23e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 |
import gradio as gr
import pandas as pd
import numpy as np
from numpy.random import default_rng
import io # For BytesIO to handle file in memory
import matplotlib # Explicitly set backend for environments where default might be an issue
matplotlib.use('Agg') # Use a non-interactive backend
import matplotlib.pyplot as plt
import scipy.stats as stats
# 1. Data Generation Function (customizable via UI filters)
def generate_custom_model_points(
    mp_count_val, seed_val, age_min_val, age_max_val,
    sa_min_val, sa_max_val, policy_terms_selection_val,
    include_sex_val, policy_count_fixed_val
):
    """Generate a table of seriatim model points from user-defined parameters.

    Args:
        mp_count_val: Number of model points (rows) to generate.
        seed_val: Seed for the NumPy random generator; the same seed and
            parameters always produce the same table.
        age_min_val: Lowest age at entry (inclusive).
        age_max_val: Highest age at entry (inclusive).
        sa_min_val: Lower bound for the sum assured.
        sa_max_val: Upper bound for the sum assured.
        policy_terms_selection_val: Policy terms in years to sample from.
            An empty or missing selection falls back to ``[10, 15, 20]``.
        include_sex_val: If truthy, draw "M"/"F" per row; otherwise every
            row gets "U".
        policy_count_fixed_val: If truthy, every row has a policy count of 1;
            otherwise counts are drawn from 1 to 100 inclusive.

    Returns:
        A ``pandas.DataFrame`` with the columns ``policy_id``,
        ``age_at_entry``, ``sex``, ``policy_term``, ``policy_count``,
        ``sum_assured`` and ``duration_mth``.
    """
    rng = default_rng(int(seed_val))
    mp_count_val = int(mp_count_val)
    age_min_val = int(age_min_val)
    age_max_val = int(age_max_val)
    sa_min_val = float(sa_min_val)
    sa_max_val = float(sa_max_val)

    policy_id_col = np.arange(1, mp_count_val + 1)
    # `high` is exclusive, hence the +1 to make the maximum age attainable.
    age_at_entry = rng.integers(low=age_min_val, high=age_max_val + 1, size=mp_count_val)

    if include_sex_val:
        sex_options = np.array(["M", "F"])
        # Indexing with the drawn integers consumes the same random stream as
        # a per-row lookup, without a Python-level loop.
        sex_col = sex_options[rng.integers(low=0, high=len(sex_options), size=mp_count_val)]
    else:
        sex_col = np.full(mp_count_val, "U")

    if not policy_terms_selection_val:
        policy_terms_selection_val = [10, 15, 20]  # Default if empty
    policy_term_options = np.array(policy_terms_selection_val).astype(int)
    policy_term_col = rng.choice(policy_term_options, size=mp_count_val)

    # Uniform draw between the bounds, rounded to the nearest 1,000.
    sum_assured_col = np.round(
        (sa_max_val - sa_min_val) * rng.random(size=mp_count_val) + sa_min_val, -3
    )
    # Rounding can step outside the requested range when a bound is not a
    # multiple of 1,000 (e.g. min 10,400 -> 10,000), so clamp back into it.
    sa_low, sa_high = sorted((sa_min_val, sa_max_val))
    sum_assured_col = np.clip(sum_assured_col, sa_low, sa_high)

    # Elapsed duration in months: at least 1 and at most one month short of
    # the full policy term.
    max_duration_val = np.maximum(1, policy_term_col * 12 - 1)
    duration_mth_col = (rng.random(size=mp_count_val) * max_duration_val).astype(int) + 1
    duration_mth_col = np.minimum(duration_mth_col, max_duration_val)
    duration_mth_col = np.maximum(1, duration_mth_col)

    if policy_count_fixed_val:
        policy_count_col_val = np.ones(mp_count_val, dtype=int)
    else:
        policy_count_col_val = rng.integers(low=1, high=101, size=mp_count_val)

    return pd.DataFrame({
        "policy_id": policy_id_col,
        "age_at_entry": age_at_entry,
        "sex": sex_col,
        "policy_term": policy_term_col,
        "policy_count": policy_count_col_val,
        "sum_assured": sum_assured_col,
        "duration_mth": duration_mth_col,
    })
# 2. Gradio App Definition
with gr.Blocks() as demo:
    # NOTE(review): the source lost its indentation; the widget nesting below
    # is reconstructed from the component order -- confirm against the
    # intended layout.
    gr.Markdown("# Actuarial Model Points Generator")
    gr.Markdown(
        "Configure the parameters below to generate a custom set of seriatim model points. "
        "The generated table can be viewed and downloaded. "
        "Additional summary statistics and visualizations are provided below."
    )
    # Holds the most recently generated DataFrame between events.
    df_state = gr.State()

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### Generation Parameters")
            mp_count_input = gr.Slider(minimum=100, maximum=50000, value=1000, step=100, label="Number of Model Points")
            seed_input = gr.Number(value=12345, precision=0, label="Random Seed")
            gr.Markdown("#### Age Parameters")
            age_min_input = gr.Slider(minimum=18, maximum=40, value=20, step=1, label="Minimum Age at Entry")
            age_max_input = gr.Slider(minimum=41, maximum=80, value=59, step=1, label="Maximum Age at Entry")
            gr.Markdown("#### Sum Assured Parameters ($)")
            sum_assured_min_input = gr.Number(value=10000, label="Minimum Sum Assured")
            sum_assured_max_input = gr.Number(value=1000000, label="Maximum Sum Assured")
            gr.Markdown("#### Policy Options")
            policy_terms_input = gr.CheckboxGroup(choices=[5, 10, 15, 20, 25, 30], value=[10, 15, 20], label="Available Policy Terms (Years)")
            include_sex_input = gr.Checkbox(value=True, label="Include Sex (M/F)")
            policy_count_fixed_input = gr.Checkbox(value=True, label="Fixed Policy Count = 1")
            generate_btn = gr.Button("Generate Model Points", variant="primary")
        with gr.Column(scale=2):
            # A fixed height=400 was previously set here and has been removed.
            model_points_display = gr.Dataframe(label="Generated Model Points", wrap=True)
            download_excel_btn = gr.DownloadButton(label="Download Excel", variant="secondary")

    gr.Markdown("---")
    gr.Markdown("## 📊 Data Summary & Analysis")
    with gr.Tabs():
        with gr.TabItem("Numerical Summary"):
            summary_stats_display = gr.Dataframe(label="Descriptive Statistics", wrap=True)
        with gr.TabItem("Distribution Plot"):
            gr.Markdown("Distribution of **Sum Assured**. Data is generated uniformly; a normal curve is fitted for illustration.")
            distribution_plot_display = gr.Plot()
        with gr.TabItem("Categorical Summary"):
            sex_summary_display = gr.Dataframe(label="Sex Distribution", wrap=True)
            policy_term_summary_display = gr.Dataframe(label="Policy Term Distribution", wrap=True)

    # 3. Event Handlers
    def _numeric_summary(df):
        """Return describe() statistics for the numerical columns, one row per column."""
        if df.empty:
            return pd.DataFrame({'Message': ["DataFrame is empty, no numerical data to describe."]})
        wanted = ['age_at_entry', 'sum_assured', 'duration_mth', 'policy_count']
        present = [col for col in wanted if col in df.columns]
        if not present:
            return pd.DataFrame({'Message': ["No numerical columns found for summary."]})
        desc_stats = df[present].describe().transpose()
        if 'count' in desc_stats.columns:
            desc_stats['count'] = desc_stats['count'].astype(int)
        return desc_stats.reset_index().rename(columns={'index': 'Statistic'})

    def _sum_assured_figure(df):
        """Return a histogram of sum assured with a fitted normal curve, or None if no data."""
        if 'sum_assured' not in df.columns or df['sum_assured'].empty:
            return None
        # Build the figure without pyplot so it is not kept alive in pyplot's
        # global registry after every click.
        from matplotlib.figure import Figure

        fig = Figure(figsize=(8, 5))
        ax = fig.subplots()
        ax.hist(df['sum_assured'], bins=50, density=True, alpha=0.7, color='skyblue', edgecolor='black', label='Actual Distribution')
        mu, std_dev = stats.norm.fit(df['sum_assured'])
        xmin_hist, xmax_hist = ax.get_xlim()
        x_norm = np.linspace(xmin_hist, xmax_hist, 100)
        p_norm = stats.norm.pdf(x_norm, mu, std_dev)
        ax.plot(x_norm, p_norm, 'r--', linewidth=2, label=f'Fitted Normal (μ={mu:,.0f}, σ={std_dev:,.0f})')
        ax.set_title(f"Sum Assured Distribution (N={len(df)})", fontsize=14)
        ax.set_xlabel("Sum Assured ($)", fontsize=12)
        ax.set_ylabel("Density", fontsize=12)
        ax.legend()
        ax.grid(axis='y', linestyle='--', alpha=0.7)
        fig.tight_layout()
        return fig

    def _share_table(counts, label):
        """Turn a value_counts() Series into a table with Count and Percentage columns."""
        table = counts.reset_index()
        table.columns = [label, 'Count']
        table['Percentage'] = (table['Count'] / table['Count'].sum() * 100).round(2)
        return table

    def _categorical_summaries(df):
        """Return the (sex, policy term) distribution tables for df."""
        if df.empty:
            return (
                pd.DataFrame({'Message': ["DataFrame is empty, no categorical data for 'sex'."]}),
                pd.DataFrame({'Message': ["DataFrame is empty, no categorical data for 'policy_term'."]}),
            )
        if 'sex' in df.columns:
            sex_counts_df = _share_table(df['sex'].value_counts(), 'Sex')
        else:
            sex_counts_df = pd.DataFrame({'Message': ["'sex' column not found or data is empty."]})
        if 'policy_term' in df.columns:
            term_counts_df = _share_table(
                df['policy_term'].value_counts().sort_index(), 'Policy Term (Years)'
            )
        else:
            term_counts_df = pd.DataFrame({'Message': ["'policy_term' column not found or data is empty."]})
        return sex_counts_df, term_counts_df

    def _export_excel(df):
        """Write df to a temporary .xlsx file and return its path for the download button.

        Returns an update that clears the button's file when there is nothing
        to export or the export fails, so a stale workbook is never offered.
        """
        if df.empty:
            return gr.update(value=None)
        import tempfile
        from pathlib import Path

        try:
            # A fresh directory per export lets the file keep a friendly name.
            # The directories are not cleaned up by this code.
            excel_path = Path(tempfile.mkdtemp(prefix="model_points_")) / "model_points.xlsx"
            df.to_excel(excel_path, sheet_name='ModelPoints', engine='xlsxwriter', index=False)
        except (ImportError, OSError, ValueError) as exc:
            gr.Warning(f"Excel export failed: {exc}")
            return gr.update(value=None)
        return str(excel_path)

    def handle_generate_button_click(
        mp_c, s, age_m, age_mx, sa_m, sa_mx, p_terms, incl_sex, pc_fixed,
        current_state=None
    ):
        """Validate the inputs, generate model points and build all summaries.

        ``current_state`` is the value of ``df_state`` passed in as an event
        input. Reading ``df_state.value`` would only give the component's
        initial value, so a failed click would wipe the existing table.

        Returns a 7-tuple matching the click outputs: table, state, numerical
        summary, plot, sex summary, policy-term summary, download-button value.
        On any failure the table, state and download file are left unchanged
        and the three summary tables carry the error message.
        """
        def _rejected(message):
            gr.Warning(message)
            error_df = pd.DataFrame({'Error': [message]})
            return gr.update(), current_state, error_df, None, error_df, error_df, gr.update()

        try:
            age_low, age_high = int(age_m), int(age_mx)
            sa_low, sa_high = float(sa_m), float(sa_mx)
        except (TypeError, ValueError):
            # A cleared number field arrives as None.
            return _rejected("Age and Sum Assured bounds must be numeric values.")
        if age_low >= age_high:
            return _rejected("Minimum Age must be less than Maximum Age.")
        if sa_low >= sa_high:
            return _rejected("Minimum Sum Assured must be less than Maximum Sum Assured.")
        if not p_terms:
            gr.Warning("At least one Policy Term must be selected. Using defaults.")
            p_terms = [10, 15, 20]

        gr.Info("Generating model points... Please wait.")
        try:
            df = generate_custom_model_points(mp_c, s, age_m, age_mx, sa_m, sa_mx, p_terms, incl_sex, pc_fixed)
            desc_stats_df = _numeric_summary(df)
            fig = _sum_assured_figure(df)
            sex_counts_df, term_counts_df = _categorical_summaries(df)
        except Exception as e:
            # UI boundary: report the failure instead of crashing the event.
            # gr.Error only displays when raised, which would discard the
            # error tables, so a warning toast is used instead.
            gr.Warning(f"An error occurred during generation: {str(e)}")
            error_df = pd.DataFrame({'Error': [str(e)]})
            return gr.update(), current_state, error_df, None, error_df, error_df, gr.update()

        gr.Info(f"{len(df)} model points generated successfully!")
        return df, df, desc_stats_df, fig, sex_counts_df, term_counts_df, _export_excel(df)

    def handle_download_button_click(current_df_to_download):
        """Warn when the download button is clicked before any data exists.

        The workbook is attached to the button when model points are
        generated, so this handler produces no outputs.
        """
        has_data = (
            isinstance(current_df_to_download, pd.DataFrame)
            and not current_df_to_download.empty
        )
        if not has_data:
            gr.Warning("No data available to download. Generate model points first.")

    inputs_list = [
        mp_count_input, seed_input, age_min_input, age_max_input,
        sum_assured_min_input, sum_assured_max_input, policy_terms_input,
        include_sex_input, policy_count_fixed_input
    ]
    generate_btn.click(
        fn=handle_generate_button_click,
        # df_state is appended so the handler receives the current value.
        inputs=[*inputs_list, df_state],
        outputs=[
            model_points_display, df_state, summary_stats_display,
            distribution_plot_display, sex_summary_display, policy_term_summary_display,
            download_excel_btn
        ]
    )
    download_excel_btn.click(
        fn=handle_download_button_click,
        inputs=[df_state],
        outputs=None
    )
# Start the Gradio server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch(debug=True)