Update app.py
Browse files
app.py
CHANGED
|
@@ -4,15 +4,6 @@ import numpy as np
|
|
| 4 |
from numpy.random import default_rng
|
| 5 |
import io # For BytesIO to handle file in memory
|
| 6 |
import matplotlib.pyplot as plt
|
| 7 |
-
import seaborn as sns
|
| 8 |
-
from scipy import stats
|
| 9 |
-
import plotly.express as px
|
| 10 |
-
import plotly.graph_objects as go
|
| 11 |
-
from plotly.subplots import make_subplots
|
| 12 |
-
|
| 13 |
-
# Set style for matplotlib
|
| 14 |
-
plt.style.use('default')
|
| 15 |
-
sns.set_palette("husl")
|
| 16 |
|
| 17 |
# 1. Data Generation Function (customizable via UI filters)
|
| 18 |
def generate_custom_model_points(
|
|
@@ -64,6 +55,7 @@ def generate_custom_model_points(
|
|
| 64 |
# And ensure it's at least 1
|
| 65 |
duration_mth_col = np.maximum(1, duration_mth_col)
|
| 66 |
|
|
|
|
| 67 |
# Policy Count
|
| 68 |
if policy_count_fixed_val:
|
| 69 |
policy_count_col_val = np.ones(mp_count_val, dtype=int)
|
|
@@ -95,113 +87,84 @@ def generate_summary_statistics(df):
|
|
| 95 |
numerical_cols = ['age_at_entry', 'policy_term', 'policy_count', 'sum_assured', 'duration_mth']
|
| 96 |
summary_stats = df[numerical_cols].describe().round(2)
|
| 97 |
|
| 98 |
-
# Add additional statistics
|
| 99 |
-
additional_stats = pd.DataFrame({
|
| 100 |
-
'age_at_entry': [df['age_at_entry'].mode()[0], df['age_at_entry'].var()],
|
| 101 |
-
'policy_term': [df['policy_term'].mode()[0], df['policy_term'].var()],
|
| 102 |
-
'policy_count': [df['policy_count'].mode()[0], df['policy_count'].var()],
|
| 103 |
-
'sum_assured': [df['sum_assured'].mode()[0], df['sum_assured'].var()],
|
| 104 |
-
'duration_mth': [df['duration_mth'].mode()[0], df['duration_mth'].var()]
|
| 105 |
-
}, index=['mode', 'variance']).round(2)
|
| 106 |
-
|
| 107 |
-
summary_stats = pd.concat([summary_stats, additional_stats])
|
| 108 |
return summary_stats
|
| 109 |
|
| 110 |
-
def
|
| 111 |
-
"""Create distribution
|
| 112 |
if df is None or df.empty:
|
| 113 |
-
return None
|
| 114 |
|
| 115 |
-
|
| 116 |
-
fig_age = plt.figure(figsize=(10, 6))
|
| 117 |
|
| 118 |
-
#
|
| 119 |
-
plt.hist(df['age_at_entry'], bins=20, density=True, alpha=0.7, color='skyblue', edgecolor='black')
|
| 120 |
|
| 121 |
-
#
|
| 122 |
age_mean = df['age_at_entry'].mean()
|
| 123 |
age_std = df['age_at_entry'].std()
|
| 124 |
-
|
| 125 |
-
y_age = stats.norm.pdf(x_age, age_mean, age_std)
|
| 126 |
-
plt.plot(x_age, y_age, 'r-', linewidth=2, label=f'Normal Curve (μ={age_mean:.1f}, σ={age_std:.1f})')
|
| 127 |
|
| 128 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
plt.xlabel('Age at Entry')
|
| 130 |
plt.ylabel('Density')
|
| 131 |
plt.legend()
|
| 132 |
plt.grid(True, alpha=0.3)
|
| 133 |
plt.tight_layout()
|
| 134 |
|
| 135 |
-
|
| 136 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
|
| 138 |
-
#
|
| 139 |
plt.hist(df['sum_assured'], bins=30, density=True, alpha=0.7, color='lightgreen', edgecolor='black')
|
| 140 |
|
| 141 |
-
#
|
| 142 |
sa_mean = df['sum_assured'].mean()
|
| 143 |
sa_std = df['sum_assured'].std()
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
|
|
|
| 147 |
|
| 148 |
-
plt.
|
|
|
|
|
|
|
| 149 |
plt.xlabel('Sum Assured ($)')
|
| 150 |
plt.ylabel('Density')
|
| 151 |
plt.legend()
|
| 152 |
plt.grid(True, alpha=0.3)
|
| 153 |
-
plt.gca().xaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f'${x:,.0f}'))
|
| 154 |
-
plt.tight_layout()
|
| 155 |
|
| 156 |
-
#
|
| 157 |
-
|
|
|
|
| 158 |
|
| 159 |
-
# Histogram
|
| 160 |
-
plt.hist(df['duration_mth'], bins=25, density=True, alpha=0.7, color='lightcoral', edgecolor='black')
|
| 161 |
-
|
| 162 |
-
# Normal curve overlay
|
| 163 |
-
dur_mean = df['duration_mth'].mean()
|
| 164 |
-
dur_std = df['duration_mth'].std()
|
| 165 |
-
x_dur = np.linspace(df['duration_mth'].min(), df['duration_mth'].max(), 100)
|
| 166 |
-
y_dur = stats.norm.pdf(x_dur, dur_mean, dur_std)
|
| 167 |
-
plt.plot(x_dur, y_dur, 'r-', linewidth=2, label=f'Normal Curve (μ={dur_mean:.1f}, σ={dur_std:.1f})')
|
| 168 |
-
|
| 169 |
-
plt.title('Policy Duration (Months) Distribution with Normal Curve Overlay', fontsize=14, fontweight='bold')
|
| 170 |
-
plt.xlabel('Duration (Months)')
|
| 171 |
-
plt.ylabel('Density')
|
| 172 |
-
plt.legend()
|
| 173 |
-
plt.grid(True, alpha=0.3)
|
| 174 |
plt.tight_layout()
|
| 175 |
|
| 176 |
-
return
|
| 177 |
|
| 178 |
-
def
|
| 179 |
-
"""Create
|
| 180 |
if df is None or df.empty:
|
| 181 |
-
return
|
| 182 |
|
| 183 |
-
|
| 184 |
-
sex_dist = df['sex'].value_counts().reset_index()
|
| 185 |
-
sex_dist.columns = ['Sex', 'Count']
|
| 186 |
-
sex_dist['Percentage'] = (sex_dist['Count'] / len(df) * 100).round(2)
|
| 187 |
|
| 188 |
-
#
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
categorical_summary = pd.DataFrame({
|
| 195 |
-
'Variable': ['Sex Distribution', '', 'Policy Term Distribution'] + [''] * (len(term_dist) - 1),
|
| 196 |
-
'Category': [''] + list(sex_dist['Sex']) + [''] + list(term_dist['Policy_Term'].astype(str) + ' years'),
|
| 197 |
-
'Count': [''] + list(sex_dist['Count']) + [''] + list(term_dist['Count']),
|
| 198 |
-
'Percentage': [''] + list(sex_dist['Percentage'].astype(str) + '%') + [''] + list(term_dist['Percentage'].astype(str) + '%')
|
| 199 |
-
})
|
| 200 |
-
|
| 201 |
-
# Create bar plot for policy terms
|
| 202 |
-
fig_terms = plt.figure(figsize=(10, 6))
|
| 203 |
-
bars = plt.bar(term_dist['Policy_Term'].astype(str), term_dist['Count'], color='gold', edgecolor='black', alpha=0.8)
|
| 204 |
-
plt.title('Policy Term Distribution', fontsize=14, fontweight='bold')
|
| 205 |
plt.xlabel('Policy Term (Years)')
|
| 206 |
plt.ylabel('Count')
|
| 207 |
plt.grid(True, alpha=0.3, axis='y')
|
|
@@ -214,34 +177,45 @@ def create_categorical_analysis(df):
|
|
| 214 |
|
| 215 |
plt.tight_layout()
|
| 216 |
|
| 217 |
-
return
|
| 218 |
|
| 219 |
-
def
|
| 220 |
-
"""
|
| 221 |
if df is None or df.empty:
|
| 222 |
-
return
|
| 223 |
|
| 224 |
-
#
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
#
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 235 |
|
| 236 |
-
#
|
| 237 |
-
|
| 238 |
|
| 239 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 240 |
|
| 241 |
def generate_business_insights(df):
|
| 242 |
"""Generate business insights and key metrics."""
|
| 243 |
if df is None or df.empty:
|
| 244 |
-
return ""
|
| 245 |
|
| 246 |
total_policies = len(df)
|
| 247 |
total_sum_assured = df['sum_assured'].sum()
|
|
@@ -254,39 +228,38 @@ def generate_business_insights(df):
|
|
| 254 |
term_percentage = (df['policy_term'] == most_common_term).mean() * 100
|
| 255 |
|
| 256 |
# Age groups
|
| 257 |
-
young_pct = ((df['age_at_entry'] <= 30).mean() * 100)
|
| 258 |
-
middle_pct = (((df['age_at_entry'] > 30) & (df['age_at_entry'] <= 50)).mean() * 100)
|
| 259 |
-
mature_pct = ((df['age_at_entry'] > 50).mean() * 100)
|
| 260 |
-
|
| 261 |
-
insights_text = f"""
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
"""
|
| 281 |
|
| 282 |
return insights_text
|
| 283 |
|
| 284 |
# 3. Gradio App Definition
|
| 285 |
-
with gr.Blocks(
|
| 286 |
-
gr.Markdown("#
|
| 287 |
gr.Markdown(
|
| 288 |
"Configure the parameters below to generate a custom set of seriatim model points. "
|
| 289 |
-
"The generated table can be viewed and downloaded as an Excel file
|
| 290 |
)
|
| 291 |
|
| 292 |
df_state = gr.State() # To hold the generated DataFrame
|
|
@@ -331,70 +304,57 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 331 |
value=True, label="Fixed Policy Count = 1 (Uncheck for variable count 1-100)"
|
| 332 |
)
|
| 333 |
|
| 334 |
-
generate_btn = gr.Button("Generate Model Points", variant="primary"
|
| 335 |
|
| 336 |
with gr.Column(scale=2):
|
| 337 |
model_points_display = gr.Dataframe(label="Generated Model Points")
|
| 338 |
download_excel_btn = gr.DownloadButton(
|
| 339 |
-
label="
|
| 340 |
-
value="model_points.xlsx",
|
| 341 |
variant="secondary"
|
| 342 |
)
|
| 343 |
|
| 344 |
# Analytics Section
|
| 345 |
gr.Markdown("---")
|
| 346 |
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
business_insights = gr.Markdown("Generate model points to see business insights...")
|
| 350 |
|
| 351 |
with gr.Row():
|
| 352 |
with gr.Column():
|
| 353 |
-
gr.Markdown("###
|
| 354 |
summary_stats_display = gr.Dataframe(label="Descriptive Statistics")
|
| 355 |
with gr.Column():
|
| 356 |
-
gr.Markdown("###
|
| 357 |
categorical_display = gr.Dataframe(label="Category Distributions")
|
| 358 |
|
| 359 |
with gr.Row():
|
| 360 |
with gr.Column():
|
| 361 |
-
gr.Markdown("###
|
| 362 |
-
age_plot = gr.Plot(label="Age Distribution
|
| 363 |
with gr.Column():
|
| 364 |
-
gr.Markdown("###
|
| 365 |
-
sa_plot = gr.Plot(label="Sum Assured Distribution
|
| 366 |
|
| 367 |
with gr.Row():
|
| 368 |
with gr.Column():
|
| 369 |
-
gr.Markdown("###
|
| 370 |
-
duration_plot = gr.Plot(label="Duration Distribution with Normal Curve")
|
| 371 |
-
with gr.Column():
|
| 372 |
-
gr.Markdown("### 📋 Policy Term Distribution")
|
| 373 |
terms_plot = gr.Plot(label="Policy Terms")
|
| 374 |
|
| 375 |
-
with gr.Row():
|
| 376 |
-
with gr.Column():
|
| 377 |
-
gr.Markdown("### 🔗 Correlation Analysis")
|
| 378 |
-
correlation_plot = gr.Plot(label="Correlation Heatmap")
|
| 379 |
-
with gr.Column():
|
| 380 |
-
gr.Markdown("### 📋 Correlation Matrix")
|
| 381 |
-
correlation_matrix_display = gr.Dataframe(label="Correlation Coefficients")
|
| 382 |
-
|
| 383 |
# 4. Event Handlers
|
| 384 |
def handle_generate_button_click(
|
| 385 |
mp_c, s, age_m, age_mx, sa_m, sa_mx, p_terms, incl_sex, pc_fixed
|
| 386 |
):
|
| 387 |
if int(age_m) >= int(age_mx):
|
| 388 |
gr.Warning("Minimum Age must be less than Maximum Age.")
|
| 389 |
-
return
|
| 390 |
if float(sa_m) >= float(sa_mx):
|
| 391 |
gr.Warning("Minimum Sum Assured must be less than Maximum Sum Assured.")
|
| 392 |
-
return
|
| 393 |
-
|
| 394 |
if not p_terms:
|
| 395 |
gr.Warning("No Policy Terms selected. Using defaults: [10, 15, 20].")
|
| 396 |
|
| 397 |
-
gr.Info("Generating model points
|
| 398 |
|
| 399 |
# Generate data
|
| 400 |
df = generate_custom_model_points(
|
|
@@ -404,25 +364,16 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 404 |
# Generate analytics
|
| 405 |
insights = generate_business_insights(df)
|
| 406 |
summary_stats = generate_summary_statistics(df)
|
| 407 |
-
categorical_summary
|
| 408 |
-
age_fig, sa_fig, duration_fig = create_distribution_plots(df)
|
| 409 |
-
corr_fig, corr_matrix = create_correlation_analysis(df)
|
| 410 |
|
| 411 |
-
|
|
|
|
|
|
|
|
|
|
| 412 |
|
| 413 |
-
|
| 414 |
-
|
| 415 |
-
|
| 416 |
-
insights, # business_insights
|
| 417 |
-
summary_stats, # summary_stats_display
|
| 418 |
-
categorical_summary, # categorical_display
|
| 419 |
-
age_fig, # age_plot
|
| 420 |
-
sa_fig, # sa_plot
|
| 421 |
-
duration_fig, # duration_plot
|
| 422 |
-
terms_fig, # terms_plot
|
| 423 |
-
corr_fig, # correlation_plot
|
| 424 |
-
corr_matrix # correlation_matrix_display
|
| 425 |
-
)
|
| 426 |
|
| 427 |
def handle_download_button_click(current_df_to_download):
|
| 428 |
if current_df_to_download is None or current_df_to_download.empty:
|
|
@@ -444,16 +395,11 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 444 |
include_sex_input, policy_count_fixed_input
|
| 445 |
]
|
| 446 |
|
| 447 |
-
outputs_list = [
|
| 448 |
-
model_points_display, df_state, business_insights, summary_stats_display,
|
| 449 |
-
categorical_display, age_plot, sa_plot, duration_plot, terms_plot,
|
| 450 |
-
correlation_plot, correlation_matrix_display
|
| 451 |
-
]
|
| 452 |
-
|
| 453 |
generate_btn.click(
|
| 454 |
fn=handle_generate_button_click,
|
| 455 |
inputs=inputs_list,
|
| 456 |
-
outputs=
|
|
|
|
| 457 |
)
|
| 458 |
|
| 459 |
download_excel_btn.click(
|
|
@@ -462,5 +408,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 462 |
outputs=[download_excel_btn]
|
| 463 |
)
|
| 464 |
|
|
|
|
| 465 |
if __name__ == "__main__":
|
| 466 |
demo.launch()
|
|
|
|
| 4 |
from numpy.random import default_rng
|
| 5 |
import io # For BytesIO to handle file in memory
|
| 6 |
import matplotlib.pyplot as plt
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
# 1. Data Generation Function (customizable via UI filters)
|
| 9 |
def generate_custom_model_points(
|
|
|
|
| 55 |
# And ensure it's at least 1
|
| 56 |
duration_mth_col = np.maximum(1, duration_mth_col)
|
| 57 |
|
| 58 |
+
|
| 59 |
# Policy Count
|
| 60 |
if policy_count_fixed_val:
|
| 61 |
policy_count_col_val = np.ones(mp_count_val, dtype=int)
|
|
|
|
| 87 |
numerical_cols = ['age_at_entry', 'policy_term', 'policy_count', 'sum_assured', 'duration_mth']
|
| 88 |
summary_stats = df[numerical_cols].describe().round(2)
|
| 89 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
return summary_stats
|
| 91 |
|
| 92 |
+
def create_age_distribution_plot(df):
    """Create the age-at-entry histogram with a normal curve overlay.

    Args:
        df: DataFrame holding an ``age_at_entry`` column.

    Returns:
        The Matplotlib figure, or ``None`` when ``df`` is ``None`` or empty.
    """
    if df is None or df.empty:
        return None

    # Work on explicit figure/axes objects rather than pyplot's implicit
    # "current figure" so the returned figure is unambiguous.
    fig, ax = plt.subplots(figsize=(10, 6))

    ages = df['age_at_entry']
    ax.hist(ages, bins=20, density=True, alpha=0.7, color='skyblue', edgecolor='black')

    age_mean = ages.mean()
    age_std = ages.std()

    # The sample standard deviation is NaN for a single row and 0 when every
    # age is identical. The density formula divides by it, so the overlay is
    # only drawn when it is a usable positive number.
    if np.isfinite(age_std) and age_std > 0:
        x = np.linspace(ages.min(), ages.max(), 100)
        # Normal probability density evaluated by hand.
        y = (1 / (age_std * np.sqrt(2 * np.pi))) * np.exp(-0.5 * ((x - age_mean) / age_std) ** 2)
        ax.plot(x, y, 'r-', linewidth=2, label=f'Normal Curve (μ={age_mean:.1f}, σ={age_std:.1f})')
        # Only request a legend when there is a labelled artist to show.
        ax.legend()

    ax.set_title('Age at Entry Distribution with Normal Curve Overlay')
    ax.set_xlabel('Age at Entry')
    ax.set_ylabel('Density')
    ax.grid(True, alpha=0.3)
    fig.tight_layout()

    return fig
|
| 120 |
+
|
| 121 |
+
def create_sum_assured_plot(df):
    """Create the sum-assured histogram with a normal curve overlay.

    Args:
        df: DataFrame holding a ``sum_assured`` column.

    Returns:
        The Matplotlib figure, or ``None`` when ``df`` is ``None`` or empty.
    """
    if df is None or df.empty:
        return None

    # Work on explicit figure/axes objects rather than pyplot's implicit
    # "current figure" so the returned figure is unambiguous.
    fig, ax = plt.subplots(figsize=(10, 6))

    sums = df['sum_assured']
    ax.hist(sums, bins=30, density=True, alpha=0.7, color='lightgreen', edgecolor='black')

    sa_mean = sums.mean()
    sa_std = sums.std()

    # The sample standard deviation is NaN for a single row and 0 when every
    # value is identical. The density formula divides by it, so the overlay is
    # only drawn when it is a usable positive number.
    if np.isfinite(sa_std) and sa_std > 0:
        x = np.linspace(sums.min(), sums.max(), 100)
        # Normal probability density evaluated by hand.
        y = (1 / (sa_std * np.sqrt(2 * np.pi))) * np.exp(-0.5 * ((x - sa_mean) / sa_std) ** 2)
        # Matplotlib treats text between an even number of unescaped "$" as
        # mathtext, so the two currency signs are escaped to render literally.
        curve_label = f'Normal Curve (μ=\\${sa_mean:,.0f}, σ=\\${sa_std:,.0f})'
        ax.plot(x, y, 'r-', linewidth=2, label=curve_label)
        # Only request a legend when there is a labelled artist to show.
        ax.legend()

    ax.set_title('Sum Assured Distribution with Normal Curve Overlay')
    ax.set_xlabel('Sum Assured ($)')
    ax.set_ylabel('Density')
    ax.grid(True, alpha=0.3)

    # Show tick values as whole-dollar currency amounts.
    ax.xaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f'${x:,.0f}'))

    fig.tight_layout()

    return fig
|
| 154 |
|
| 155 |
+
def create_policy_terms_plot(df):
|
| 156 |
+
"""Create policy terms distribution plot."""
|
| 157 |
if df is None or df.empty:
|
| 158 |
+
return None
|
| 159 |
|
| 160 |
+
plt.figure(figsize=(10, 6))
|
|
|
|
|
|
|
|
|
|
| 161 |
|
| 162 |
+
# Get policy term counts
|
| 163 |
+
term_counts = df['policy_term'].value_counts().sort_index()
|
| 164 |
+
|
| 165 |
+
bars = plt.bar(term_counts.index.astype(str), term_counts.values, color='gold', edgecolor='black', alpha=0.8)
|
| 166 |
+
|
| 167 |
+
plt.title('Policy Term Distribution')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 168 |
plt.xlabel('Policy Term (Years)')
|
| 169 |
plt.ylabel('Count')
|
| 170 |
plt.grid(True, alpha=0.3, axis='y')
|
|
|
|
| 177 |
|
| 178 |
plt.tight_layout()
|
| 179 |
|
| 180 |
+
return plt.gcf()
|
| 181 |
|
| 182 |
+
def generate_categorical_summary(df):
    """Tabulate counts and percentage shares for the categorical columns.

    The result lists the ``sex`` values (most frequent first) and the
    ``policy_term`` values (ascending), each under its own heading row, with
    one blank spacer row between the two sections.

    Args:
        df: DataFrame holding ``sex`` and ``policy_term`` columns.

    Returns:
        A DataFrame with columns ``Category``, ``Value``, ``Count`` and
        ``Percentage``; an empty DataFrame when ``df`` is ``None`` or empty.
    """
    if df is None or df.empty:
        return pd.DataFrame()

    total = len(df)
    by_sex = df['sex'].value_counts()
    by_term = df['policy_term'].value_counts().sort_index()

    def section(heading, counts, describe):
        # One heading row followed by one row per distinct value.
        share = (counts / total * 100).round(2)
        rows = [[heading, '', '', '']]
        rows.extend(
            ['', describe(key), n, f'{share[key]:.1f}%']
            for key, n in counts.items()
        )
        return rows

    spacer = [['', '', '', '']]
    table = (
        section('Sex Distribution', by_sex, lambda value: value)
        + spacer
        + section('Policy Term Distribution', by_term, lambda value: f'{value} years')
    )

    return pd.DataFrame(table, columns=['Category', 'Value', 'Count', 'Percentage'])
|
| 214 |
|
| 215 |
def generate_business_insights(df):
|
| 216 |
"""Generate business insights and key metrics."""
|
| 217 |
if df is None or df.empty:
|
| 218 |
+
return "No data available. Please generate model points first."
|
| 219 |
|
| 220 |
total_policies = len(df)
|
| 221 |
total_sum_assured = df['sum_assured'].sum()
|
|
|
|
| 228 |
term_percentage = (df['policy_term'] == most_common_term).mean() * 100
|
| 229 |
|
| 230 |
# Age groups
|
| 231 |
+
young_pct = ((df['age_at_entry'] <= 30).mean() * 100)
|
| 232 |
+
middle_pct = (((df['age_at_entry'] > 30) & (df['age_at_entry'] <= 50)).mean() * 100)
|
| 233 |
+
mature_pct = ((df['age_at_entry'] > 50).mean() * 100)
|
| 234 |
+
|
| 235 |
+
insights_text = f"""## Business Insights & Key Metrics
|
| 236 |
+
|
| 237 |
+
### Portfolio Overview
|
| 238 |
+
- **Total Policies Generated**: {total_policies:,}
|
| 239 |
+
- **Total Sum Assured**: ${total_sum_assured:,.0f}
|
| 240 |
+
- **Average Sum Assured**: ${avg_sum_assured:,.0f}
|
| 241 |
+
- **Average Issue Age**: {avg_age:.1f} years
|
| 242 |
+
- **Average Policy Duration**: {avg_duration:.1f} months ({avg_duration/12:.1f} years)
|
| 243 |
+
|
| 244 |
+
### Demographics
|
| 245 |
+
- **Young Policyholders (≤30)**: {young_pct:.1f}%
|
| 246 |
+
- **Middle-aged (31-50)**: {middle_pct:.1f}%
|
| 247 |
+
- **Mature (>50)**: {mature_pct:.1f}%
|
| 248 |
+
|
| 249 |
+
### Product Mix
|
| 250 |
+
- **Most Popular Term**: {most_common_term} years ({term_percentage:.1f}% of policies)
|
| 251 |
+
- **Policy Duration Range**: {df['duration_mth'].min()} - {df['duration_mth'].max()} months
|
| 252 |
+
- **Sum Assured Range**: ${df['sum_assured'].min():,.0f} - ${df['sum_assured'].max():,.0f}
|
| 253 |
+
"""
|
|
|
|
| 254 |
|
| 255 |
return insights_text
|
| 256 |
|
| 257 |
# 3. Gradio App Definition
|
| 258 |
+
with gr.Blocks() as demo:
|
| 259 |
+
gr.Markdown("# Actuarial Model Points Generator")
|
| 260 |
gr.Markdown(
|
| 261 |
"Configure the parameters below to generate a custom set of seriatim model points. "
|
| 262 |
+
"The generated table can be viewed and downloaded as an Excel file."
|
| 263 |
)
|
| 264 |
|
| 265 |
df_state = gr.State() # To hold the generated DataFrame
|
|
|
|
| 304 |
value=True, label="Fixed Policy Count = 1 (Uncheck for variable count 1-100)"
|
| 305 |
)
|
| 306 |
|
| 307 |
+
generate_btn = gr.Button("Generate Model Points", variant="primary")
|
| 308 |
|
| 309 |
with gr.Column(scale=2):
|
| 310 |
model_points_display = gr.Dataframe(label="Generated Model Points")
|
| 311 |
download_excel_btn = gr.DownloadButton(
|
| 312 |
+
label="Download Excel",
|
| 313 |
+
value="model_points.xlsx", # Default filename
|
| 314 |
variant="secondary"
|
| 315 |
)
|
| 316 |
|
| 317 |
# Analytics Section
|
| 318 |
gr.Markdown("---")
|
| 319 |
|
| 320 |
+
# Business Insights
|
| 321 |
+
business_insights_display = gr.Markdown("Generate model points to see business insights and analytics...")
|
|
|
|
| 322 |
|
| 323 |
with gr.Row():
|
| 324 |
with gr.Column():
|
| 325 |
+
gr.Markdown("### Summary Statistics")
|
| 326 |
summary_stats_display = gr.Dataframe(label="Descriptive Statistics")
|
| 327 |
with gr.Column():
|
| 328 |
+
gr.Markdown("### Categorical Analysis")
|
| 329 |
categorical_display = gr.Dataframe(label="Category Distributions")
|
| 330 |
|
| 331 |
with gr.Row():
|
| 332 |
with gr.Column():
|
| 333 |
+
gr.Markdown("### Age Distribution with Normal Curve")
|
| 334 |
+
age_plot = gr.Plot(label="Age Distribution")
|
| 335 |
with gr.Column():
|
| 336 |
+
gr.Markdown("### Sum Assured Distribution with Normal Curve")
|
| 337 |
+
sa_plot = gr.Plot(label="Sum Assured Distribution")
|
| 338 |
|
| 339 |
with gr.Row():
|
| 340 |
with gr.Column():
|
| 341 |
+
gr.Markdown("### Policy Term Distribution")
|
|
|
|
|
|
|
|
|
|
| 342 |
terms_plot = gr.Plot(label="Policy Terms")
|
| 343 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 344 |
# 4. Event Handlers
|
| 345 |
def handle_generate_button_click(
|
| 346 |
mp_c, s, age_m, age_mx, sa_m, sa_mx, p_terms, incl_sex, pc_fixed
|
| 347 |
):
|
| 348 |
if int(age_m) >= int(age_mx):
|
| 349 |
gr.Warning("Minimum Age must be less than Maximum Age.")
|
| 350 |
+
return df_state.value, df_state.value, "Error: Invalid age range", pd.DataFrame(), pd.DataFrame(), None, None, None
|
| 351 |
if float(sa_m) >= float(sa_mx):
|
| 352 |
gr.Warning("Minimum Sum Assured must be less than Maximum Sum Assured.")
|
| 353 |
+
return df_state.value, df_state.value, "Error: Invalid sum assured range", pd.DataFrame(), pd.DataFrame(), None, None, None
|
|
|
|
| 354 |
if not p_terms:
|
| 355 |
gr.Warning("No Policy Terms selected. Using defaults: [10, 15, 20].")
|
| 356 |
|
| 357 |
+
gr.Info("Generating model points... Please wait.")
|
| 358 |
|
| 359 |
# Generate data
|
| 360 |
df = generate_custom_model_points(
|
|
|
|
| 364 |
# Generate analytics
|
| 365 |
insights = generate_business_insights(df)
|
| 366 |
summary_stats = generate_summary_statistics(df)
|
| 367 |
+
categorical_summary = generate_categorical_summary(df)
|
|
|
|
|
|
|
| 368 |
|
| 369 |
+
# Generate plots
|
| 370 |
+
age_fig = create_age_distribution_plot(df)
|
| 371 |
+
sa_fig = create_sum_assured_plot(df)
|
| 372 |
+
terms_fig = create_policy_terms_plot(df)
|
| 373 |
|
| 374 |
+
gr.Info(f"{len(df)} model points generated successfully!")
|
| 375 |
+
|
| 376 |
+
return df, df, insights, summary_stats, categorical_summary, age_fig, sa_fig, terms_fig
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 377 |
|
| 378 |
def handle_download_button_click(current_df_to_download):
|
| 379 |
if current_df_to_download is None or current_df_to_download.empty:
|
|
|
|
| 395 |
include_sex_input, policy_count_fixed_input
|
| 396 |
]
|
| 397 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 398 |
generate_btn.click(
|
| 399 |
fn=handle_generate_button_click,
|
| 400 |
inputs=inputs_list,
|
| 401 |
+
outputs=[model_points_display, df_state, business_insights_display,
|
| 402 |
+
summary_stats_display, categorical_display, age_plot, sa_plot, terms_plot]
|
| 403 |
)
|
| 404 |
|
| 405 |
download_excel_btn.click(
|
|
|
|
| 408 |
outputs=[download_excel_btn]
|
| 409 |
)
|
| 410 |
|
| 411 |
+
|
| 412 |
if __name__ == "__main__":
|
| 413 |
demo.launch()
|