alidenewade committed on
Commit
d38b638
·
verified ·
1 Parent(s): 25e7c74

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +289 -17
app.py CHANGED
@@ -3,6 +3,16 @@ import pandas as pd
3
  import numpy as np
4
  from numpy.random import default_rng
5
  import io # For BytesIO to handle file in memory
 
 
 
 
 
 
 
 
 
 
6
 
7
  # 1. Data Generation Function (customizable via UI filters)
8
  def generate_custom_model_points(
@@ -54,7 +64,6 @@ def generate_custom_model_points(
54
  # And ensure it's at least 1
55
  duration_mth_col = np.maximum(1, duration_mth_col)
56
 
57
-
58
  # Policy Count
59
  if policy_count_fixed_val:
60
  policy_count_col_val = np.ones(mp_count_val, dtype=int)
@@ -76,12 +85,208 @@ def generate_custom_model_points(
76
 
77
  return model_point_df
78
 
79
- # 2. Gradio App Definition
80
- with gr.Blocks() as demo:
81
- gr.Markdown("# Actuarial Model Points Generator")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  gr.Markdown(
83
  "Configure the parameters below to generate a custom set of seriatim model points. "
84
- "The generated table can be viewed and downloaded as an Excel file."
85
  )
86
 
87
  df_state = gr.State() # To hold the generated DataFrame
@@ -126,36 +331,98 @@ with gr.Blocks() as demo:
126
  value=True, label="Fixed Policy Count = 1 (Uncheck for variable count 1-100)"
127
  )
128
 
129
- generate_btn = gr.Button("Generate Model Points", variant="primary")
130
 
131
  with gr.Column(scale=2):
132
  model_points_display = gr.Dataframe(label="Generated Model Points")
133
  download_excel_btn = gr.DownloadButton(
134
- label="Download Excel",
135
- value="model_points.xlsx", # Default filename
136
  variant="secondary"
137
  )
138
 
139
- # 3. Event Handlers
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
140
  def handle_generate_button_click(
141
  mp_c, s, age_m, age_mx, sa_m, sa_mx, p_terms, incl_sex, pc_fixed
142
  ):
143
  if int(age_m) >= int(age_mx):
144
  gr.Warning("Minimum Age must be less than Maximum Age.")
145
- return df_state.value, df_state.value # Keep current table and state
146
  if float(sa_m) >= float(sa_mx):
147
  gr.Warning("Minimum Sum Assured must be less than Maximum Sum Assured.")
148
- return df_state.value, df_state.value
 
149
  if not p_terms:
150
  gr.Warning("No Policy Terms selected. Using defaults: [10, 15, 20].")
151
- # Generation function will handle default if p_terms is empty list
152
 
153
- gr.Info("Generating model points... Please wait.")
 
 
154
  df = generate_custom_model_points(
155
  mp_c, s, age_m, age_mx, sa_m, sa_mx, p_terms, incl_sex, pc_fixed
156
  )
157
- gr.Info(f"{len(df)} model points generated successfully!")
158
- return df, df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
 
160
  def handle_download_button_click(current_df_to_download):
161
  if current_df_to_download is None or current_df_to_download.empty:
@@ -177,10 +444,16 @@ with gr.Blocks() as demo:
177
  include_sex_input, policy_count_fixed_input
178
  ]
179
 
 
 
 
 
 
 
180
  generate_btn.click(
181
  fn=handle_generate_button_click,
182
  inputs=inputs_list,
183
- outputs=[model_points_display, df_state]
184
  )
185
 
186
  download_excel_btn.click(
@@ -189,6 +462,5 @@ with gr.Blocks() as demo:
189
  outputs=[download_excel_btn]
190
  )
191
 
192
-
193
  if __name__ == "__main__":
194
  demo.launch()
 
3
  import numpy as np
4
  from numpy.random import default_rng
5
  import io # For BytesIO to handle file in memory
6
+ import matplotlib.pyplot as plt
7
+ import seaborn as sns
8
+ from scipy import stats
9
+ import plotly.express as px
10
+ import plotly.graph_objects as go
11
+ from plotly.subplots import make_subplots
12
+
13
+ # Set style for matplotlib
14
+ plt.style.use('default')
15
+ sns.set_palette("husl")
16
 
17
  # 1. Data Generation Function (customizable via UI filters)
18
  def generate_custom_model_points(
 
64
  # And ensure it's at least 1
65
  duration_mth_col = np.maximum(1, duration_mth_col)
66
 
 
67
  # Policy Count
68
  if policy_count_fixed_val:
69
  policy_count_col_val = np.ones(mp_count_val, dtype=int)
 
85
 
86
  return model_point_df
87
 
88
+ # 2. Analytics Functions
89
def generate_summary_statistics(df):
    """Build a descriptive-statistics table for the numerical model-point columns.

    Args:
        df: Model-point DataFrame, or ``None``.

    Returns:
        A DataFrame with one column per numerical field found in ``df``. Its
        rows are the ``DataFrame.describe()`` statistics followed by ``mode``
        and ``variance``, all rounded to 2 decimals. An empty DataFrame is
        returned when ``df`` is ``None``/empty or contains none of the
        expected columns.
    """
    if df is None or df.empty:
        return pd.DataFrame()

    expected_cols = (
        'age_at_entry', 'policy_term', 'policy_count', 'sum_assured', 'duration_mth',
    )
    # Work with whichever expected columns exist rather than raising KeyError.
    present_cols = [col for col in expected_cols if col in df.columns]
    if not present_cols:
        return pd.DataFrame()

    numeric = df[present_cols]
    summary_stats = numeric.describe().round(2)

    def _first_mode(series):
        # mode() is empty for an all-NaN column, so indexing [0] would raise.
        modes = series.mode()
        return modes.iloc[0] if not modes.empty else float('nan')

    # describe() has no mode/variance rows; append them in the same column order.
    additional_stats = pd.DataFrame(
        {col: [_first_mode(numeric[col]), numeric[col].var()] for col in present_cols},
        index=['mode', 'variance'],
    ).round(2)

    return pd.concat([summary_stats, additional_stats])
109
+
110
def create_distribution_plots(df):
    """Create density histograms with a fitted normal-curve overlay.

    Args:
        df: Model-point DataFrame with ``age_at_entry``, ``sum_assured`` and
            ``duration_mth`` columns, or ``None``.

    Returns:
        A tuple ``(fig_age, fig_sa, fig_duration)`` of matplotlib figures, or
        ``(None, None, None)`` when ``df`` is ``None`` or empty.
    """
    if df is None or df.empty:
        return None, None, None

    fig_age = _plot_distribution(
        df['age_at_entry'],
        bins=20,
        color='skyblue',
        title='Age at Entry Distribution with Normal Curve Overlay',
        xlabel='Age at Entry',
        value_format=lambda v: f'{v:.1f}',
    )
    fig_sa = _plot_distribution(
        df['sum_assured'],
        bins=30,
        color='lightgreen',
        title='Sum Assured Distribution with Normal Curve Overlay',
        xlabel='Sum Assured ($)',
        # The legend label holds two dollar signs; left unescaped, matplotlib
        # would treat the text between them as a math expression.
        value_format=lambda v: rf'\${v:,.0f}',
        currency_axis=True,
    )
    fig_duration = _plot_distribution(
        df['duration_mth'],
        bins=25,
        color='lightcoral',
        title='Policy Duration (Months) Distribution with Normal Curve Overlay',
        xlabel='Duration (Months)',
        value_format=lambda v: f'{v:.1f}',
    )
    return fig_age, fig_sa, fig_duration


def _plot_distribution(values, *, bins, color, title, xlabel, value_format,
                       currency_axis=False):
    """Draw one density histogram of ``values`` plus a fitted normal curve.

    Args:
        values: Numeric pandas Series to plot.
        bins: Number of histogram bins.
        color: Histogram face colour.
        title: Axes title.
        xlabel: X-axis label.
        value_format: Callable turning the mean/std into legend text.
        currency_axis: When true, format x tick labels as ``$1,234``.

    Returns:
        The matplotlib ``Figure`` holding the plot.
    """
    # Local imports keep this block self-contained. A standalone Figure is
    # not registered with pyplot's global figure manager, so figures do not
    # pile up across requests in a long-running app.
    from matplotlib.figure import Figure
    from matplotlib.ticker import FuncFormatter

    fig = Figure(figsize=(10, 6))
    ax = fig.subplots()
    ax.hist(values, bins=bins, density=True, alpha=0.7, color=color, edgecolor='black')

    mean = values.mean()
    std = values.std()
    # std is NaN for a single row and 0 for a constant column; the normal pdf
    # is undefined in both cases, so the overlay (and its legend) is skipped.
    if np.isfinite(std) and std > 0:
        grid = np.linspace(values.min(), values.max(), 100)
        ax.plot(
            grid,
            stats.norm.pdf(grid, mean, std),
            'r-',
            linewidth=2,
            label=f'Normal Curve (μ={value_format(mean)}, σ={value_format(std)})',
        )
        ax.legend()

    ax.set_title(title, fontsize=14, fontweight='bold')
    ax.set_xlabel(xlabel)
    ax.set_ylabel('Density')
    ax.grid(True, alpha=0.3)
    if currency_axis:
        # A single "$" per tick label is rendered literally by matplotlib.
        ax.xaxis.set_major_formatter(FuncFormatter(lambda x, _pos: f'${x:,.0f}'))
    fig.tight_layout()
    return fig
177
+
178
def create_categorical_analysis(df):
    """Summarise the categorical variables and chart the policy-term mix.

    Args:
        df: Model-point DataFrame with a ``policy_term`` column and, if
            present, a ``sex`` column; or ``None``.

    Returns:
        A tuple ``(categorical_summary, fig_terms)``.
        ``categorical_summary`` has columns ``Variable``, ``Category``,
        ``Count`` and ``Percentage``. Each section starts with a heading row
        (only ``Variable`` filled) followed by one row per category.
        ``fig_terms`` is a matplotlib bar chart of policy-term counts.
        Returns ``(pd.DataFrame(), None)`` when ``df`` is ``None`` or empty.
    """
    if df is None or df.empty:
        return pd.DataFrame(), None

    total = len(df)
    term_dist = _category_distribution(df['policy_term'], total, sort_by_category=True)

    # (heading, distribution table, suffix appended to each category label)
    sections = []
    if 'sex' in df.columns:
        sections.append(('Sex Distribution', _category_distribution(df['sex'], total), ''))
    sections.append(('Policy Term Distribution', term_dist, ' years'))

    # Build the table row by row so every column has the same length. A dict
    # of separately sized lists makes pandas raise ValueError.
    rows = []
    for heading, dist, suffix in sections:
        rows.append({'Variable': heading, 'Category': '', 'Count': '', 'Percentage': ''})
        for category, count, pct in zip(dist['Category'], dist['Count'], dist['Percentage']):
            rows.append({
                'Variable': '',
                'Category': f'{category}{suffix}',
                'Count': int(count),
                'Percentage': f'{pct}%',
            })
    categorical_summary = pd.DataFrame(
        rows, columns=['Variable', 'Category', 'Count', 'Percentage']
    )

    # A standalone Figure is not registered with pyplot's global figure
    # manager, so repeated calls do not accumulate open figures.
    from matplotlib.figure import Figure

    fig_terms = Figure(figsize=(10, 6))
    ax = fig_terms.subplots()
    bars = ax.bar(
        term_dist['Category'].astype(str),
        term_dist['Count'],
        color='gold',
        edgecolor='black',
        alpha=0.8,
    )
    ax.set_title('Policy Term Distribution', fontsize=14, fontweight='bold')
    ax.set_xlabel('Policy Term (Years)')
    ax.set_ylabel('Count')
    ax.grid(True, alpha=0.3, axis='y')

    # Print each bar's count just above it.
    for bar in bars:
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width() / 2., height,
                f'{int(height)}', ha='center', va='bottom')

    fig_terms.tight_layout()

    return categorical_summary, fig_terms


def _category_distribution(series, total, sort_by_category=False):
    """Return a Category/Count/Percentage table for one categorical column."""
    counts = series.value_counts()
    if sort_by_category:
        counts = counts.sort_index()
    dist = pd.DataFrame({'Category': list(counts.index), 'Count': list(counts.values)})
    dist['Percentage'] = (dist['Count'] / total * 100).round(2)
    return dist
218
+
219
def create_correlation_analysis(df):
    """Compute pairwise correlations of the numerical columns and chart them.

    Args:
        df: Model-point DataFrame containing the numerical columns listed
            below, or ``None``.

    Returns:
        A tuple ``(fig_corr, corr_summary)``: a matplotlib heatmap figure and
        the correlation matrix rounded to 3 decimals. Returns
        ``(None, pd.DataFrame())`` when ``df`` is ``None`` or empty.
    """
    if df is None or df.empty:
        return None, pd.DataFrame()

    # A standalone Figure is not registered with pyplot's global figure
    # manager, so repeated calls do not accumulate open figures.
    from matplotlib.figure import Figure

    numerical_cols = ['age_at_entry', 'policy_term', 'policy_count', 'sum_assured', 'duration_mth']
    corr_matrix = df[numerical_cols].corr()

    fig_corr = Figure(figsize=(10, 8))
    ax = fig_corr.subplots()
    # Hide the diagonal and upper triangle; the matrix is symmetric.
    upper_mask = np.triu(np.ones_like(corr_matrix, dtype=bool))
    # Pass the Axes explicitly so seaborn does not draw on pyplot's
    # implicit "current" axes.
    sns.heatmap(
        corr_matrix,
        mask=upper_mask,
        annot=True,
        cmap='RdYlBu_r',
        center=0,
        square=True,
        fmt='.3f',
        cbar_kws={"shrink": .8},
        ax=ax,
    )
    ax.set_title('Correlation Matrix of Numerical Variables', fontsize=14, fontweight='bold')
    fig_corr.tight_layout()

    corr_summary = corr_matrix.round(3)

    return fig_corr, corr_summary
240
+
241
def generate_business_insights(df):
    """Render headline portfolio metrics as a Markdown string.

    Args:
        df: Model-point DataFrame with ``age_at_entry``, ``policy_term``,
            ``sum_assured`` and ``duration_mth`` columns, or ``None``.

    Returns:
        Markdown text with the portfolio overview, age-band split and product
        mix, or ``""`` when ``df`` is ``None`` or empty.
    """
    if df is None or df.empty:
        return ""

    ages = df['age_at_entry']
    terms = df['policy_term']
    sums_assured = df['sum_assured']
    durations = df['duration_mth']

    # NOTE(review): this counts rows (model points); policy_count is not
    # applied as a weight anywhere below. Confirm that is intended.
    total_policies = len(df)
    avg_duration = durations.mean()

    most_common_term = terms.mode()[0]
    term_percentage = (terms == most_common_term).mean() * 100

    # Share of rows in each entry-age band, as percentages.
    young_pct = (ages <= 30).mean() * 100
    middle_pct = ((ages > 30) & (ages <= 50)).mean() * 100
    mature_pct = (ages > 50).mean() * 100

    # Assemble the Markdown from explicit lines. A triple-quoted literal
    # would carry any source indentation into the text, and four leading
    # spaces make Markdown render a code block.
    lines = [
        "",
        "## 📊 Business Insights & Key Metrics",
        "",
        "### Portfolio Overview",
        f"- **Total Policies Generated**: {total_policies:,}",
        f"- **Total Sum Assured**: ${sums_assured.sum():,.0f}",
        f"- **Average Sum Assured**: ${sums_assured.mean():,.0f}",
        f"- **Average Issue Age**: {ages.mean():.1f} years",
        f"- **Average Policy Duration**: {avg_duration:.1f} months ({avg_duration / 12:.1f} years)",
        "",
        "### Demographics",
        f"- **Young Policyholders (≤30)**: {young_pct:.1f}%",
        f"- **Middle-aged (31-50)**: {middle_pct:.1f}%",
        f"- **Mature (>50)**: {mature_pct:.1f}%",
        "",
        "### Product Mix",
        f"- **Most Popular Term**: {most_common_term} years ({term_percentage:.1f}% of policies)",
        f"- **Policy Duration Range**: {durations.min()} - {durations.max()} months",
        f"- **Sum Assured Range**: ${sums_assured.min():,.0f} - ${sums_assured.max():,.0f}",
        "",
    ]
    return "\n".join(lines)
283
+
284
+ # 3. Gradio App Definition
285
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
286
+ gr.Markdown("# 🎯 Actuarial Model Points Generator with Analytics")
287
  gr.Markdown(
288
  "Configure the parameters below to generate a custom set of seriatim model points. "
289
+ "The generated table can be viewed and downloaded as an Excel file, complete with comprehensive analytics and insights."
290
  )
291
 
292
  df_state = gr.State() # To hold the generated DataFrame
 
331
  value=True, label="Fixed Policy Count = 1 (Uncheck for variable count 1-100)"
332
  )
333
 
334
+ generate_btn = gr.Button("Generate Model Points", variant="primary", size="lg")
335
 
336
  with gr.Column(scale=2):
337
  model_points_display = gr.Dataframe(label="Generated Model Points")
338
  download_excel_btn = gr.DownloadButton(
339
+ label="πŸ“₯ Download Excel",
340
+ value="model_points.xlsx",
341
  variant="secondary"
342
  )
343
 
344
+ # Analytics Section
345
+ gr.Markdown("---")
346
+
347
+ with gr.Row():
348
+ with gr.Column():
349
+ business_insights = gr.Markdown("Generate model points to see business insights...")
350
+
351
+ with gr.Row():
352
+ with gr.Column():
353
+ gr.Markdown("### πŸ“ˆ Summary Statistics")
354
+ summary_stats_display = gr.Dataframe(label="Descriptive Statistics")
355
+ with gr.Column():
356
+ gr.Markdown("### 🏷️ Categorical Analysis")
357
+ categorical_display = gr.Dataframe(label="Category Distributions")
358
+
359
+ with gr.Row():
360
+ with gr.Column():
361
+ gr.Markdown("### πŸ“Š Age Distribution")
362
+ age_plot = gr.Plot(label="Age Distribution with Normal Curve")
363
+ with gr.Column():
364
+ gr.Markdown("### πŸ’° Sum Assured Distribution")
365
+ sa_plot = gr.Plot(label="Sum Assured Distribution with Normal Curve")
366
+
367
+ with gr.Row():
368
+ with gr.Column():
369
+ gr.Markdown("### ⏱️ Duration Distribution")
370
+ duration_plot = gr.Plot(label="Duration Distribution with Normal Curve")
371
+ with gr.Column():
372
+ gr.Markdown("### πŸ“‹ Policy Term Distribution")
373
+ terms_plot = gr.Plot(label="Policy Terms")
374
+
375
+ with gr.Row():
376
+ with gr.Column():
377
+ gr.Markdown("### πŸ”— Correlation Analysis")
378
+ correlation_plot = gr.Plot(label="Correlation Heatmap")
379
+ with gr.Column():
380
+ gr.Markdown("### πŸ“‹ Correlation Matrix")
381
+ correlation_matrix_display = gr.Dataframe(label="Correlation Coefficients")
382
+
383
+ # 4. Event Handlers
384
  def handle_generate_button_click(
385
  mp_c, s, age_m, age_mx, sa_m, sa_mx, p_terms, incl_sex, pc_fixed
386
  ):
387
  if int(age_m) >= int(age_mx):
388
  gr.Warning("Minimum Age must be less than Maximum Age.")
389
+ return [df_state.value] * 10 # Return current state for all outputs
390
  if float(sa_m) >= float(sa_mx):
391
  gr.Warning("Minimum Sum Assured must be less than Maximum Sum Assured.")
392
+ return [df_state.value] * 10
393
+
394
  if not p_terms:
395
  gr.Warning("No Policy Terms selected. Using defaults: [10, 15, 20].")
 
396
 
397
+ gr.Info("Generating model points and analytics... Please wait.")
398
+
399
+ # Generate data
400
  df = generate_custom_model_points(
401
  mp_c, s, age_m, age_mx, sa_m, sa_mx, p_terms, incl_sex, pc_fixed
402
  )
403
+
404
+ # Generate analytics
405
+ insights = generate_business_insights(df)
406
+ summary_stats = generate_summary_statistics(df)
407
+ categorical_summary, terms_fig = create_categorical_analysis(df)
408
+ age_fig, sa_fig, duration_fig = create_distribution_plots(df)
409
+ corr_fig, corr_matrix = create_correlation_analysis(df)
410
+
411
+ gr.Info(f"βœ… {len(df)} model points generated successfully with complete analytics!")
412
+
413
+ return (
414
+ df, # model_points_display
415
+ df, # df_state
416
+ insights, # business_insights
417
+ summary_stats, # summary_stats_display
418
+ categorical_summary, # categorical_display
419
+ age_fig, # age_plot
420
+ sa_fig, # sa_plot
421
+ duration_fig, # duration_plot
422
+ terms_fig, # terms_plot
423
+ corr_fig, # correlation_plot
424
+ corr_matrix # correlation_matrix_display
425
+ )
426
 
427
  def handle_download_button_click(current_df_to_download):
428
  if current_df_to_download is None or current_df_to_download.empty:
 
444
  include_sex_input, policy_count_fixed_input
445
  ]
446
 
447
+ outputs_list = [
448
+ model_points_display, df_state, business_insights, summary_stats_display,
449
+ categorical_display, age_plot, sa_plot, duration_plot, terms_plot,
450
+ correlation_plot, correlation_matrix_display
451
+ ]
452
+
453
  generate_btn.click(
454
  fn=handle_generate_button_click,
455
  inputs=inputs_list,
456
+ outputs=outputs_list
457
  )
458
 
459
  download_excel_btn.click(
 
462
  outputs=[download_excel_btn]
463
  )
464
 
 
465
  if __name__ == "__main__":
466
  demo.launch()