alidenewade committed on
Commit
4a7f563
·
verified ·
1 Parent(s): b4e00d8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +96 -367
app.py CHANGED
@@ -1,394 +1,123 @@
1
  import gradio as gr
2
  import pandas as pd
3
  import numpy as np
4
- import matplotlib.pyplot as plt
5
- import seaborn as sns
6
  from numpy.random import default_rng
7
- import plotly.express as px
8
- import plotly.graph_objects as go
9
- from plotly.subplots import make_subplots
10
- import warnings
11
- warnings.filterwarnings('ignore')
12
 
13
- # Set style for matplotlib
14
- plt.style.use('default')
15
- sns.set_palette("husl")
16
-
17
- def generate_model_points(mp_count=10000, age_min=20, age_max=59,
18
- sum_assured_min=10000, sum_assured_max=1000000,
19
- policy_terms=[10, 15, 20], include_sex=True,
20
- policy_count_fixed=True, seed=12345):
21
  """
22
- Generate seriatim model points for actuarial analysis
 
23
  """
24
- # Set random seed for reproducibility
25
- rng = default_rng(seed)
26
 
27
- # Issue Age (Integer): age_min - age_max year old
28
- age_at_entry = rng.integers(low=age_min, high=age_max+1, size=mp_count)
29
 
30
- # Sex (Char) - optional
31
- if include_sex:
32
- Sex = ["M", "F"]
33
- sex = np.fromiter(map(lambda i: Sex[i], rng.integers(low=0, high=len(Sex), size=mp_count)), np.dtype('<U1'))
34
- else:
35
- sex = np.full(mp_count, "U") # Unknown/Unspecified
36
 
37
- # Policy Term (Integer): from policy_terms list
38
- policy_term_options = np.array(policy_terms)
39
- policy_term = rng.choice(policy_term_options, size=mp_count)
40
 
41
- # Sum Assured (Float): sum_assured_min - sum_assured_max
42
- sum_assured = np.round((sum_assured_max - sum_assured_min) * rng.random(size=mp_count) + sum_assured_min, -3)
43
 
44
- # Duration in month (Int): 1 <= Duration(mth) < Policy Term in month
45
- duration_mth = np.floor((policy_term * 12 - 1) * rng.random(size=mp_count)).astype(int) + 1
 
46
 
47
- # Policy Count (Integer): 1 (fixed) or variable
48
- if policy_count_fixed:
49
- policy_count = np.ones(mp_count, dtype=int)
50
- else:
51
- policy_count = rng.integers(low=1, high=101, size=mp_count)
52
 
53
  # Create DataFrame
54
- attrs = ["age_at_entry", "sex", "policy_term", "policy_count", "sum_assured", "duration_mth"]
55
- data = [age_at_entry, sex, policy_term, policy_count, sum_assured, duration_mth]
56
-
57
- model_point_table = pd.DataFrame(dict(zip(attrs, data)), index=range(1, mp_count+1))
58
- model_point_table.index.name = "policy_id"
59
-
60
- return model_point_table
61
-
62
- def create_summary_stats(df):
63
- """Generate summary statistics for the model points"""
64
- summary_stats = []
65
-
66
- # Numeric columns
67
- numeric_cols = ['age_at_entry', 'policy_term', 'sum_assured', 'duration_mth', 'policy_count']
68
-
69
- for col in numeric_cols:
70
- if col in df.columns:
71
- stats = {
72
- 'Variable': col.replace('_', ' ').title(),
73
- 'Count': f"{len(df[col]):,}",
74
- 'Mean': f"{df[col].mean():.2f}",
75
- 'Std Dev': f"{df[col].std():.2f}",
76
- 'Min': f"{df[col].min():,.0f}",
77
- 'Max': f"{df[col].max():,.0f}",
78
- 'Median': f"{df[col].median():.2f}"
79
- }
80
- summary_stats.append(stats)
81
-
82
- # Categorical columns
83
- if 'sex' in df.columns:
84
- sex_counts = df['sex'].value_counts()
85
- for sex_val, count in sex_counts.items():
86
- stats = {
87
- 'Variable': f'Sex ({sex_val})',
88
- 'Count': f"{count:,}",
89
- 'Mean': f"{count/len(df)*100:.1f}%",
90
- 'Std Dev': '-',
91
- 'Min': '-',
92
- 'Max': '-',
93
- 'Median': '-'
94
- }
95
- summary_stats.append(stats)
96
-
97
- return pd.DataFrame(summary_stats)
98
-
99
- def create_distribution_plots(df):
100
- """Create distribution plots for key variables"""
101
- fig = make_subplots(
102
- rows=2, cols=3,
103
- subplot_titles=('Age at Entry', 'Policy Term', 'Sum Assured',
104
- 'Duration (Months)', 'Policy Count', 'Sex Distribution'),
105
- specs=[[{'type': 'histogram'}, {'type': 'histogram'}, {'type': 'histogram'}],
106
- [{'type': 'histogram'}, {'type': 'histogram'}, {'type': 'bar'}]]
107
- )
108
-
109
- # Age at Entry
110
- fig.add_trace(
111
- go.Histogram(x=df['age_at_entry'], name='Age at Entry', nbinsx=20),
112
- row=1, col=1
113
- )
114
-
115
- # Policy Term
116
- fig.add_trace(
117
- go.Histogram(x=df['policy_term'], name='Policy Term', nbinsx=10),
118
- row=1, col=2
119
- )
120
-
121
- # Sum Assured
122
- fig.add_trace(
123
- go.Histogram(x=df['sum_assured'], name='Sum Assured', nbinsx=30),
124
- row=1, col=3
125
  )
126
 
127
- # Duration in Months
128
- fig.add_trace(
129
- go.Histogram(x=df['duration_mth'], name='Duration (Months)', nbinsx=25),
130
- row=2, col=1
131
- )
132
-
133
- # Policy Count
134
- fig.add_trace(
135
- go.Histogram(x=df['policy_count'], name='Policy Count', nbinsx=20),
136
- row=2, col=2
137
- )
138
-
139
- # Sex Distribution
140
- if 'sex' in df.columns:
141
- sex_counts = df['sex'].value_counts()
142
- fig.add_trace(
143
- go.Bar(x=sex_counts.index, y=sex_counts.values, name='Sex Distribution'),
144
- row=2, col=3
145
- )
146
-
147
- fig.update_layout(
148
- height=800,
149
- title_text="Model Points Distribution Analysis",
150
- showlegend=False
151
- )
152
-
153
- return fig
154
-
155
- def create_correlation_heatmap(df):
156
- """Create correlation heatmap for numeric variables"""
157
- numeric_cols = ['age_at_entry', 'policy_term', 'sum_assured', 'duration_mth', 'policy_count']
158
- available_cols = [col for col in numeric_cols if col in df.columns]
159
-
160
- if len(available_cols) > 1:
161
- corr_matrix = df[available_cols].corr()
162
-
163
- fig = go.Figure(data=go.Heatmap(
164
- z=corr_matrix.values,
165
- x=corr_matrix.columns,
166
- y=corr_matrix.columns,
167
- colorscale='RdBu',
168
- zmid=0,
169
- text=corr_matrix.values.round(3),
170
- texttemplate='%{text}',
171
- textfont={"size": 12},
172
- hoverongaps=False
173
- ))
174
-
175
- fig.update_layout(
176
- title='Correlation Matrix of Model Point Variables',
177
- width=600,
178
- height=500
179
- )
180
-
181
- return fig
182
- else:
183
- return go.Figure().add_annotation(text="Not enough numeric variables for correlation analysis")
184
-
185
- def create_age_term_analysis(df):
186
- """Create age vs policy term analysis"""
187
- fig = px.box(df, x='policy_term', y='age_at_entry',
188
- title='Age at Entry Distribution by Policy Term',
189
- labels={'policy_term': 'Policy Term (Years)', 'age_at_entry': 'Age at Entry'})
190
-
191
- fig.update_layout(height=400)
192
- return fig
193
-
194
- def create_portfolio_metrics(df):
195
- """Calculate portfolio-level metrics"""
196
- metrics = {}
197
-
198
- # Total exposure
199
- metrics['Total Policies'] = f"{len(df):,}"
200
- metrics['Total Sum Assured'] = f"${df['sum_assured'].sum():,.0f}"
201
- metrics['Average Sum Assured'] = f"${df['sum_assured'].mean():,.0f}"
202
-
203
- # Age metrics
204
- metrics['Average Age at Entry'] = f"{df['age_at_entry'].mean():.1f} years"
205
- metrics['Age Range'] = f"{df['age_at_entry'].min()}-{df['age_at_entry'].max()} years"
206
-
207
- # Policy term metrics
208
- metrics['Average Policy Term'] = f"{df['policy_term'].mean():.1f} years"
209
- term_dist = df['policy_term'].value_counts().sort_index()
210
- metrics['Policy Term Distribution'] = ', '.join([f"{term}Y: {count:,}" for term, count in term_dist.items()])
211
-
212
- # Duration metrics
213
- metrics['Average Duration'] = f"{df['duration_mth'].mean():.1f} months"
214
- metrics['Duration Range'] = f"{df['duration_mth'].min()}-{df['duration_mth'].max()} months"
215
-
216
- # Convert to DataFrame for display
217
- metrics_df = pd.DataFrame(list(metrics.items()), columns=['Metric', 'Value'])
218
- return metrics_df
219
-
220
- def export_to_csv(df):
221
- """Export dataframe to CSV string"""
222
- return df.to_csv()
223
-
224
- # Create the Gradio interface
225
- with gr.Blocks(title="Actuarial Model Points Generator") as demo:
226
- gr.Markdown("""
227
- # 📊 Actuarial Model Points Generator
228
-
229
- Generate synthetic seriatim policy data for actuarial modeling, cluster analysis, and portfolio testing.
230
- Perfect for creating realistic test datasets for insurance product development and risk analysis.
231
- """)
232
 
 
233
  with gr.Row():
234
- with gr.Column(scale=1):
235
- gr.Markdown("### Generation Parameters")
236
-
237
- # Basic parameters
238
- mp_count = gr.Slider(
239
- minimum=100, maximum=50000, value=10000, step=100,
240
- label="Number of Model Points"
241
- )
242
-
243
- seed = gr.Number(
244
- value=12345, precision=0,
245
- label="Random Seed (for reproducibility)"
246
- )
247
-
248
- # Age parameters
249
- gr.Markdown("#### Age Parameters")
250
- age_min = gr.Slider(
251
- minimum=18, maximum=40, value=20, step=1,
252
- label="Minimum Age at Entry"
253
- )
254
- age_max = gr.Slider(
255
- minimum=45, maximum=80, value=59, step=1,
256
- label="Maximum Age at Entry"
257
- )
258
-
259
- # Sum Assured parameters
260
- gr.Markdown("#### Sum Assured Parameters")
261
- sum_assured_min = gr.Number(
262
- value=10000,
263
- label="Minimum Sum Assured ($)"
264
- )
265
- sum_assured_max = gr.Number(
266
- value=1000000,
267
- label="Maximum Sum Assured ($)"
268
- )
269
-
270
- # Policy options
271
- gr.Markdown("#### Policy Options")
272
- policy_terms = gr.CheckboxGroup(
273
- choices=[5, 10, 15, 20, 25, 30],
274
- value=[10, 15, 20],
275
- label="Available Policy Terms (Years)"
276
- )
277
-
278
- include_sex = gr.Checkbox(
279
- value=True,
280
- label="Include Sex (M/F) in model points"
281
- )
282
-
283
- policy_count_fixed = gr.Checkbox(
284
- value=True,
285
- label="Fixed Policy Count = 1 (uncheck for variable 1-100)"
286
- )
287
-
288
- generate_btn = gr.Button("🎲 Generate Model Points", variant="primary")
289
-
290
- with gr.Column(scale=2):
291
- with gr.Tabs():
292
- with gr.TabItem("📋 Data Table"):
293
- model_points_table = gr.Dataframe(
294
- label="Generated Model Points",
295
- # height=400, <-- This line was removed/commented out
296
- interactive=False
297
- )
298
-
299
- download_btn = gr.DownloadButton(
300
- label="📥 Download CSV",
301
- variant="secondary"
302
- )
303
-
304
- with gr.TabItem("📊 Distributions"):
305
- distribution_plot = gr.Plot(label="Variable Distributions")
306
-
307
- with gr.TabItem("📈 Analytics"):
308
- with gr.Row():
309
- correlation_plot = gr.Plot(label="Correlation Analysis")
310
- age_term_plot = gr.Plot(label="Age vs Policy Term")
311
-
312
- with gr.TabItem("📋 Statistics"):
313
- with gr.Row():
314
- with gr.Column():
315
- portfolio_metrics = gr.Dataframe(label="Portfolio Metrics")
316
- with gr.Column():
317
- summary_stats = gr.Dataframe(label="Summary Statistics")
318
-
319
- gr.Markdown("""
320
- ### 🎯 Use Cases
321
-
322
- **Actuarial Applications:**
323
- - **Cluster Analysis**: Group similar policies for pricing and reserving
324
- - **Portfolio Testing**: Stress test models with synthetic data
325
- - **Product Development**: Analyze policy mix and profitability
326
- - **Risk Management**: Understand exposure concentrations
327
-
328
- **Key Features:**
329
- - **Realistic Distributions**: Age, term, and sum assured follow typical insurance patterns
330
- - **Existing Policies**: Duration > 0 represents in-force business
331
- - **Flexible Parameters**: Customize age ranges, policy terms, and sum assured limits
332
- - **Reproducible**: Fixed seed ensures consistent results
333
-
334
- **Generated Variables:**
335
- - `policy_id`: Unique identifier for each policy
336
- - `age_at_entry`: Issue age (customizable range)
337
- - `sex`: M/F indicator (optional)
338
- - `policy_term`: Term in years (selectable options)
339
- - `policy_count`: Number of policies (1 or variable)
340
- - `sum_assured`: Coverage amount (customizable range)
341
- - `duration_mth`: Months since issue (1 to term-1)
342
- """)
343
-
344
- # Event handlers
345
- def generate_and_analyze(mp_count, age_min, age_max, sum_assured_min, sum_assured_max,
346
- policy_terms, include_sex, policy_count_fixed, seed):
347
- """Generate model points and all analyses"""
348
- if not policy_terms:
349
- policy_terms = [10, 15, 20] # Default if none selected
350
-
351
- # Generate model points
352
- df = generate_model_points(
353
- mp_count=int(mp_count),
354
- age_min=int(age_min),
355
- age_max=int(age_max),
356
- sum_assured_min=sum_assured_min,
357
- sum_assured_max=sum_assured_max,
358
- policy_terms=policy_terms,
359
- include_sex=include_sex,
360
- policy_count_fixed=policy_count_fixed,
361
- seed=int(seed)
362
- )
363
-
364
- # Generate analyses
365
- dist_plot = create_distribution_plots(df)
366
- corr_plot = create_correlation_heatmap(df)
367
- age_term_plot = create_age_term_analysis(df)
368
- portfolio_metrics_df = create_portfolio_metrics(df)
369
- summary_stats_df = create_summary_stats(df)
370
- csv_data = export_to_csv(df)
371
-
372
- return (df, dist_plot, corr_plot, age_term_plot,
373
- portfolio_metrics_df, summary_stats_df, csv_data)
374
 
375
- # Connect the generate button
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
376
  generate_btn.click(
377
- fn=generate_and_analyze,
378
- inputs=[mp_count, age_min, age_max, sum_assured_min, sum_assured_max,
379
- policy_terms, include_sex, policy_count_fixed, seed],
380
- outputs=[model_points_table, distribution_plot, correlation_plot,
381
- age_term_plot, portfolio_metrics, summary_stats, download_btn]
382
  )
383
 
384
- # Initialize with default values
385
- demo.load(
386
- fn=generate_and_analyze,
387
- inputs=[mp_count, age_min, age_max, sum_assured_min, sum_assured_max,
388
- policy_terms, include_sex, policy_count_fixed, seed],
389
- outputs=[model_points_table, distribution_plot, correlation_plot,
390
- age_term_plot, portfolio_metrics, summary_stats, download_btn]
391
  )
 
 
 
 
392
 
393
  if __name__ == "__main__":
394
  demo.launch()
 
1
  import gradio as gr
2
  import pandas as pd
3
  import numpy as np
 
 
4
  from numpy.random import default_rng
5
+ import io # For BytesIO to handle file in memory
 
 
 
 
6
 
7
+ # 1. Data Generation Function (adapted from your script)
8
+ def generate_cluster_model_points():
 
 
 
 
 
 
9
  """
10
+ Generates seriatim model points based on the specifications
11
+ from generate_model_points_for_cluster.py.
12
  """
13
+ rng = default_rng(12345) # Fixed seed for reproducibility
14
+ MPCount = 10000 # Number of Model Points
15
 
16
+ # Issue Age (Integer): 20 - 59 year old
17
+ age_at_entry = rng.integers(low=20, high=60, size=MPCount)
18
 
19
+ # Sex (Char)
20
+ sex_options = ["M", "F"]
21
+ sex_col = np.fromiter(map(lambda i: sex_options[i], rng.integers(low=0, high=len(sex_options), size=MPCount)), np.dtype('<U1'))
 
 
 
22
 
23
+ # Policy Term (Integer): 10, 15, 20
24
+ policy_term_col = rng.integers(low=0, high=3, size=MPCount) * 5 + 10
 
25
 
26
+ # Sum Assured (Float): 10,000 - 1,000,000
27
+ sum_assured_col = np.round((1000000 - 10000) * rng.random(size=MPCount) + 10000, -3)
28
 
29
+ # Duration in month (Int): 0 < Duration(mth) < Policy Term in month
30
+ # Ensures duration_mth is at least 1 and less than policy_term_col in months.
31
+ duration_mth_col = np.floor((policy_term_col * 12 - 1) * rng.random(size=MPCount)).astype(int) + 1
32
 
33
+ # Policy Count (Integer): 1 for all model points
34
+ policy_count_col = 1
 
 
 
35
 
36
  # Create DataFrame
37
+ data_dict = {
38
+ "age_at_entry": age_at_entry,
39
+ "sex": sex_col,
40
+ "policy_term": policy_term_col,
41
+ "policy_count": policy_count_col, # Pandas will broadcast this scalar to all rows
42
+ "sum_assured": sum_assured_col,
43
+ "duration_mth": duration_mth_col
44
+ }
45
+
46
+ # Create index named "policy_id" starting from 1
47
+ model_point_df = pd.DataFrame(data_dict, index=pd.RangeIndex(start=1, stop=MPCount + 1, name="policy_id"))
48
+
49
+ return model_point_df
50
+
51
+ # 2. Gradio App Definition
52
+ with gr.Blocks() as demo: # Default theme and font
53
+ gr.Markdown("# Actuarial Model Points Generator (Cluster Version)")
54
+ gr.Markdown(
55
+ "This app generates 10,000 seriatim model points based on the logic from the "
56
+ "`generate_model_points_for_cluster.py` script.\n"
57
+ "Click 'Generate Model Points' to view the table, then 'Download Excel' to save the data."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  )
59
 
60
+ # State to store the generated DataFrame
61
+ df_state = gr.State()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
 
63
+ # UI Elements
64
  with gr.Row():
65
+ generate_btn = gr.Button("Generate Model Points", variant="primary")
66
+
67
+ model_points_display = gr.Dataframe(label="Generated Model Points")
68
+
69
+ download_excel_btn = gr.DownloadButton(
70
+ label="Download Excel",
71
+ value="model_points.xlsx", # Sets the default filename for download
72
+ variant="secondary"
73
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
+ # 3. Event Handlers
76
+ def handle_generate_button_click():
77
+ """
78
+ Called when the 'Generate Model Points' button is clicked.
79
+ Generates data and updates the UI.
80
+ """
81
+ gr.Info("Generating model points... Please wait.")
82
+ df = generate_cluster_model_points()
83
+ gr.Info(f"{len(df)} model points generated successfully!")
84
+ return df, df # Update both the Dataframe display and the state
85
+
86
+ def handle_download_button_click(current_df_to_download):
87
+ """
88
+ Called when the 'Download Excel' button is clicked.
89
+ Prepares the DataFrame for download as an Excel file.
90
+ """
91
+ if current_df_to_download is None or current_df_to_download.empty:
92
+ gr.Warning("No data available to download. Please generate model points first.")
93
+ # Provide an empty Excel file to prevent download error if button is clicked prematurely
94
+ empty_excel_output = io.BytesIO()
95
+ pd.DataFrame().to_excel(empty_excel_output, index=False)
96
+ empty_excel_output.seek(0)
97
+ return empty_excel_output
98
+
99
+ excel_output = io.BytesIO()
100
+ # The DataFrame's index (policy_id) will be included by default
101
+ current_df_to_download.to_excel(excel_output, sheet_name='ModelPoints', engine='xlsxwriter', index=True)
102
+ excel_output.seek(0)
103
+ return excel_output
104
+
105
+ # Wire the button clicks to their handler functions
106
  generate_btn.click(
107
+ fn=handle_generate_button_click,
108
+ inputs=None, # No inputs from UI needed for generation
109
+ outputs=[model_points_display, df_state]
 
 
110
  )
111
 
112
+ download_excel_btn.click(
113
+ fn=handle_download_button_click,
114
+ inputs=[df_state], # Takes the DataFrame stored in the state
115
+ outputs=[download_excel_btn] # The DownloadButton itself is the output for file streams
 
 
 
116
  )
117
+
118
+ # Optionally, load data when the app starts (or leave it empty until generate is clicked)
119
+ # demo.load(handle_generate_button_click, outputs=[model_points_display, df_state])
120
+
121
 
122
  if __name__ == "__main__":
123
  demo.launch()