Update app.py
Browse files
app.py
CHANGED
|
@@ -14,58 +14,58 @@ warnings.filterwarnings('ignore')
|
|
| 14 |
plt.style.use('default')
|
| 15 |
sns.set_palette("husl")
|
| 16 |
|
| 17 |
-
def generate_model_points(mp_count=10000, age_min=20, age_max=59,
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
"""
|
| 22 |
Generate seriatim model points for actuarial analysis
|
| 23 |
"""
|
| 24 |
# Set random seed for reproducibility
|
| 25 |
rng = default_rng(seed)
|
| 26 |
-
|
| 27 |
# Issue Age (Integer): age_min - age_max year old
|
| 28 |
age_at_entry = rng.integers(low=age_min, high=age_max+1, size=mp_count)
|
| 29 |
-
|
| 30 |
# Sex (Char) - optional
|
| 31 |
if include_sex:
|
| 32 |
Sex = ["M", "F"]
|
| 33 |
sex = np.fromiter(map(lambda i: Sex[i], rng.integers(low=0, high=len(Sex), size=mp_count)), np.dtype('<U1'))
|
| 34 |
else:
|
| 35 |
sex = np.full(mp_count, "U") # Unknown/Unspecified
|
| 36 |
-
|
| 37 |
# Policy Term (Integer): from policy_terms list
|
| 38 |
policy_term_options = np.array(policy_terms)
|
| 39 |
policy_term = rng.choice(policy_term_options, size=mp_count)
|
| 40 |
-
|
| 41 |
# Sum Assured (Float): sum_assured_min - sum_assured_max
|
| 42 |
sum_assured = np.round((sum_assured_max - sum_assured_min) * rng.random(size=mp_count) + sum_assured_min, -3)
|
| 43 |
-
|
| 44 |
# Duration in month (Int): 1 <= Duration(mth) < Policy Term in month
|
| 45 |
duration_mth = np.floor((policy_term * 12 - 1) * rng.random(size=mp_count)).astype(int) + 1
|
| 46 |
-
|
| 47 |
# Policy Count (Integer): 1 (fixed) or variable
|
| 48 |
if policy_count_fixed:
|
| 49 |
policy_count = np.ones(mp_count, dtype=int)
|
| 50 |
else:
|
| 51 |
policy_count = rng.integers(low=1, high=101, size=mp_count)
|
| 52 |
-
|
| 53 |
# Create DataFrame
|
| 54 |
attrs = ["age_at_entry", "sex", "policy_term", "policy_count", "sum_assured", "duration_mth"]
|
| 55 |
data = [age_at_entry, sex, policy_term, policy_count, sum_assured, duration_mth]
|
| 56 |
-
|
| 57 |
model_point_table = pd.DataFrame(dict(zip(attrs, data)), index=range(1, mp_count+1))
|
| 58 |
model_point_table.index.name = "policy_id"
|
| 59 |
-
|
| 60 |
return model_point_table
|
| 61 |
|
| 62 |
def create_summary_stats(df):
|
| 63 |
"""Generate summary statistics for the model points"""
|
| 64 |
summary_stats = []
|
| 65 |
-
|
| 66 |
# Numeric columns
|
| 67 |
numeric_cols = ['age_at_entry', 'policy_term', 'sum_assured', 'duration_mth', 'policy_count']
|
| 68 |
-
|
| 69 |
for col in numeric_cols:
|
| 70 |
if col in df.columns:
|
| 71 |
stats = {
|
|
@@ -78,7 +78,7 @@ def create_summary_stats(df):
|
|
| 78 |
'Median': f"{df[col].median():.2f}"
|
| 79 |
}
|
| 80 |
summary_stats.append(stats)
|
| 81 |
-
|
| 82 |
# Categorical columns
|
| 83 |
if 'sex' in df.columns:
|
| 84 |
sex_counts = df['sex'].value_counts()
|
|
@@ -93,49 +93,49 @@ def create_summary_stats(df):
|
|
| 93 |
'Median': '-'
|
| 94 |
}
|
| 95 |
summary_stats.append(stats)
|
| 96 |
-
|
| 97 |
return pd.DataFrame(summary_stats)
|
| 98 |
|
| 99 |
def create_distribution_plots(df):
|
| 100 |
"""Create distribution plots for key variables"""
|
| 101 |
fig = make_subplots(
|
| 102 |
rows=2, cols=3,
|
| 103 |
-
subplot_titles=('Age at Entry', 'Policy Term', 'Sum Assured',
|
| 104 |
-
|
| 105 |
specs=[[{'type': 'histogram'}, {'type': 'histogram'}, {'type': 'histogram'}],
|
| 106 |
[{'type': 'histogram'}, {'type': 'histogram'}, {'type': 'bar'}]]
|
| 107 |
)
|
| 108 |
-
|
| 109 |
# Age at Entry
|
| 110 |
fig.add_trace(
|
| 111 |
go.Histogram(x=df['age_at_entry'], name='Age at Entry', nbinsx=20),
|
| 112 |
row=1, col=1
|
| 113 |
)
|
| 114 |
-
|
| 115 |
# Policy Term
|
| 116 |
fig.add_trace(
|
| 117 |
go.Histogram(x=df['policy_term'], name='Policy Term', nbinsx=10),
|
| 118 |
row=1, col=2
|
| 119 |
)
|
| 120 |
-
|
| 121 |
# Sum Assured
|
| 122 |
fig.add_trace(
|
| 123 |
go.Histogram(x=df['sum_assured'], name='Sum Assured', nbinsx=30),
|
| 124 |
row=1, col=3
|
| 125 |
)
|
| 126 |
-
|
| 127 |
# Duration in Months
|
| 128 |
fig.add_trace(
|
| 129 |
go.Histogram(x=df['duration_mth'], name='Duration (Months)', nbinsx=25),
|
| 130 |
row=2, col=1
|
| 131 |
)
|
| 132 |
-
|
| 133 |
# Policy Count
|
| 134 |
fig.add_trace(
|
| 135 |
go.Histogram(x=df['policy_count'], name='Policy Count', nbinsx=20),
|
| 136 |
row=2, col=2
|
| 137 |
)
|
| 138 |
-
|
| 139 |
# Sex Distribution
|
| 140 |
if 'sex' in df.columns:
|
| 141 |
sex_counts = df['sex'].value_counts()
|
|
@@ -143,23 +143,23 @@ def create_distribution_plots(df):
|
|
| 143 |
go.Bar(x=sex_counts.index, y=sex_counts.values, name='Sex Distribution'),
|
| 144 |
row=2, col=3
|
| 145 |
)
|
| 146 |
-
|
| 147 |
fig.update_layout(
|
| 148 |
height=800,
|
| 149 |
title_text="Model Points Distribution Analysis",
|
| 150 |
showlegend=False
|
| 151 |
)
|
| 152 |
-
|
| 153 |
return fig
|
| 154 |
|
| 155 |
def create_correlation_heatmap(df):
|
| 156 |
"""Create correlation heatmap for numeric variables"""
|
| 157 |
numeric_cols = ['age_at_entry', 'policy_term', 'sum_assured', 'duration_mth', 'policy_count']
|
| 158 |
available_cols = [col for col in numeric_cols if col in df.columns]
|
| 159 |
-
|
| 160 |
if len(available_cols) > 1:
|
| 161 |
corr_matrix = df[available_cols].corr()
|
| 162 |
-
|
| 163 |
fig = go.Figure(data=go.Heatmap(
|
| 164 |
z=corr_matrix.values,
|
| 165 |
x=corr_matrix.columns,
|
|
@@ -171,48 +171,48 @@ def create_correlation_heatmap(df):
|
|
| 171 |
textfont={"size": 12},
|
| 172 |
hoverongaps=False
|
| 173 |
))
|
| 174 |
-
|
| 175 |
fig.update_layout(
|
| 176 |
title='Correlation Matrix of Model Point Variables',
|
| 177 |
width=600,
|
| 178 |
height=500
|
| 179 |
)
|
| 180 |
-
|
| 181 |
return fig
|
| 182 |
else:
|
| 183 |
return go.Figure().add_annotation(text="Not enough numeric variables for correlation analysis")
|
| 184 |
|
| 185 |
def create_age_term_analysis(df):
|
| 186 |
"""Create age vs policy term analysis"""
|
| 187 |
-
fig = px.box(df, x='policy_term', y='age_at_entry',
|
| 188 |
title='Age at Entry Distribution by Policy Term',
|
| 189 |
labels={'policy_term': 'Policy Term (Years)', 'age_at_entry': 'Age at Entry'})
|
| 190 |
-
|
| 191 |
fig.update_layout(height=400)
|
| 192 |
return fig
|
| 193 |
|
| 194 |
def create_portfolio_metrics(df):
|
| 195 |
"""Calculate portfolio-level metrics"""
|
| 196 |
metrics = {}
|
| 197 |
-
|
| 198 |
# Total exposure
|
| 199 |
metrics['Total Policies'] = f"{len(df):,}"
|
| 200 |
metrics['Total Sum Assured'] = f"${df['sum_assured'].sum():,.0f}"
|
| 201 |
metrics['Average Sum Assured'] = f"${df['sum_assured'].mean():,.0f}"
|
| 202 |
-
|
| 203 |
# Age metrics
|
| 204 |
metrics['Average Age at Entry'] = f"{df['age_at_entry'].mean():.1f} years"
|
| 205 |
metrics['Age Range'] = f"{df['age_at_entry'].min()}-{df['age_at_entry'].max()} years"
|
| 206 |
-
|
| 207 |
# Policy term metrics
|
| 208 |
metrics['Average Policy Term'] = f"{df['policy_term'].mean():.1f} years"
|
| 209 |
term_dist = df['policy_term'].value_counts().sort_index()
|
| 210 |
metrics['Policy Term Distribution'] = ', '.join([f"{term}Y: {count:,}" for term, count in term_dist.items()])
|
| 211 |
-
|
| 212 |
# Duration metrics
|
| 213 |
metrics['Average Duration'] = f"{df['duration_mth'].mean():.1f} months"
|
| 214 |
metrics['Duration Range'] = f"{df['duration_mth'].min()}-{df['duration_mth'].max()} months"
|
| 215 |
-
|
| 216 |
# Convert to DataFrame for display
|
| 217 |
metrics_df = pd.DataFrame(list(metrics.items()), columns=['Metric', 'Value'])
|
| 218 |
return metrics_df
|
|
@@ -225,26 +225,26 @@ def export_to_csv(df):
|
|
| 225 |
with gr.Blocks(title="Actuarial Model Points Generator") as demo:
|
| 226 |
gr.Markdown("""
|
| 227 |
# π Actuarial Model Points Generator
|
| 228 |
-
|
| 229 |
Generate synthetic seriatim policy data for actuarial modeling, cluster analysis, and portfolio testing.
|
| 230 |
Perfect for creating realistic test datasets for insurance product development and risk analysis.
|
| 231 |
""")
|
| 232 |
-
|
| 233 |
with gr.Row():
|
| 234 |
with gr.Column(scale=1):
|
| 235 |
gr.Markdown("### Generation Parameters")
|
| 236 |
-
|
| 237 |
# Basic parameters
|
| 238 |
mp_count = gr.Slider(
|
| 239 |
minimum=100, maximum=50000, value=10000, step=100,
|
| 240 |
label="Number of Model Points"
|
| 241 |
)
|
| 242 |
-
|
| 243 |
seed = gr.Number(
|
| 244 |
value=12345, precision=0,
|
| 245 |
label="Random Seed (for reproducibility)"
|
| 246 |
)
|
| 247 |
-
|
| 248 |
# Age parameters
|
| 249 |
gr.Markdown("#### Age Parameters")
|
| 250 |
age_min = gr.Slider(
|
|
@@ -255,7 +255,7 @@ with gr.Blocks(title="Actuarial Model Points Generator") as demo:
|
|
| 255 |
minimum=45, maximum=80, value=59, step=1,
|
| 256 |
label="Maximum Age at Entry"
|
| 257 |
)
|
| 258 |
-
|
| 259 |
# Sum Assured parameters
|
| 260 |
gr.Markdown("#### Sum Assured Parameters")
|
| 261 |
sum_assured_min = gr.Number(
|
|
@@ -266,7 +266,7 @@ with gr.Blocks(title="Actuarial Model Points Generator") as demo:
|
|
| 266 |
value=1000000,
|
| 267 |
label="Maximum Sum Assured ($)"
|
| 268 |
)
|
| 269 |
-
|
| 270 |
# Policy options
|
| 271 |
gr.Markdown("#### Policy Options")
|
| 272 |
policy_terms = gr.CheckboxGroup(
|
|
@@ -274,63 +274,63 @@ with gr.Blocks(title="Actuarial Model Points Generator") as demo:
|
|
| 274 |
value=[10, 15, 20],
|
| 275 |
label="Available Policy Terms (Years)"
|
| 276 |
)
|
| 277 |
-
|
| 278 |
include_sex = gr.Checkbox(
|
| 279 |
value=True,
|
| 280 |
label="Include Sex (M/F) in model points"
|
| 281 |
)
|
| 282 |
-
|
| 283 |
policy_count_fixed = gr.Checkbox(
|
| 284 |
value=True,
|
| 285 |
label="Fixed Policy Count = 1 (uncheck for variable 1-100)"
|
| 286 |
)
|
| 287 |
-
|
| 288 |
generate_btn = gr.Button("π² Generate Model Points", variant="primary")
|
| 289 |
-
|
| 290 |
with gr.Column(scale=2):
|
| 291 |
with gr.Tabs():
|
| 292 |
with gr.TabItem("π Data Table"):
|
| 293 |
model_points_table = gr.Dataframe(
|
| 294 |
label="Generated Model Points",
|
| 295 |
-
height=400,
|
| 296 |
interactive=False
|
| 297 |
)
|
| 298 |
-
|
| 299 |
download_btn = gr.DownloadButton(
|
| 300 |
label="π₯ Download CSV",
|
| 301 |
variant="secondary"
|
| 302 |
)
|
| 303 |
-
|
| 304 |
with gr.TabItem("π Distributions"):
|
| 305 |
distribution_plot = gr.Plot(label="Variable Distributions")
|
| 306 |
-
|
| 307 |
with gr.TabItem("π Analytics"):
|
| 308 |
with gr.Row():
|
| 309 |
correlation_plot = gr.Plot(label="Correlation Analysis")
|
| 310 |
age_term_plot = gr.Plot(label="Age vs Policy Term")
|
| 311 |
-
|
| 312 |
with gr.TabItem("π Statistics"):
|
| 313 |
with gr.Row():
|
| 314 |
with gr.Column():
|
| 315 |
portfolio_metrics = gr.Dataframe(label="Portfolio Metrics")
|
| 316 |
with gr.Column():
|
| 317 |
summary_stats = gr.Dataframe(label="Summary Statistics")
|
| 318 |
-
|
| 319 |
gr.Markdown("""
|
| 320 |
### π― Use Cases
|
| 321 |
-
|
| 322 |
**Actuarial Applications:**
|
| 323 |
- **Cluster Analysis**: Group similar policies for pricing and reserving
|
| 324 |
- **Portfolio Testing**: Stress test models with synthetic data
|
| 325 |
- **Product Development**: Analyze policy mix and profitability
|
| 326 |
- **Risk Management**: Understand exposure concentrations
|
| 327 |
-
|
| 328 |
**Key Features:**
|
| 329 |
- **Realistic Distributions**: Age, term, and sum assured follow typical insurance patterns
|
| 330 |
- **Existing Policies**: Duration > 0 represents in-force business
|
| 331 |
- **Flexible Parameters**: Customize age ranges, policy terms, and sum assured limits
|
| 332 |
- **Reproducible**: Fixed seed ensures consistent results
|
| 333 |
-
|
| 334 |
**Generated Variables:**
|
| 335 |
- `policy_id`: Unique identifier for each policy
|
| 336 |
- `age_at_entry`: Issue age (customizable range)
|
|
@@ -340,14 +340,14 @@ with gr.Blocks(title="Actuarial Model Points Generator") as demo:
|
|
| 340 |
- `sum_assured`: Coverage amount (customizable range)
|
| 341 |
- `duration_mth`: Months since issue (1 to term-1)
|
| 342 |
""")
|
| 343 |
-
|
| 344 |
# Event handlers
|
| 345 |
-
def generate_and_analyze(mp_count, age_min, age_max, sum_assured_min, sum_assured_max,
|
| 346 |
-
|
| 347 |
"""Generate model points and all analyses"""
|
| 348 |
if not policy_terms:
|
| 349 |
policy_terms = [10, 15, 20] # Default if none selected
|
| 350 |
-
|
| 351 |
# Generate model points
|
| 352 |
df = generate_model_points(
|
| 353 |
mp_count=int(mp_count),
|
|
@@ -360,7 +360,7 @@ with gr.Blocks(title="Actuarial Model Points Generator") as demo:
|
|
| 360 |
policy_count_fixed=policy_count_fixed,
|
| 361 |
seed=int(seed)
|
| 362 |
)
|
| 363 |
-
|
| 364 |
# Generate analyses
|
| 365 |
dist_plot = create_distribution_plots(df)
|
| 366 |
corr_plot = create_correlation_heatmap(df)
|
|
@@ -368,26 +368,26 @@ with gr.Blocks(title="Actuarial Model Points Generator") as demo:
|
|
| 368 |
portfolio_metrics_df = create_portfolio_metrics(df)
|
| 369 |
summary_stats_df = create_summary_stats(df)
|
| 370 |
csv_data = export_to_csv(df)
|
| 371 |
-
|
| 372 |
-
return (df, dist_plot, corr_plot, age_term_plot,
|
| 373 |
portfolio_metrics_df, summary_stats_df, csv_data)
|
| 374 |
-
|
| 375 |
# Connect the generate button
|
| 376 |
generate_btn.click(
|
| 377 |
fn=generate_and_analyze,
|
| 378 |
inputs=[mp_count, age_min, age_max, sum_assured_min, sum_assured_max,
|
| 379 |
policy_terms, include_sex, policy_count_fixed, seed],
|
| 380 |
-
outputs=[model_points_table, distribution_plot, correlation_plot,
|
| 381 |
-
|
| 382 |
)
|
| 383 |
-
|
| 384 |
# Initialize with default values
|
| 385 |
demo.load(
|
| 386 |
fn=generate_and_analyze,
|
| 387 |
inputs=[mp_count, age_min, age_max, sum_assured_min, sum_assured_max,
|
| 388 |
policy_terms, include_sex, policy_count_fixed, seed],
|
| 389 |
outputs=[model_points_table, distribution_plot, correlation_plot,
|
| 390 |
-
|
| 391 |
)
|
| 392 |
|
| 393 |
if __name__ == "__main__":
|
|
|
|
| 14 |
plt.style.use('default')
|
| 15 |
sns.set_palette("husl")
|
| 16 |
|
| 17 |
+
def generate_model_points(mp_count=10000, age_min=20, age_max=59,
                          sum_assured_min=10000, sum_assured_max=1000000,
                          policy_terms=(10, 15, 20), include_sex=True,
                          policy_count_fixed=True, seed=12345):
    """
    Generate seriatim model points for actuarial analysis.

    Args:
        mp_count: Number of model points (rows) to generate.
        age_min: Lowest issue age that can be drawn (inclusive).
        age_max: Highest issue age that can be drawn (inclusive).
        sum_assured_min: Lower end of the sum assured range.
        sum_assured_max: Upper end of the sum assured range.
        policy_terms: Sequence of policy terms in years to sample from.
        include_sex: When true, sex is drawn from "M"/"F"; otherwise every
            row gets "U".
        policy_count_fixed: When true, every row has policy_count 1;
            otherwise policy_count is drawn from 1 to 100 inclusive.
        seed: Seed passed to ``default_rng`` so results are reproducible.

    Returns:
        A DataFrame indexed by ``policy_id`` (1..mp_count) with the columns
        age_at_entry, sex, policy_term, policy_count, sum_assured and
        duration_mth.

    Raises:
        ValueError: If ``age_min`` exceeds ``age_max`` or ``policy_terms``
            is empty.
    """
    # Fail early with a readable message instead of the generic errors
    # NumPy raises for an empty range or an empty choice pool.
    if age_min > age_max:
        raise ValueError(
            f"age_min ({age_min}) must not exceed age_max ({age_max})"
        )
    policy_term_options = np.array(policy_terms)
    if policy_term_options.size == 0:
        raise ValueError("policy_terms must contain at least one term")

    # Set random seed for reproducibility.
    # NOTE: the order of the draws below is deliberate; reordering them
    # would change the table produced for a given seed.
    rng = default_rng(seed)

    # Issue Age (Integer): age_min - age_max year old (both inclusive)
    age_at_entry = rng.integers(low=age_min, high=age_max + 1, size=mp_count)

    # Sex (Char) - optional
    if include_sex:
        sex_codes = np.array(["M", "F"])
        # Vectorised lookup: index the code array with the random draws.
        sex = sex_codes[rng.integers(low=0, high=len(sex_codes), size=mp_count)]
    else:
        sex = np.full(mp_count, "U")  # Unknown/Unspecified

    # Policy Term (Integer): from policy_terms
    policy_term = rng.choice(policy_term_options, size=mp_count)

    # Sum Assured (Float): sum_assured_min - sum_assured_max,
    # rounded to the nearest 1,000
    sum_assured = np.round(
        (sum_assured_max - sum_assured_min) * rng.random(size=mp_count)
        + sum_assured_min,
        -3,
    )

    # Duration in month (Int): 1 <= Duration(mth) < Policy Term in month
    duration_mth = np.floor(
        (policy_term * 12 - 1) * rng.random(size=mp_count)
    ).astype(int) + 1

    # Policy Count (Integer): 1 (fixed) or variable (1-100)
    if policy_count_fixed:
        policy_count = np.ones(mp_count, dtype=int)
    else:
        policy_count = rng.integers(low=1, high=101, size=mp_count)

    # Create DataFrame
    model_point_table = pd.DataFrame(
        {
            "age_at_entry": age_at_entry,
            "sex": sex,
            "policy_term": policy_term,
            "policy_count": policy_count,
            "sum_assured": sum_assured,
            "duration_mth": duration_mth,
        },
        index=range(1, mp_count + 1),
    )
    model_point_table.index.name = "policy_id"

    return model_point_table
|
| 61 |
|
| 62 |
def create_summary_stats(df):
|
| 63 |
"""Generate summary statistics for the model points"""
|
| 64 |
summary_stats = []
|
| 65 |
+
|
| 66 |
# Numeric columns
|
| 67 |
numeric_cols = ['age_at_entry', 'policy_term', 'sum_assured', 'duration_mth', 'policy_count']
|
| 68 |
+
|
| 69 |
for col in numeric_cols:
|
| 70 |
if col in df.columns:
|
| 71 |
stats = {
|
|
|
|
| 78 |
'Median': f"{df[col].median():.2f}"
|
| 79 |
}
|
| 80 |
summary_stats.append(stats)
|
| 81 |
+
|
| 82 |
# Categorical columns
|
| 83 |
if 'sex' in df.columns:
|
| 84 |
sex_counts = df['sex'].value_counts()
|
|
|
|
| 93 |
'Median': '-'
|
| 94 |
}
|
| 95 |
summary_stats.append(stats)
|
| 96 |
+
|
| 97 |
return pd.DataFrame(summary_stats)
|
| 98 |
|
| 99 |
def create_distribution_plots(df):
|
| 100 |
"""Create distribution plots for key variables"""
|
| 101 |
fig = make_subplots(
|
| 102 |
rows=2, cols=3,
|
| 103 |
+
subplot_titles=('Age at Entry', 'Policy Term', 'Sum Assured',
|
| 104 |
+
'Duration (Months)', 'Policy Count', 'Sex Distribution'),
|
| 105 |
specs=[[{'type': 'histogram'}, {'type': 'histogram'}, {'type': 'histogram'}],
|
| 106 |
[{'type': 'histogram'}, {'type': 'histogram'}, {'type': 'bar'}]]
|
| 107 |
)
|
| 108 |
+
|
| 109 |
# Age at Entry
|
| 110 |
fig.add_trace(
|
| 111 |
go.Histogram(x=df['age_at_entry'], name='Age at Entry', nbinsx=20),
|
| 112 |
row=1, col=1
|
| 113 |
)
|
| 114 |
+
|
| 115 |
# Policy Term
|
| 116 |
fig.add_trace(
|
| 117 |
go.Histogram(x=df['policy_term'], name='Policy Term', nbinsx=10),
|
| 118 |
row=1, col=2
|
| 119 |
)
|
| 120 |
+
|
| 121 |
# Sum Assured
|
| 122 |
fig.add_trace(
|
| 123 |
go.Histogram(x=df['sum_assured'], name='Sum Assured', nbinsx=30),
|
| 124 |
row=1, col=3
|
| 125 |
)
|
| 126 |
+
|
| 127 |
# Duration in Months
|
| 128 |
fig.add_trace(
|
| 129 |
go.Histogram(x=df['duration_mth'], name='Duration (Months)', nbinsx=25),
|
| 130 |
row=2, col=1
|
| 131 |
)
|
| 132 |
+
|
| 133 |
# Policy Count
|
| 134 |
fig.add_trace(
|
| 135 |
go.Histogram(x=df['policy_count'], name='Policy Count', nbinsx=20),
|
| 136 |
row=2, col=2
|
| 137 |
)
|
| 138 |
+
|
| 139 |
# Sex Distribution
|
| 140 |
if 'sex' in df.columns:
|
| 141 |
sex_counts = df['sex'].value_counts()
|
|
|
|
| 143 |
go.Bar(x=sex_counts.index, y=sex_counts.values, name='Sex Distribution'),
|
| 144 |
row=2, col=3
|
| 145 |
)
|
| 146 |
+
|
| 147 |
fig.update_layout(
|
| 148 |
height=800,
|
| 149 |
title_text="Model Points Distribution Analysis",
|
| 150 |
showlegend=False
|
| 151 |
)
|
| 152 |
+
|
| 153 |
return fig
|
| 154 |
|
| 155 |
def create_correlation_heatmap(df):
|
| 156 |
"""Create correlation heatmap for numeric variables"""
|
| 157 |
numeric_cols = ['age_at_entry', 'policy_term', 'sum_assured', 'duration_mth', 'policy_count']
|
| 158 |
available_cols = [col for col in numeric_cols if col in df.columns]
|
| 159 |
+
|
| 160 |
if len(available_cols) > 1:
|
| 161 |
corr_matrix = df[available_cols].corr()
|
| 162 |
+
|
| 163 |
fig = go.Figure(data=go.Heatmap(
|
| 164 |
z=corr_matrix.values,
|
| 165 |
x=corr_matrix.columns,
|
|
|
|
| 171 |
textfont={"size": 12},
|
| 172 |
hoverongaps=False
|
| 173 |
))
|
| 174 |
+
|
| 175 |
fig.update_layout(
|
| 176 |
title='Correlation Matrix of Model Point Variables',
|
| 177 |
width=600,
|
| 178 |
height=500
|
| 179 |
)
|
| 180 |
+
|
| 181 |
return fig
|
| 182 |
else:
|
| 183 |
return go.Figure().add_annotation(text="Not enough numeric variables for correlation analysis")
|
| 184 |
|
| 185 |
def create_age_term_analysis(df):
    """Build a box plot of age at entry grouped by policy term.

    Reads the ``policy_term`` and ``age_at_entry`` columns of ``df`` and
    returns the resulting Plotly figure with its height set to 400.
    """
    axis_labels = {
        'policy_term': 'Policy Term (Years)',
        'age_at_entry': 'Age at Entry',
    }
    box_fig = px.box(
        df,
        x='policy_term',
        y='age_at_entry',
        title='Age at Entry Distribution by Policy Term',
        labels=axis_labels,
    )
    box_fig.update_layout(height=400)
    return box_fig
|
| 193 |
|
| 194 |
def create_portfolio_metrics(df):
    """Calculate portfolio-level metrics.

    Args:
        df: Model point table with the columns ``sum_assured``,
            ``age_at_entry``, ``policy_term`` and ``duration_mth``; a
            ``policy_count`` column is used when present.

    Returns:
        A two-column DataFrame (``Metric``, ``Value``) of display strings.
        Statistics that are undefined for an empty table are reported as
        "N/A" rather than "nan".
    """
    not_available = "N/A"
    has_rows = len(df) > 0

    # A model point may stand for several policies, so the policy total is
    # the sum of policy_count; fall back to the row count without the column.
    if 'policy_count' in df.columns:
        total_policies = int(df['policy_count'].sum())
    else:
        total_policies = len(df)

    if has_rows:
        ages = df['age_at_entry']
        durations = df['duration_mth']
        term_dist = df['policy_term'].value_counts().sort_index()

        avg_sum_assured = f"${df['sum_assured'].mean():,.0f}"
        avg_age = f"{ages.mean():.1f} years"
        age_range = f"{ages.min()}-{ages.max()} years"
        avg_term = f"{df['policy_term'].mean():.1f} years"
        term_summary = ', '.join(
            f"{term}Y: {int(count):,}" for term, count in term_dist.items()
        )
        avg_duration = f"{durations.mean():.1f} months"
        duration_range = f"{durations.min()}-{durations.max()} months"
    else:
        # mean/min/max of an empty column are NaN; show a placeholder.
        avg_sum_assured = avg_age = age_range = not_available
        avg_term = term_summary = not_available
        avg_duration = duration_range = not_available

    metrics = {
        # Total exposure
        'Total Policies': f"{total_policies:,}",
        'Total Sum Assured': f"${df['sum_assured'].sum():,.0f}",
        'Average Sum Assured': avg_sum_assured,
        # Age metrics
        'Average Age at Entry': avg_age,
        'Age Range': age_range,
        # Policy term metrics
        'Average Policy Term': avg_term,
        'Policy Term Distribution': term_summary,
        # Duration metrics
        'Average Duration': avg_duration,
        'Duration Range': duration_range,
    }

    # Convert to DataFrame for display
    return pd.DataFrame(list(metrics.items()), columns=['Metric', 'Value'])
|
|
|
|
| 225 |
with gr.Blocks(title="Actuarial Model Points Generator") as demo:
|
| 226 |
gr.Markdown("""
|
| 227 |
# π Actuarial Model Points Generator
|
| 228 |
+
|
| 229 |
Generate synthetic seriatim policy data for actuarial modeling, cluster analysis, and portfolio testing.
|
| 230 |
Perfect for creating realistic test datasets for insurance product development and risk analysis.
|
| 231 |
""")
|
| 232 |
+
|
| 233 |
with gr.Row():
|
| 234 |
with gr.Column(scale=1):
|
| 235 |
gr.Markdown("### Generation Parameters")
|
| 236 |
+
|
| 237 |
# Basic parameters
|
| 238 |
mp_count = gr.Slider(
|
| 239 |
minimum=100, maximum=50000, value=10000, step=100,
|
| 240 |
label="Number of Model Points"
|
| 241 |
)
|
| 242 |
+
|
| 243 |
seed = gr.Number(
|
| 244 |
value=12345, precision=0,
|
| 245 |
label="Random Seed (for reproducibility)"
|
| 246 |
)
|
| 247 |
+
|
| 248 |
# Age parameters
|
| 249 |
gr.Markdown("#### Age Parameters")
|
| 250 |
age_min = gr.Slider(
|
|
|
|
| 255 |
minimum=45, maximum=80, value=59, step=1,
|
| 256 |
label="Maximum Age at Entry"
|
| 257 |
)
|
| 258 |
+
|
| 259 |
# Sum Assured parameters
|
| 260 |
gr.Markdown("#### Sum Assured Parameters")
|
| 261 |
sum_assured_min = gr.Number(
|
|
|
|
| 266 |
value=1000000,
|
| 267 |
label="Maximum Sum Assured ($)"
|
| 268 |
)
|
| 269 |
+
|
| 270 |
# Policy options
|
| 271 |
gr.Markdown("#### Policy Options")
|
| 272 |
policy_terms = gr.CheckboxGroup(
|
|
|
|
| 274 |
value=[10, 15, 20],
|
| 275 |
label="Available Policy Terms (Years)"
|
| 276 |
)
|
| 277 |
+
|
| 278 |
include_sex = gr.Checkbox(
|
| 279 |
value=True,
|
| 280 |
label="Include Sex (M/F) in model points"
|
| 281 |
)
|
| 282 |
+
|
| 283 |
policy_count_fixed = gr.Checkbox(
|
| 284 |
value=True,
|
| 285 |
label="Fixed Policy Count = 1 (uncheck for variable 1-100)"
|
| 286 |
)
|
| 287 |
+
|
| 288 |
generate_btn = gr.Button("π² Generate Model Points", variant="primary")
|
| 289 |
+
|
| 290 |
with gr.Column(scale=2):
|
| 291 |
with gr.Tabs():
|
| 292 |
with gr.TabItem("π Data Table"):
|
| 293 |
model_points_table = gr.Dataframe(
|
| 294 |
label="Generated Model Points",
|
| 295 |
+
# height=400, <-- This line was removed/commented out
|
| 296 |
interactive=False
|
| 297 |
)
|
| 298 |
+
|
| 299 |
download_btn = gr.DownloadButton(
|
| 300 |
label="π₯ Download CSV",
|
| 301 |
variant="secondary"
|
| 302 |
)
|
| 303 |
+
|
| 304 |
with gr.TabItem("π Distributions"):
|
| 305 |
distribution_plot = gr.Plot(label="Variable Distributions")
|
| 306 |
+
|
| 307 |
with gr.TabItem("π Analytics"):
|
| 308 |
with gr.Row():
|
| 309 |
correlation_plot = gr.Plot(label="Correlation Analysis")
|
| 310 |
age_term_plot = gr.Plot(label="Age vs Policy Term")
|
| 311 |
+
|
| 312 |
with gr.TabItem("π Statistics"):
|
| 313 |
with gr.Row():
|
| 314 |
with gr.Column():
|
| 315 |
portfolio_metrics = gr.Dataframe(label="Portfolio Metrics")
|
| 316 |
with gr.Column():
|
| 317 |
summary_stats = gr.Dataframe(label="Summary Statistics")
|
| 318 |
+
|
| 319 |
gr.Markdown("""
|
| 320 |
### π― Use Cases
|
| 321 |
+
|
| 322 |
**Actuarial Applications:**
|
| 323 |
- **Cluster Analysis**: Group similar policies for pricing and reserving
|
| 324 |
- **Portfolio Testing**: Stress test models with synthetic data
|
| 325 |
- **Product Development**: Analyze policy mix and profitability
|
| 326 |
- **Risk Management**: Understand exposure concentrations
|
| 327 |
+
|
| 328 |
**Key Features:**
|
| 329 |
- **Realistic Distributions**: Age, term, and sum assured follow typical insurance patterns
|
| 330 |
- **Existing Policies**: Duration > 0 represents in-force business
|
| 331 |
- **Flexible Parameters**: Customize age ranges, policy terms, and sum assured limits
|
| 332 |
- **Reproducible**: Fixed seed ensures consistent results
|
| 333 |
+
|
| 334 |
**Generated Variables:**
|
| 335 |
- `policy_id`: Unique identifier for each policy
|
| 336 |
- `age_at_entry`: Issue age (customizable range)
|
|
|
|
| 340 |
- `sum_assured`: Coverage amount (customizable range)
|
| 341 |
- `duration_mth`: Months since issue (1 to term-1)
|
| 342 |
""")
|
| 343 |
+
|
| 344 |
# Event handlers
|
| 345 |
+
def generate_and_analyze(mp_count, age_min, age_max, sum_assured_min, sum_assured_max,
|
| 346 |
+
policy_terms, include_sex, policy_count_fixed, seed):
|
| 347 |
"""Generate model points and all analyses"""
|
| 348 |
if not policy_terms:
|
| 349 |
policy_terms = [10, 15, 20] # Default if none selected
|
| 350 |
+
|
| 351 |
# Generate model points
|
| 352 |
df = generate_model_points(
|
| 353 |
mp_count=int(mp_count),
|
|
|
|
| 360 |
policy_count_fixed=policy_count_fixed,
|
| 361 |
seed=int(seed)
|
| 362 |
)
|
| 363 |
+
|
| 364 |
# Generate analyses
|
| 365 |
dist_plot = create_distribution_plots(df)
|
| 366 |
corr_plot = create_correlation_heatmap(df)
|
|
|
|
| 368 |
portfolio_metrics_df = create_portfolio_metrics(df)
|
| 369 |
summary_stats_df = create_summary_stats(df)
|
| 370 |
csv_data = export_to_csv(df)
|
| 371 |
+
|
| 372 |
+
return (df, dist_plot, corr_plot, age_term_plot,
|
| 373 |
portfolio_metrics_df, summary_stats_df, csv_data)
|
| 374 |
+
|
| 375 |
# Connect the generate button
|
| 376 |
generate_btn.click(
|
| 377 |
fn=generate_and_analyze,
|
| 378 |
inputs=[mp_count, age_min, age_max, sum_assured_min, sum_assured_max,
|
| 379 |
policy_terms, include_sex, policy_count_fixed, seed],
|
| 380 |
+
outputs=[model_points_table, distribution_plot, correlation_plot,
|
| 381 |
+
age_term_plot, portfolio_metrics, summary_stats, download_btn]
|
| 382 |
)
|
| 383 |
+
|
| 384 |
# Initialize with default values
|
| 385 |
demo.load(
|
| 386 |
fn=generate_and_analyze,
|
| 387 |
inputs=[mp_count, age_min, age_max, sum_assured_min, sum_assured_max,
|
| 388 |
policy_terms, include_sex, policy_count_fixed, seed],
|
| 389 |
outputs=[model_points_table, distribution_plot, correlation_plot,
|
| 390 |
+
age_term_plot, portfolio_metrics, summary_stats, download_btn]
|
| 391 |
)
|
| 392 |
|
| 393 |
if __name__ == "__main__":
|