alidenewade committed on
Commit
0b55761
·
verified ·
1 Parent(s): b6acbd8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -41
app.py CHANGED
@@ -34,12 +34,16 @@ def generate_custom_model_points(
34
  sex_col = np.full(mp_count_val, "U")
35
 
36
  if not policy_terms_selection_val:
37
- policy_terms_selection_val = [10, 15, 20]
38
  policy_term_options = np.array(policy_terms_selection_val).astype(int)
 
 
 
 
39
  policy_term_col = rng.choice(policy_term_options, size=mp_count_val)
40
 
41
  sum_assured_col = np.round((sa_max_val - sa_min_val) * rng.random(size=mp_count_val) + sa_min_val, -3)
42
-
43
  max_duration_val = policy_term_col * 12 - 1
44
  max_duration_val = np.maximum(1, max_duration_val)
45
  duration_mth_col = (rng.random(size=mp_count_val) * max_duration_val).astype(int) + 1
@@ -61,7 +65,7 @@ def generate_custom_model_points(
61
  return model_point_df
62
 
63
  # 2. Gradio App Definition
64
- with gr.Blocks() as demo: # Removed theme=gr.themes.Soft() to use Gradio default
65
  gr.Markdown("# Actuarial Model Points Generator")
66
  gr.Markdown(
67
  "Configure the parameters below to generate a custom set of seriatim model points. "
@@ -89,102 +93,132 @@ with gr.Blocks() as demo: # Removed theme=gr.themes.Soft() to use Gradio default
89
  generate_btn = gr.Button("Generate Model Points", variant="primary")
90
 
91
  with gr.Column(scale=2):
92
- model_points_display = gr.Dataframe(label="Generated Model Points", wrap=True)
93
- download_excel_btn = gr.DownloadButton(label="Download Excel", value="model_points.xlsx", variant="secondary")
94
  gr.Markdown("---")
95
  gr.Markdown("## 📊 Data Summary & Analysis")
96
  with gr.Tabs():
97
  with gr.TabItem("Numerical Summary"):
98
- summary_stats_display = gr.Markdown(value="*No data generated yet or summary failed.*")
99
  with gr.TabItem("Distribution Plot"):
100
  gr.Markdown("Distribution of **Sum Assured**. Data is generated uniformly; a normal curve is fitted for illustration.")
101
  distribution_plot_display = gr.Plot()
102
  with gr.TabItem("Categorical Summary"):
103
- categorical_summary_display = gr.Markdown(value="*No data generated yet or summary failed.*")
 
 
 
 
104
 
105
  # 3. Event Handlers
106
  def handle_generate_button_click(
107
  mp_c, s, age_m, age_mx, sa_m, sa_mx, p_terms, incl_sex, pc_fixed
108
  ):
109
- error_desc_md = "Error during generation. Check parameters."
110
- error_cat_md = "Error during generation. Check parameters."
 
 
 
 
111
  current_df = df_state.value if df_state.value is not None else pd.DataFrame()
112
 
113
  if int(age_m) >= int(age_mx):
114
  gr.Warning("Minimum Age must be less than Maximum Age.")
115
- return current_df, df_state.value, "Error: Minimum Age must be less than Maximum Age.", None, "Error: Minimum Age must be less than Maximum Age."
116
  if float(sa_m) >= float(sa_mx):
117
  gr.Warning("Minimum Sum Assured must be less than Maximum Sum Assured.")
118
- return current_df, df_state.value, "Error: Minimum Sum Assured must be less than Maximum Sum Assured.", None, "Error: Minimum Sum Assured must be less than Maximum Sum Assured."
119
-
 
 
 
120
  gr.Info("Generating model points... Please wait.")
121
  try:
122
  df = generate_custom_model_points(mp_c, s, age_m, age_mx, sa_m, sa_mx, p_terms, incl_sex, pc_fixed)
123
  gr.Info(f"{len(df)} model points generated successfully!")
124
 
 
125
  numerical_cols = ['age_at_entry', 'sum_assured', 'duration_mth', 'policy_count']
126
  existing_numerical_cols = [col for col in numerical_cols if col in df.columns]
 
127
  if existing_numerical_cols and not df.empty:
128
  desc_stats = df[existing_numerical_cols].describe().transpose()
129
  if 'count' in desc_stats.columns:
130
  desc_stats['count'] = desc_stats['count'].astype(int)
131
- float_format_cols = ['mean', 'std', 'min', '25%', '50%', '75%', 'max']
132
- for col_name in float_format_cols:
133
- if col_name in desc_stats.columns and pd.api.types.is_numeric_dtype(desc_stats[col_name]):
134
- desc_stats[col_name] = desc_stats[col_name].apply(lambda x: f"{x:,.2f}" if pd.notnull(x) else x)
135
- desc_stats_md = "### Descriptive Statistics\n" + desc_stats.to_markdown()
136
- else:
137
- desc_stats_md = "No numerical data to describe or DataFrame is empty."
138
-
139
- fig = None
140
  if 'sum_assured' in df.columns and not df['sum_assured'].empty:
141
  fig, ax = plt.subplots(figsize=(8, 5))
142
  ax.hist(df['sum_assured'], bins=50, density=True, alpha=0.7, color='skyblue', edgecolor='black', label='Actual Distribution')
143
- mu, std = stats.norm.fit(df['sum_assured'])
144
  xmin_hist, xmax_hist = ax.get_xlim()
145
  x_norm = np.linspace(xmin_hist, xmax_hist, 100)
146
- p_norm = stats.norm.pdf(x_norm, mu, std)
147
- ax.plot(x_norm, p_norm, 'r--', linewidth=2, label=f'Fitted Normal (μ={mu:,.0f}, σ={std:,.0f})')
148
  ax.set_title(f"Sum Assured Distribution (N={len(df)})", fontsize=14)
149
  ax.set_xlabel("Sum Assured ($)", fontsize=12)
150
  ax.set_ylabel("Density", fontsize=12)
151
  ax.legend()
152
  ax.grid(axis='y', linestyle='--', alpha=0.7)
153
  plt.tight_layout()
154
- plot_object = fig
 
 
 
 
155
 
156
- cat_summary_md = "### Categorical Data Frequencies\n\n"
157
  if not df.empty:
158
  if 'sex' in df.columns:
159
  sex_counts = df['sex'].value_counts().reset_index()
160
  sex_counts.columns = ['Sex', 'Count']
161
- sex_counts['Percentage'] = (sex_counts['Count'] / sex_counts['Count'].sum() * 100).round(2).astype(str) + '%'
162
- cat_summary_md += "#### Sex Distribution\n" + sex_counts.to_markdown(index=False) + "\n\n"
 
 
 
163
  if 'policy_term' in df.columns:
164
  term_counts = df['policy_term'].value_counts().sort_index().reset_index()
165
  term_counts.columns = ['Policy Term (Years)', 'Count']
166
- term_counts['Percentage'] = (term_counts['Count'] / term_counts['Count'].sum() * 100).round(2).astype(str) + '%'
167
- cat_summary_md += "#### Policy Term Distribution\n" + term_counts.to_markdown(index=False)
168
- if cat_summary_md == "### Categorical Data Frequencies\n\n":
169
- cat_summary_md += "*No categorical columns ('sex', 'policy_term') found or data is empty.*"
170
- else:
171
- cat_summary_md = "*DataFrame is empty, no categorical data to summarize.*"
 
 
172
 
173
- return df, df, desc_stats_md, plot_object, cat_summary_md
174
 
175
  except Exception as e:
176
  gr.Error(f"An error occurred during generation: {str(e)}")
177
- return current_df, df_state.value, f"Error: {str(e)}", None, f"Error: {str(e)}"
 
178
 
179
 
180
  def handle_download_button_click(current_df_to_download):
181
  if current_df_to_download is None or not isinstance(current_df_to_download, pd.DataFrame) or current_df_to_download.empty:
182
  gr.Warning("No data available to download. Generate model points first.")
183
- return None
 
 
 
184
  excel_output = io.BytesIO()
 
 
185
  current_df_to_download.to_excel(excel_output, sheet_name='ModelPoints', engine='xlsxwriter', index=False)
186
  excel_output.seek(0)
187
- return excel_output
 
 
 
 
188
 
189
  inputs_list = [
190
  mp_count_input, seed_input, age_min_input, age_max_input,
@@ -197,14 +231,17 @@ with gr.Blocks() as demo: # Removed theme=gr.themes.Soft() to use Gradio default
197
  inputs=inputs_list,
198
  outputs=[
199
  model_points_display, df_state, summary_stats_display,
200
- distribution_plot_display, categorical_summary_display
201
  ]
202
  )
203
-
 
 
 
204
  download_excel_btn.click(
205
  fn=handle_download_button_click,
206
  inputs=[df_state],
207
- outputs=[download_excel_btn]
208
  )
209
 
210
  if __name__ == "__main__":
 
34
  sex_col = np.full(mp_count_val, "U")
35
 
36
  if not policy_terms_selection_val:
37
+ policy_terms_selection_val = [10, 15, 20] # Default if empty
38
  policy_term_options = np.array(policy_terms_selection_val).astype(int)
39
+ if len(policy_term_options) == 0: # Handle case where user deselects all
40
+ policy_term_options = np.array([10, 15, 20]) # Fallback to default
41
+ gr.Warning("No policy terms selected. Using default terms [10, 15, 20].")
42
+
43
  policy_term_col = rng.choice(policy_term_options, size=mp_count_val)
44
 
45
  sum_assured_col = np.round((sa_max_val - sa_min_val) * rng.random(size=mp_count_val) + sa_min_val, -3)
46
+
47
  max_duration_val = policy_term_col * 12 - 1
48
  max_duration_val = np.maximum(1, max_duration_val)
49
  duration_mth_col = (rng.random(size=mp_count_val) * max_duration_val).astype(int) + 1
 
65
  return model_point_df
66
 
67
  # 2. Gradio App Definition
68
+ with gr.Blocks() as demo:
69
  gr.Markdown("# Actuarial Model Points Generator")
70
  gr.Markdown(
71
  "Configure the parameters below to generate a custom set of seriatim model points. "
 
93
  generate_btn = gr.Button("Generate Model Points", variant="primary")
94
 
95
  with gr.Column(scale=2):
96
+ model_points_display = gr.Dataframe(label="Generated Model Points", wrap=True, height=400)
97
+ download_excel_btn = gr.DownloadButton(label="Download Excel", variant="secondary") # Removed value, will be set by click
98
  gr.Markdown("---")
99
  gr.Markdown("## 📊 Data Summary & Analysis")
100
  with gr.Tabs():
101
  with gr.TabItem("Numerical Summary"):
102
+ summary_stats_display = gr.Dataframe(label="Descriptive Statistics", wrap=True)
103
  with gr.TabItem("Distribution Plot"):
104
  gr.Markdown("Distribution of **Sum Assured**. Data is generated uniformly; a normal curve is fitted for illustration.")
105
  distribution_plot_display = gr.Plot()
106
  with gr.TabItem("Categorical Summary"):
107
+ # Show the categorical breakdowns as two separate tables in this tab:
108
+ # one for the sex distribution and one for the policy-term distribution.
109
+ sex_summary_display = gr.Dataframe(label="Sex Distribution", wrap=True)
110
+ policy_term_summary_display = gr.Dataframe(label="Policy Term Distribution", wrap=True)
111
+
112
 
113
  # 3. Event Handlers
114
  def handle_generate_button_click(
115
  mp_c, s, age_m, age_mx, sa_m, sa_mx, p_terms, incl_sex, pc_fixed
116
  ):
117
+ empty_df = pd.DataFrame()
118
+ empty_plot = None
119
+ no_data_df_num = pd.DataFrame({'Message': ["No data generated or summary failed."]})
120
+ no_data_df_cat = pd.DataFrame({'Message': ["No data generated or summary failed."]})
121
+
122
+
123
  current_df = df_state.value if df_state.value is not None else pd.DataFrame()
124
 
125
  if int(age_m) >= int(age_mx):
126
  gr.Warning("Minimum Age must be less than Maximum Age.")
127
+ return current_df, df_state.value, pd.DataFrame({'Error': ["Minimum Age must be less than Maximum Age."]}), empty_plot, pd.DataFrame({'Error': ["Minimum Age must be less than Maximum Age."]}), pd.DataFrame({'Error': ["Minimum Age must be less than Maximum Age."]})
128
  if float(sa_m) >= float(sa_mx):
129
  gr.Warning("Minimum Sum Assured must be less than Maximum Sum Assured.")
130
+ return current_df, df_state.value, pd.DataFrame({'Error': ["Minimum Sum Assured must be less than Maximum Sum Assured."]}), empty_plot, pd.DataFrame({'Error': ["Minimum Sum Assured must be less than Maximum Sum Assured."]}), pd.DataFrame({'Error': ["Minimum Sum Assured must be less than Maximum Sum Assured."]})
131
+ if not p_terms:
132
+ gr.Warning("At least one Policy Term must be selected. Using defaults.")
133
+ p_terms = [10, 15, 20] # Apply default if none selected
134
+
135
  gr.Info("Generating model points... Please wait.")
136
  try:
137
  df = generate_custom_model_points(mp_c, s, age_m, age_mx, sa_m, sa_mx, p_terms, incl_sex, pc_fixed)
138
  gr.Info(f"{len(df)} model points generated successfully!")
139
 
140
+ # Numerical Summary
141
  numerical_cols = ['age_at_entry', 'sum_assured', 'duration_mth', 'policy_count']
142
  existing_numerical_cols = [col for col in numerical_cols if col in df.columns]
143
+ desc_stats_df = no_data_df_num
144
  if existing_numerical_cols and not df.empty:
145
  desc_stats = df[existing_numerical_cols].describe().transpose()
146
  if 'count' in desc_stats.columns:
147
  desc_stats['count'] = desc_stats['count'].astype(int)
148
+ # Gradio Dataframe handles formatting, so raw numbers are fine
149
+ desc_stats_df = desc_stats.reset_index().rename(columns={'index': 'Statistic'})
150
+ elif df.empty:
151
+ desc_stats_df = pd.DataFrame({'Message': ["DataFrame is empty, no numerical data to describe."]})
152
+
153
+
154
+ # Distribution Plot
155
+ fig = empty_plot
 
156
  if 'sum_assured' in df.columns and not df['sum_assured'].empty:
157
  fig, ax = plt.subplots(figsize=(8, 5))
158
  ax.hist(df['sum_assured'], bins=50, density=True, alpha=0.7, color='skyblue', edgecolor='black', label='Actual Distribution')
159
+ mu, std_dev = stats.norm.fit(df['sum_assured'])
160
  xmin_hist, xmax_hist = ax.get_xlim()
161
  x_norm = np.linspace(xmin_hist, xmax_hist, 100)
162
+ p_norm = stats.norm.pdf(x_norm, mu, std_dev)
163
+ ax.plot(x_norm, p_norm, 'r--', linewidth=2, label=f'Fitted Normal (μ={mu:,.0f}, σ={std_dev:,.0f})')
164
  ax.set_title(f"Sum Assured Distribution (N={len(df)})", fontsize=14)
165
  ax.set_xlabel("Sum Assured ($)", fontsize=12)
166
  ax.set_ylabel("Density", fontsize=12)
167
  ax.legend()
168
  ax.grid(axis='y', linestyle='--', alpha=0.7)
169
  plt.tight_layout()
170
+ # Removed plot_object = fig, fig is directly returned
171
+
172
+ # Categorical Summary
173
+ sex_counts_df = no_data_df_cat
174
+ term_counts_df = no_data_df_cat
175
 
 
176
  if not df.empty:
177
  if 'sex' in df.columns:
178
  sex_counts = df['sex'].value_counts().reset_index()
179
  sex_counts.columns = ['Sex', 'Count']
180
+ sex_counts['Percentage'] = (sex_counts['Count'] / sex_counts['Count'].sum() * 100).round(2) # Keep as number for potential sorting in Dataframe
181
+ sex_counts_df = sex_counts
182
+ else:
183
+ sex_counts_df = pd.DataFrame({'Message': ["'sex' column not found or data is empty."]})
184
+
185
  if 'policy_term' in df.columns:
186
  term_counts = df['policy_term'].value_counts().sort_index().reset_index()
187
  term_counts.columns = ['Policy Term (Years)', 'Count']
188
+ term_counts['Percentage'] = (term_counts['Count'] / term_counts['Count'].sum() * 100).round(2) # Keep as number
189
+ term_counts_df = term_counts
190
+ else:
191
+ term_counts_df = pd.DataFrame({'Message': ["'policy_term' column not found or data is empty."]})
192
+ elif df.empty:
193
+ sex_counts_df = pd.DataFrame({'Message': ["DataFrame is empty, no categorical data for 'sex'."]})
194
+ term_counts_df = pd.DataFrame({'Message': ["DataFrame is empty, no categorical data for 'policy_term'."]})
195
+
196
 
197
+ return df, df, desc_stats_df, fig, sex_counts_df, term_counts_df
198
 
199
  except Exception as e:
200
  gr.Error(f"An error occurred during generation: {str(e)}")
201
+ error_df = pd.DataFrame({'Error': [str(e)]})
202
+ return current_df, df_state.value, error_df, empty_plot, error_df, error_df
203
 
204
 
205
  def handle_download_button_click(current_df_to_download):
206
  if current_df_to_download is None or not isinstance(current_df_to_download, pd.DataFrame) or current_df_to_download.empty:
207
  gr.Warning("No data available to download. Generate model points first.")
208
+ # Nothing to download: no file is produced here. The return below sends a
209
+ # component update that disables the button. NOTE(review): component-level `.update()` was removed in Gradio 4 — confirm this call exists in the installed version.
210
+ return gr.DownloadButton.update(interactive=False) # Or simply return None if that works for your Gradio version.
211
+
212
  excel_output = io.BytesIO()
213
+ # Ensure 'Statistic' column from describe().transpose().reset_index() is handled if it exists
214
+ # The main df (current_df_to_download) should be fine as is.
215
  current_df_to_download.to_excel(excel_output, sheet_name='ModelPoints', engine='xlsxwriter', index=False)
216
  excel_output.seek(0)
217
+ # NOTE(review): Gradio documents the value delivered to gr.DownloadButton as a file path (str or Path);
218
+ # confirm that the in-memory BytesIO buffer wrapped in gr.File below is accepted,
219
+ # or write the workbook to a temporary .xlsx file and return its path instead.
220
+ return gr.File(file_path=excel_output, file_name="model_points.xlsx", visible=True)
221
+
222
 
223
  inputs_list = [
224
  mp_count_input, seed_input, age_min_input, age_max_input,
 
231
  inputs=inputs_list,
232
  outputs=[
233
  model_points_display, df_state, summary_stats_display,
234
+ distribution_plot_display, sex_summary_display, policy_term_summary_display
235
  ]
236
  )
237
+
238
+ # The click handler's return value is routed back to the DownloadButton itself (see `outputs` below).
239
+ # NOTE(review): Gradio documents the button's `value` as the path of the file to download,
240
+ # so the handler is presumably expected to return a file path — confirm against the installed version.
241
  download_excel_btn.click(
242
  fn=handle_download_button_click,
243
  inputs=[df_state],
244
+ outputs=[download_excel_btn] # The output should be the button itself to receive the file
245
  )
246
 
247
  if __name__ == "__main__":