Update app.py
Browse files
app.py
CHANGED
@@ -34,12 +34,16 @@ def generate_custom_model_points(
|
|
34 |
sex_col = np.full(mp_count_val, "U")
|
35 |
|
36 |
if not policy_terms_selection_val:
|
37 |
-
policy_terms_selection_val = [10, 15, 20]
|
38 |
policy_term_options = np.array(policy_terms_selection_val).astype(int)
|
|
|
|
|
|
|
|
|
39 |
policy_term_col = rng.choice(policy_term_options, size=mp_count_val)
|
40 |
|
41 |
sum_assured_col = np.round((sa_max_val - sa_min_val) * rng.random(size=mp_count_val) + sa_min_val, -3)
|
42 |
-
|
43 |
max_duration_val = policy_term_col * 12 - 1
|
44 |
max_duration_val = np.maximum(1, max_duration_val)
|
45 |
duration_mth_col = (rng.random(size=mp_count_val) * max_duration_val).astype(int) + 1
|
@@ -61,7 +65,7 @@ def generate_custom_model_points(
|
|
61 |
return model_point_df
|
62 |
|
63 |
# 2. Gradio App Definition
|
64 |
-
with gr.Blocks() as demo:
|
65 |
gr.Markdown("# Actuarial Model Points Generator")
|
66 |
gr.Markdown(
|
67 |
"Configure the parameters below to generate a custom set of seriatim model points. "
|
@@ -89,102 +93,132 @@ with gr.Blocks() as demo: # Removed theme=gr.themes.Soft() to use Gradio default
|
|
89 |
generate_btn = gr.Button("Generate Model Points", variant="primary")
|
90 |
|
91 |
with gr.Column(scale=2):
|
92 |
-
model_points_display = gr.Dataframe(label="Generated Model Points", wrap=True)
|
93 |
-
download_excel_btn = gr.DownloadButton(label="Download Excel",
|
94 |
gr.Markdown("---")
|
95 |
gr.Markdown("## 📊 Data Summary & Analysis")
|
96 |
with gr.Tabs():
|
97 |
with gr.TabItem("Numerical Summary"):
|
98 |
-
summary_stats_display = gr.
|
99 |
with gr.TabItem("Distribution Plot"):
|
100 |
gr.Markdown("Distribution of **Sum Assured**. Data is generated uniformly; a normal curve is fitted for illustration.")
|
101 |
distribution_plot_display = gr.Plot()
|
102 |
with gr.TabItem("Categorical Summary"):
|
103 |
-
|
|
|
|
|
|
|
|
|
104 |
|
105 |
# 3. Event Handlers
|
106 |
def handle_generate_button_click(
|
107 |
mp_c, s, age_m, age_mx, sa_m, sa_mx, p_terms, incl_sex, pc_fixed
|
108 |
):
|
109 |
-
|
110 |
-
|
|
|
|
|
|
|
|
|
111 |
current_df = df_state.value if df_state.value is not None else pd.DataFrame()
|
112 |
|
113 |
if int(age_m) >= int(age_mx):
|
114 |
gr.Warning("Minimum Age must be less than Maximum Age.")
|
115 |
-
return current_df, df_state.value,
|
116 |
if float(sa_m) >= float(sa_mx):
|
117 |
gr.Warning("Minimum Sum Assured must be less than Maximum Sum Assured.")
|
118 |
-
return current_df, df_state.value,
|
119 |
-
|
|
|
|
|
|
|
120 |
gr.Info("Generating model points... Please wait.")
|
121 |
try:
|
122 |
df = generate_custom_model_points(mp_c, s, age_m, age_mx, sa_m, sa_mx, p_terms, incl_sex, pc_fixed)
|
123 |
gr.Info(f"{len(df)} model points generated successfully!")
|
124 |
|
|
|
125 |
numerical_cols = ['age_at_entry', 'sum_assured', 'duration_mth', 'policy_count']
|
126 |
existing_numerical_cols = [col for col in numerical_cols if col in df.columns]
|
|
|
127 |
if existing_numerical_cols and not df.empty:
|
128 |
desc_stats = df[existing_numerical_cols].describe().transpose()
|
129 |
if 'count' in desc_stats.columns:
|
130 |
desc_stats['count'] = desc_stats['count'].astype(int)
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
fig = None
|
140 |
if 'sum_assured' in df.columns and not df['sum_assured'].empty:
|
141 |
fig, ax = plt.subplots(figsize=(8, 5))
|
142 |
ax.hist(df['sum_assured'], bins=50, density=True, alpha=0.7, color='skyblue', edgecolor='black', label='Actual Distribution')
|
143 |
-
mu,
|
144 |
xmin_hist, xmax_hist = ax.get_xlim()
|
145 |
x_norm = np.linspace(xmin_hist, xmax_hist, 100)
|
146 |
-
p_norm = stats.norm.pdf(x_norm, mu,
|
147 |
-
ax.plot(x_norm, p_norm, 'r--', linewidth=2, label=f'Fitted Normal (μ={mu:,.0f}, σ={
|
148 |
ax.set_title(f"Sum Assured Distribution (N={len(df)})", fontsize=14)
|
149 |
ax.set_xlabel("Sum Assured ($)", fontsize=12)
|
150 |
ax.set_ylabel("Density", fontsize=12)
|
151 |
ax.legend()
|
152 |
ax.grid(axis='y', linestyle='--', alpha=0.7)
|
153 |
plt.tight_layout()
|
154 |
-
plot_object = fig
|
|
|
|
|
|
|
|
|
155 |
|
156 |
-
cat_summary_md = "### Categorical Data Frequencies\n\n"
|
157 |
if not df.empty:
|
158 |
if 'sex' in df.columns:
|
159 |
sex_counts = df['sex'].value_counts().reset_index()
|
160 |
sex_counts.columns = ['Sex', 'Count']
|
161 |
-
sex_counts['Percentage'] = (sex_counts['Count'] / sex_counts['Count'].sum() * 100).round(2)
|
162 |
-
|
|
|
|
|
|
|
163 |
if 'policy_term' in df.columns:
|
164 |
term_counts = df['policy_term'].value_counts().sort_index().reset_index()
|
165 |
term_counts.columns = ['Policy Term (Years)', 'Count']
|
166 |
-
term_counts['Percentage'] = (term_counts['Count'] / term_counts['Count'].sum() * 100).round(2)
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
|
|
|
|
172 |
|
173 |
-
return df, df,
|
174 |
|
175 |
except Exception as e:
|
176 |
gr.Error(f"An error occurred during generation: {str(e)}")
|
177 |
-
|
|
|
178 |
|
179 |
|
180 |
def handle_download_button_click(current_df_to_download):
|
181 |
if current_df_to_download is None or not isinstance(current_df_to_download, pd.DataFrame) or current_df_to_download.empty:
|
182 |
gr.Warning("No data available to download. Generate model points first.")
|
183 |
-
|
|
|
|
|
|
|
184 |
excel_output = io.BytesIO()
|
|
|
|
|
185 |
current_df_to_download.to_excel(excel_output, sheet_name='ModelPoints', engine='xlsxwriter', index=False)
|
186 |
excel_output.seek(0)
|
187 |
-
|
|
|
|
|
|
|
|
|
188 |
|
189 |
inputs_list = [
|
190 |
mp_count_input, seed_input, age_min_input, age_max_input,
|
@@ -197,14 +231,17 @@ with gr.Blocks() as demo: # Removed theme=gr.themes.Soft() to use Gradio default
|
|
197 |
inputs=inputs_list,
|
198 |
outputs=[
|
199 |
model_points_display, df_state, summary_stats_display,
|
200 |
-
distribution_plot_display,
|
201 |
]
|
202 |
)
|
203 |
-
|
|
|
|
|
|
|
204 |
download_excel_btn.click(
|
205 |
fn=handle_download_button_click,
|
206 |
inputs=[df_state],
|
207 |
-
outputs=[download_excel_btn]
|
208 |
)
|
209 |
|
210 |
if __name__ == "__main__":
|
|
|
34 |
sex_col = np.full(mp_count_val, "U")
|
35 |
|
36 |
if not policy_terms_selection_val:
|
37 |
+
policy_terms_selection_val = [10, 15, 20] # Default if empty
|
38 |
policy_term_options = np.array(policy_terms_selection_val).astype(int)
|
39 |
+
if len(policy_term_options) == 0: # Handle case where user deselects all
|
40 |
+
policy_term_options = np.array([10, 15, 20]) # Fallback to default
|
41 |
+
gr.Warning("No policy terms selected. Using default terms [10, 15, 20].")
|
42 |
+
|
43 |
policy_term_col = rng.choice(policy_term_options, size=mp_count_val)
|
44 |
|
45 |
sum_assured_col = np.round((sa_max_val - sa_min_val) * rng.random(size=mp_count_val) + sa_min_val, -3)
|
46 |
+
|
47 |
max_duration_val = policy_term_col * 12 - 1
|
48 |
max_duration_val = np.maximum(1, max_duration_val)
|
49 |
duration_mth_col = (rng.random(size=mp_count_val) * max_duration_val).astype(int) + 1
|
|
|
65 |
return model_point_df
|
66 |
|
67 |
# 2. Gradio App Definition
|
68 |
+
with gr.Blocks() as demo:
|
69 |
gr.Markdown("# Actuarial Model Points Generator")
|
70 |
gr.Markdown(
|
71 |
"Configure the parameters below to generate a custom set of seriatim model points. "
|
|
|
93 |
generate_btn = gr.Button("Generate Model Points", variant="primary")
|
94 |
|
95 |
with gr.Column(scale=2):
|
96 |
+
model_points_display = gr.Dataframe(label="Generated Model Points", wrap=True, height=400)
|
97 |
+
download_excel_btn = gr.DownloadButton(label="Download Excel", variant="secondary") # Removed value, will be set by click
|
98 |
gr.Markdown("---")
|
99 |
gr.Markdown("## 📊 Data Summary & Analysis")
|
100 |
with gr.Tabs():
|
101 |
with gr.TabItem("Numerical Summary"):
|
102 |
+
summary_stats_display = gr.Dataframe(label="Descriptive Statistics", wrap=True)
|
103 |
with gr.TabItem("Distribution Plot"):
|
104 |
gr.Markdown("Distribution of **Sum Assured**. Data is generated uniformly; a normal curve is fitted for illustration.")
|
105 |
distribution_plot_display = gr.Plot()
|
106 |
with gr.TabItem("Categorical Summary"):
|
107 |
+
# For simplicity, we'll display sex distribution here.
|
108 |
+
# You might want to add another Dataframe for policy_term or combine them.
|
109 |
+
sex_summary_display = gr.Dataframe(label="Sex Distribution", wrap=True)
|
110 |
+
policy_term_summary_display = gr.Dataframe(label="Policy Term Distribution", wrap=True)
|
111 |
+
|
112 |
|
113 |
# 3. Event Handlers
|
114 |
def handle_generate_button_click(
    mp_c, s, age_m, age_mx, sa_m, sa_mx, p_terms, incl_sex, pc_fixed
):
    """Validate the form inputs, generate model points and build the summaries.

    The arguments are forwarded positionally, unchanged, to
    ``generate_custom_model_points``; ``age_m``/``age_mx`` and
    ``sa_m``/``sa_mx`` are additionally checked here as min/max pairs.

    Returns:
        A 6-tuple in the order of the click event's ``outputs`` list:
        model points table, stored DataFrame state, numerical summary table,
        Sum Assured distribution figure (or ``None``), sex frequency table,
        policy-term frequency table.

        On a validation failure or an exception, the first two outputs are
        ``gr.update()`` (i.e. "leave unchanged") and the three summary tables
        carry a one-row ``Error`` frame with the message.
    """

    def _failure_outputs(message):
        # NOTE(review): the previous code returned ``df_state.value`` here.
        # Inside a handler that attribute is the value the component was
        # constructed with, not the current session's value (presumably
        # ``df_state`` is a ``gr.State`` -- confirm), so it would overwrite
        # already generated data. ``gr.update()`` leaves both outputs as-is.
        error_df = pd.DataFrame({'Error': [message]})
        return gr.update(), gr.update(), error_df, None, error_df, error_df

    if int(age_m) >= int(age_mx):
        message = "Minimum Age must be less than Maximum Age."
        gr.Warning(message)
        return _failure_outputs(message)
    if float(sa_m) >= float(sa_mx):
        message = "Minimum Sum Assured must be less than Maximum Sum Assured."
        gr.Warning(message)
        return _failure_outputs(message)
    if not p_terms:
        gr.Warning("At least one Policy Term must be selected. Using defaults.")
        p_terms = [10, 15, 20]  # Apply default if none selected

    gr.Info("Generating model points... Please wait.")
    fig = None
    try:
        df = generate_custom_model_points(mp_c, s, age_m, age_mx, sa_m, sa_mx, p_terms, incl_sex, pc_fixed)
        gr.Info(f"{len(df)} model points generated successfully!")

        # Numerical summary: one row per numeric column, one column per statistic.
        numerical_cols = ['age_at_entry', 'sum_assured', 'duration_mth', 'policy_count']
        existing_numerical_cols = [col for col in numerical_cols if col in df.columns]
        if df.empty:
            desc_stats_df = pd.DataFrame({'Message': ["DataFrame is empty, no numerical data to describe."]})
        elif existing_numerical_cols:
            desc_stats = df[existing_numerical_cols].describe().transpose()
            if 'count' in desc_stats.columns:
                desc_stats['count'] = desc_stats['count'].astype(int)
            desc_stats_df = desc_stats.reset_index().rename(columns={'index': 'Statistic'})
        else:
            desc_stats_df = pd.DataFrame({'Message': ["No data generated or summary failed."]})

        # Distribution plot: histogram of sum_assured with a fitted normal curve.
        if 'sum_assured' in df.columns and not df['sum_assured'].empty:
            fig, ax = plt.subplots(figsize=(8, 5))
            ax.hist(df['sum_assured'], bins=50, density=True, alpha=0.7, color='skyblue', edgecolor='black', label='Actual Distribution')
            mu, std_dev = stats.norm.fit(df['sum_assured'])
            xmin_hist, xmax_hist = ax.get_xlim()
            x_norm = np.linspace(xmin_hist, xmax_hist, 100)
            p_norm = stats.norm.pdf(x_norm, mu, std_dev)
            ax.plot(x_norm, p_norm, 'r--', linewidth=2, label=f'Fitted Normal (μ={mu:,.0f}, σ={std_dev:,.0f})')
            ax.set_title(f"Sum Assured Distribution (N={len(df)})", fontsize=14)
            ax.set_xlabel("Sum Assured ($)", fontsize=12)
            ax.set_ylabel("Density", fontsize=12)
            ax.legend()
            ax.grid(axis='y', linestyle='--', alpha=0.7)
            fig.tight_layout()
            # Deregister the figure from pyplot so figures do not pile up
            # across clicks; the Figure object itself remains renderable.
            plt.close(fig)

        # Categorical summary: frequency and percentage per category.
        if df.empty:
            sex_counts_df = pd.DataFrame({'Message': ["DataFrame is empty, no categorical data for 'sex'."]})
            term_counts_df = pd.DataFrame({'Message': ["DataFrame is empty, no categorical data for 'policy_term'."]})
        else:
            if 'sex' in df.columns:
                sex_counts_df = df['sex'].value_counts().reset_index()
                sex_counts_df.columns = ['Sex', 'Count']
                # Percentages stay numeric so the table remains sortable.
                sex_counts_df['Percentage'] = (sex_counts_df['Count'] / sex_counts_df['Count'].sum() * 100).round(2)
            else:
                sex_counts_df = pd.DataFrame({'Message': ["'sex' column not found or data is empty."]})

            if 'policy_term' in df.columns:
                term_counts_df = df['policy_term'].value_counts().sort_index().reset_index()
                term_counts_df.columns = ['Policy Term (Years)', 'Count']
                term_counts_df['Percentage'] = (term_counts_df['Count'] / term_counts_df['Count'].sum() * 100).round(2)
            else:
                term_counts_df = pd.DataFrame({'Message': ["'policy_term' column not found or data is empty."]})

        return df, df, desc_stats_df, fig, sex_counts_df, term_counts_df

    except Exception as e:
        if fig is not None:
            plt.close(fig)
        # ``gr.Error`` is an exception class and only reaches the user when
        # raised; raising would discard the error tables returned below, so
        # the message is surfaced with ``gr.Warning`` instead.
        gr.Warning(f"An error occurred during generation: {str(e)}")
        return _failure_outputs(str(e))
|
203 |
|
204 |
|
205 |
def handle_download_button_click(current_df_to_download):
    """Export the stored model points to an Excel file for the download button.

    Args:
        current_df_to_download: The DataFrame passed in from ``df_state``;
            may be ``None`` or empty when nothing has been generated yet.

    Returns:
        The path of a freshly written ``model_points.xlsx`` file, or ``None``
        (after showing a warning) when there is nothing to export.
    """
    # Local imports keep this block self-contained; both are standard library.
    import os
    import tempfile

    # ``isinstance`` is False for None, so this also covers the "no state yet" case.
    if not isinstance(current_df_to_download, pd.DataFrame) or current_df_to_download.empty:
        gr.Warning("No data available to download. Generate model points first.")
        # ``gr.DownloadButton.update`` does not exist in Gradio releases that
        # ship DownloadButton, and disabling the button would leave the user
        # unable to download after generating data. Returning None just
        # clears the button's file.
        return None

    # DownloadButton expects a file path (or URL) as its value, not a BytesIO
    # or a gr.File component, so the workbook is written to disk. A dedicated
    # temp directory lets the file keep a readable name.
    # NOTE(review): these directories are not cleaned up here -- confirm
    # whether the host environment purges its temp directory.
    export_dir = tempfile.mkdtemp(prefix="model_points_")
    export_path = os.path.join(export_dir, "model_points.xlsx")
    current_df_to_download.to_excel(export_path, sheet_name='ModelPoints', engine='xlsxwriter', index=False)
    return export_path
|
221 |
+
|
222 |
|
223 |
inputs_list = [
|
224 |
mp_count_input, seed_input, age_min_input, age_max_input,
|
|
|
231 |
inputs=inputs_list,
|
232 |
outputs=[
|
233 |
model_points_display, df_state, summary_stats_display,
|
234 |
+
distribution_plot_display, sex_summary_display, policy_term_summary_display
|
235 |
]
|
236 |
)
|
237 |
+
|
238 |
+
# The click handler's return value is delivered to the DownloadButton listed in `outputs`.
|
239 |
+
# The `value` of gr.DownloadButton is the path (or URL) of the file to download,
|
240 |
+
# so the handler must return a file path; that path becomes the button's new value.
|
241 |
download_excel_btn.click(
|
242 |
fn=handle_download_button_click,
|
243 |
inputs=[df_state],
|
244 |
+
outputs=[download_excel_btn] # The output should be the button itself to receive the file
|
245 |
)
|
246 |
|
247 |
if __name__ == "__main__":
|