Spaces:

alidenewade
/

actuarial-model-point-generator

Sleeping

App Files Files Community

alidenewade committed on May 24

Commit

0b55761

verified ·

1 Parent(s): b6acbd8

Update app.py

Browse files

Files changed (1) hide show

app.py +78 -41

app.py CHANGED Viewed

@@ -34,12 +34,16 @@ def generate_custom_model_points(
         sex_col = np.full(mp_count_val, "U")
     if not policy_terms_selection_val:
-        policy_terms_selection_val = [10, 15, 20]
     policy_term_options = np.array(policy_terms_selection_val).astype(int)
     policy_term_col = rng.choice(policy_term_options, size=mp_count_val)
     sum_assured_col = np.round((sa_max_val - sa_min_val) * rng.random(size=mp_count_val) + sa_min_val, -3)
     max_duration_val = policy_term_col * 12 - 1
     max_duration_val = np.maximum(1, max_duration_val)
     duration_mth_col = (rng.random(size=mp_count_val) * max_duration_val).astype(int) + 1
@@ -61,7 +65,7 @@ def generate_custom_model_points(
     return model_point_df
 # 2. Gradio App Definition
-with gr.Blocks() as demo: # Removed theme=gr.themes.Soft() to use Gradio default
     gr.Markdown("# Actuarial Model Points Generator")
     gr.Markdown(
         "Configure the parameters below to generate a custom set of seriatim model points. "
@@ -89,102 +93,132 @@ with gr.Blocks() as demo: # Removed theme=gr.themes.Soft() to use Gradio default
             generate_btn = gr.Button("Generate Model Points", variant="primary")
         with gr.Column(scale=2):
-            model_points_display = gr.Dataframe(label="Generated Model Points", wrap=True)
-            download_excel_btn = gr.DownloadButton(label="Download Excel", value="model_points.xlsx", variant="secondary")
             gr.Markdown("---")
             gr.Markdown("## 📊 Data Summary & Analysis")
             with gr.Tabs():
                 with gr.TabItem("Numerical Summary"):
-                    summary_stats_display = gr.Markdown(value="*No data generated yet or summary failed.*")
                 with gr.TabItem("Distribution Plot"):
                     gr.Markdown("Distribution of **Sum Assured**. Data is generated uniformly; a normal curve is fitted for illustration.")
                     distribution_plot_display = gr.Plot()
                 with gr.TabItem("Categorical Summary"):
-                    categorical_summary_display = gr.Markdown(value="*No data generated yet or summary failed.*")
     # 3. Event Handlers
     def handle_generate_button_click(
         mp_c, s, age_m, age_mx, sa_m, sa_mx, p_terms, incl_sex, pc_fixed
     ):
-        error_desc_md = "Error during generation. Check parameters."
-        error_cat_md = "Error during generation. Check parameters."
         current_df = df_state.value if df_state.value is not None else pd.DataFrame()
         if int(age_m) >= int(age_mx):
             gr.Warning("Minimum Age must be less than Maximum Age.")
-            return current_df, df_state.value, "Error: Minimum Age must be less than Maximum Age.", None, "Error: Minimum Age must be less than Maximum Age."
         if float(sa_m) >= float(sa_mx):
             gr.Warning("Minimum Sum Assured must be less than Maximum Sum Assured.")
-            return current_df, df_state.value, "Error: Minimum Sum Assured must be less than Maximum Sum Assured.", None, "Error: Minimum Sum Assured must be less than Maximum Sum Assured."
         gr.Info("Generating model points... Please wait.")
         try:
             df = generate_custom_model_points(mp_c, s, age_m, age_mx, sa_m, sa_mx, p_terms, incl_sex, pc_fixed)
             gr.Info(f"{len(df)} model points generated successfully!")
             numerical_cols = ['age_at_entry', 'sum_assured', 'duration_mth', 'policy_count']
             existing_numerical_cols = [col for col in numerical_cols if col in df.columns]
             if existing_numerical_cols and not df.empty:
                 desc_stats = df[existing_numerical_cols].describe().transpose()
                 if 'count' in desc_stats.columns:
                     desc_stats['count'] = desc_stats['count'].astype(int)
-                float_format_cols = ['mean', 'std', 'min', '25%', '50%', '75%', 'max']
-                for col_name in float_format_cols:
-                    if col_name in desc_stats.columns and pd.api.types.is_numeric_dtype(desc_stats[col_name]):
-                        desc_stats[col_name] = desc_stats[col_name].apply(lambda x: f"{x:,.2f}" if pd.notnull(x) else x)
-                desc_stats_md = "### Descriptive Statistics\n" + desc_stats.to_markdown()
-            else:
-                desc_stats_md = "No numerical data to describe or DataFrame is empty."
-            fig = None
             if 'sum_assured' in df.columns and not df['sum_assured'].empty:
                 fig, ax = plt.subplots(figsize=(8, 5))
                 ax.hist(df['sum_assured'], bins=50, density=True, alpha=0.7, color='skyblue', edgecolor='black', label='Actual Distribution')
-                mu, std = stats.norm.fit(df['sum_assured'])
                 xmin_hist, xmax_hist = ax.get_xlim()
                 x_norm = np.linspace(xmin_hist, xmax_hist, 100)
-                p_norm = stats.norm.pdf(x_norm, mu, std)
-                ax.plot(x_norm, p_norm, 'r--', linewidth=2, label=f'Fitted Normal (μ={mu:,.0f}, σ={std:,.0f})')
                 ax.set_title(f"Sum Assured Distribution (N={len(df)})", fontsize=14)
                 ax.set_xlabel("Sum Assured ($)", fontsize=12)
                 ax.set_ylabel("Density", fontsize=12)
                 ax.legend()
                 ax.grid(axis='y', linestyle='--', alpha=0.7)
                 plt.tight_layout()
-            plot_object = fig
-            cat_summary_md = "### Categorical Data Frequencies\n\n"
             if not df.empty:
                 if 'sex' in df.columns:
                     sex_counts = df['sex'].value_counts().reset_index()
                     sex_counts.columns = ['Sex', 'Count']
-                    sex_counts['Percentage'] = (sex_counts['Count'] / sex_counts['Count'].sum() * 100).round(2).astype(str) + '%'
-                    cat_summary_md += "#### Sex Distribution\n" + sex_counts.to_markdown(index=False) + "\n\n"
                 if 'policy_term' in df.columns:
                     term_counts = df['policy_term'].value_counts().sort_index().reset_index()
                     term_counts.columns = ['Policy Term (Years)', 'Count']
-                    term_counts['Percentage'] = (term_counts['Count'] / term_counts['Count'].sum() * 100).round(2).astype(str) + '%'
-                    cat_summary_md += "#### Policy Term Distribution\n" + term_counts.to_markdown(index=False)
-                if cat_summary_md == "### Categorical Data Frequencies\n\n":
-                    cat_summary_md += "*No categorical columns ('sex', 'policy_term') found or data is empty.*"
-            else:
-                cat_summary_md = "*DataFrame is empty, no categorical data to summarize.*"
-            return df, df, desc_stats_md, plot_object, cat_summary_md
         except Exception as e:
             gr.Error(f"An error occurred during generation: {str(e)}")
-            return current_df, df_state.value, f"Error: {str(e)}", None, f"Error: {str(e)}"
     def handle_download_button_click(current_df_to_download):
         if current_df_to_download is None or not isinstance(current_df_to_download, pd.DataFrame) or current_df_to_download.empty:
             gr.Warning("No data available to download. Generate model points first.")
-            return None
         excel_output = io.BytesIO()
         current_df_to_download.to_excel(excel_output, sheet_name='ModelPoints', engine='xlsxwriter', index=False)
         excel_output.seek(0)
-        return excel_output
     inputs_list = [
         mp_count_input, seed_input, age_min_input, age_max_input,
@@ -197,14 +231,17 @@ with gr.Blocks() as demo: # Removed theme=gr.themes.Soft() to use Gradio default
         inputs=inputs_list,
         outputs=[
             model_points_display, df_state, summary_stats_display,
-            distribution_plot_display, categorical_summary_display
         ]
     )
     download_excel_btn.click(
         fn=handle_download_button_click,
         inputs=[df_state],
-        outputs=[download_excel_btn]
     )
 if __name__ == "__main__":

         sex_col = np.full(mp_count_val, "U")
     if not policy_terms_selection_val:
+        policy_terms_selection_val = [10, 15, 20] # Default if empty
     policy_term_options = np.array(policy_terms_selection_val).astype(int)
+    if len(policy_term_options) == 0: # Handle case where user deselects all
+        policy_term_options = np.array([10, 15, 20]) # Fallback to default
+        gr.Warning("No policy terms selected. Using default terms [10, 15, 20].")
     policy_term_col = rng.choice(policy_term_options, size=mp_count_val)
     sum_assured_col = np.round((sa_max_val - sa_min_val) * rng.random(size=mp_count_val) + sa_min_val, -3)
     max_duration_val = policy_term_col * 12 - 1
     max_duration_val = np.maximum(1, max_duration_val)
     duration_mth_col = (rng.random(size=mp_count_val) * max_duration_val).astype(int) + 1
     return model_point_df
 # 2. Gradio App Definition
+with gr.Blocks() as demo:
     gr.Markdown("# Actuarial Model Points Generator")
     gr.Markdown(
         "Configure the parameters below to generate a custom set of seriatim model points. "
             generate_btn = gr.Button("Generate Model Points", variant="primary")
         with gr.Column(scale=2):
+            model_points_display = gr.Dataframe(label="Generated Model Points", wrap=True, height=400)
+            download_excel_btn = gr.DownloadButton(label="Download Excel", variant="secondary") # Removed value, will be set by click
             gr.Markdown("---")
             gr.Markdown("## 📊 Data Summary & Analysis")
             with gr.Tabs():
                 with gr.TabItem("Numerical Summary"):
+                    summary_stats_display = gr.Dataframe(label="Descriptive Statistics", wrap=True)
                 with gr.TabItem("Distribution Plot"):
                     gr.Markdown("Distribution of **Sum Assured**. Data is generated uniformly; a normal curve is fitted for illustration.")
                     distribution_plot_display = gr.Plot()
                 with gr.TabItem("Categorical Summary"):
+                    # For simplicity, we'll display sex distribution here.
+                    # You might want to add another Dataframe for policy_term or combine them.
+                    sex_summary_display = gr.Dataframe(label="Sex Distribution", wrap=True)
+                    policy_term_summary_display = gr.Dataframe(label="Policy Term Distribution", wrap=True)
     # 3. Event Handlers
     def handle_generate_button_click(
         mp_c, s, age_m, age_mx, sa_m, sa_mx, p_terms, incl_sex, pc_fixed
     ):
+        empty_df = pd.DataFrame()
+        empty_plot = None
+        no_data_df_num = pd.DataFrame({'Message': ["No data generated or summary failed."]})
+        no_data_df_cat = pd.DataFrame({'Message': ["No data generated or summary failed."]})
         current_df = df_state.value if df_state.value is not None else pd.DataFrame()
         if int(age_m) >= int(age_mx):
             gr.Warning("Minimum Age must be less than Maximum Age.")
+            return current_df, df_state.value, pd.DataFrame({'Error': ["Minimum Age must be less than Maximum Age."]}), empty_plot, pd.DataFrame({'Error': ["Minimum Age must be less than Maximum Age."]}), pd.DataFrame({'Error': ["Minimum Age must be less than Maximum Age."]})
         if float(sa_m) >= float(sa_mx):
             gr.Warning("Minimum Sum Assured must be less than Maximum Sum Assured.")
+            return current_df, df_state.value, pd.DataFrame({'Error': ["Minimum Sum Assured must be less than Maximum Sum Assured."]}), empty_plot, pd.DataFrame({'Error': ["Minimum Sum Assured must be less than Maximum Sum Assured."]}), pd.DataFrame({'Error': ["Minimum Sum Assured must be less than Maximum Sum Assured."]})
+        if not p_terms:
+            gr.Warning("At least one Policy Term must be selected. Using defaults.")
+            p_terms = [10, 15, 20] # Apply default if none selected
         gr.Info("Generating model points... Please wait.")
         try:
             df = generate_custom_model_points(mp_c, s, age_m, age_mx, sa_m, sa_mx, p_terms, incl_sex, pc_fixed)
             gr.Info(f"{len(df)} model points generated successfully!")
+            # Numerical Summary
             numerical_cols = ['age_at_entry', 'sum_assured', 'duration_mth', 'policy_count']
             existing_numerical_cols = [col for col in numerical_cols if col in df.columns]
+            desc_stats_df = no_data_df_num
             if existing_numerical_cols and not df.empty:
                 desc_stats = df[existing_numerical_cols].describe().transpose()
                 if 'count' in desc_stats.columns:
                     desc_stats['count'] = desc_stats['count'].astype(int)
+                # Gradio Dataframe handles formatting, so raw numbers are fine
+                desc_stats_df = desc_stats.reset_index().rename(columns={'index': 'Statistic'})
+            elif df.empty:
+                 desc_stats_df = pd.DataFrame({'Message': ["DataFrame is empty, no numerical data to describe."]})
+            # Distribution Plot
+            fig = empty_plot
             if 'sum_assured' in df.columns and not df['sum_assured'].empty:
                 fig, ax = plt.subplots(figsize=(8, 5))
                 ax.hist(df['sum_assured'], bins=50, density=True, alpha=0.7, color='skyblue', edgecolor='black', label='Actual Distribution')
+                mu, std_dev = stats.norm.fit(df['sum_assured'])
                 xmin_hist, xmax_hist = ax.get_xlim()
                 x_norm = np.linspace(xmin_hist, xmax_hist, 100)
+                p_norm = stats.norm.pdf(x_norm, mu, std_dev)
+                ax.plot(x_norm, p_norm, 'r--', linewidth=2, label=f'Fitted Normal (μ={mu:,.0f}, σ={std_dev:,.0f})')
                 ax.set_title(f"Sum Assured Distribution (N={len(df)})", fontsize=14)
                 ax.set_xlabel("Sum Assured ($)", fontsize=12)
                 ax.set_ylabel("Density", fontsize=12)
                 ax.legend()
                 ax.grid(axis='y', linestyle='--', alpha=0.7)
                 plt.tight_layout()
+            # Removed plot_object = fig, fig is directly returned
+            # Categorical Summary
+            sex_counts_df = no_data_df_cat
+            term_counts_df = no_data_df_cat
             if not df.empty:
                 if 'sex' in df.columns:
                     sex_counts = df['sex'].value_counts().reset_index()
                     sex_counts.columns = ['Sex', 'Count']
+                    sex_counts['Percentage'] = (sex_counts['Count'] / sex_counts['Count'].sum() * 100).round(2) # Keep as number for potential sorting in Dataframe
+                    sex_counts_df = sex_counts
+                else:
+                    sex_counts_df = pd.DataFrame({'Message': ["'sex' column not found or data is empty."]})
                 if 'policy_term' in df.columns:
                     term_counts = df['policy_term'].value_counts().sort_index().reset_index()
                     term_counts.columns = ['Policy Term (Years)', 'Count']
+                    term_counts['Percentage'] = (term_counts['Count'] / term_counts['Count'].sum() * 100).round(2) # Keep as number
+                    term_counts_df = term_counts
+                else:
+                    term_counts_df = pd.DataFrame({'Message': ["'policy_term' column not found or data is empty."]})
+            elif df.empty:
+                 sex_counts_df = pd.DataFrame({'Message': ["DataFrame is empty, no categorical data for 'sex'."]})
+                 term_counts_df = pd.DataFrame({'Message': ["DataFrame is empty, no categorical data for 'policy_term'."]})
+            return df, df, desc_stats_df, fig, sex_counts_df, term_counts_df
         except Exception as e:
             gr.Error(f"An error occurred during generation: {str(e)}")
+            error_df = pd.DataFrame({'Error': [str(e)]})
+            return current_df, df_state.value, error_df, empty_plot, error_df, error_df
     def handle_download_button_click(current_df_to_download):
         if current_df_to_download is None or not isinstance(current_df_to_download, pd.DataFrame) or current_df_to_download.empty:
             gr.Warning("No data available to download. Generate model points first.")
+            # Return a dummy file path or None if Gradio handles it gracefully for gr.DownloadButton
+            # For safety, returning a named file path that won't be created avoids errors.
+            return gr.DownloadButton.update(interactive=False) # Or simply return None if that works for your Gradio version.
         excel_output = io.BytesIO()
+        # Ensure 'Statistic' column from describe().transpose().reset_index() is handled if it exists
+        # The main df (current_df_to_download) should be fine as is.
         current_df_to_download.to_excel(excel_output, sheet_name='ModelPoints', engine='xlsxwriter', index=False)
         excel_output.seek(0)
+        # When returning a file for gr.DownloadButton, Gradio expects a file path or a BytesIO object
+        # The 'value' of the DownloadButton component itself is not what's downloaded.
+        # The function should return the file object.
+        return gr.File(file_path=excel_output, file_name="model_points.xlsx", visible=True)
     inputs_list = [
         mp_count_input, seed_input, age_min_input, age_max_input,
         inputs=inputs_list,
         outputs=[
             model_points_display, df_state, summary_stats_display,
+            distribution_plot_display, sex_summary_display, policy_term_summary_display
         ]
     )
+    # The DownloadButton's click function should return the file object to the button itself.
+    # The `value` parameter in gr.DownloadButton is for the initial filename,
+    # but the actual file content comes from the function's return.
     download_excel_btn.click(
         fn=handle_download_button_click,
         inputs=[df_state],
+        outputs=[download_excel_btn] # The output should be the button itself to receive the file
     )
 if __name__ == "__main__":