Spaces:

alidenewade
/

actuarial-model-point-generator

Sleeping

App Files Files Community

alidenewade committed on May 23

Commit

4a7f563

verified ·

1 Parent(s): b4e00d8

Update app.py

Browse files

Files changed (1) hide show

app.py +96 -367

app.py CHANGED Viewed

@@ -1,394 +1,123 @@
 import gradio as gr
 import pandas as pd
 import numpy as np
-import matplotlib.pyplot as plt
-import seaborn as sns
 from numpy.random import default_rng
-import plotly.express as px
-import plotly.graph_objects as go
-from plotly.subplots import make_subplots
-import warnings
-warnings.filterwarnings('ignore')
-# Set style for matplotlib
-plt.style.use('default')
-sns.set_palette("husl")
-def generate_model_points(mp_count=10000, age_min=20, age_max=59,
-                          sum_assured_min=10000, sum_assured_max=1000000,
-                          policy_terms=[10, 15, 20], include_sex=True,
-                          policy_count_fixed=True, seed=12345):
     """
-    Generate seriatim model points for actuarial analysis
     """
-    # Set random seed for reproducibility
-    rng = default_rng(seed)
-    # Issue Age (Integer): age_min - age_max year old
-    age_at_entry = rng.integers(low=age_min, high=age_max+1, size=mp_count)
-    # Sex (Char) - optional
-    if include_sex:
-        Sex = ["M", "F"]
-        sex = np.fromiter(map(lambda i: Sex[i], rng.integers(low=0, high=len(Sex), size=mp_count)), np.dtype('<U1'))
-    else:
-        sex = np.full(mp_count, "U")  # Unknown/Unspecified
-    # Policy Term (Integer): from policy_terms list
-    policy_term_options = np.array(policy_terms)
-    policy_term = rng.choice(policy_term_options, size=mp_count)
-    # Sum Assured (Float): sum_assured_min - sum_assured_max
-    sum_assured = np.round((sum_assured_max - sum_assured_min) * rng.random(size=mp_count) + sum_assured_min, -3)
-    # Duration in month (Int): 1 <= Duration(mth) < Policy Term in month
-    duration_mth = np.floor((policy_term * 12 - 1) * rng.random(size=mp_count)).astype(int) + 1
-    # Policy Count (Integer): 1 (fixed) or variable
-    if policy_count_fixed:
-        policy_count = np.ones(mp_count, dtype=int)
-    else:
-        policy_count = rng.integers(low=1, high=101, size=mp_count)
     # Create DataFrame
-    attrs = ["age_at_entry", "sex", "policy_term", "policy_count", "sum_assured", "duration_mth"]
-    data = [age_at_entry, sex, policy_term, policy_count, sum_assured, duration_mth]
-    model_point_table = pd.DataFrame(dict(zip(attrs, data)), index=range(1, mp_count+1))
-    model_point_table.index.name = "policy_id"
-    return model_point_table
-def create_summary_stats(df):
-    """Generate summary statistics for the model points"""
-    summary_stats = []
-    # Numeric columns
-    numeric_cols = ['age_at_entry', 'policy_term', 'sum_assured', 'duration_mth', 'policy_count']
-    for col in numeric_cols:
-        if col in df.columns:
-            stats = {
-                'Variable': col.replace('_', ' ').title(),
-                'Count': f"{len(df[col]):,}",
-                'Mean': f"{df[col].mean():.2f}",
-                'Std Dev': f"{df[col].std():.2f}",
-                'Min': f"{df[col].min():,.0f}",
-                'Max': f"{df[col].max():,.0f}",
-                'Median': f"{df[col].median():.2f}"
-            }
-            summary_stats.append(stats)
-    # Categorical columns
-    if 'sex' in df.columns:
-        sex_counts = df['sex'].value_counts()
-        for sex_val, count in sex_counts.items():
-            stats = {
-                'Variable': f'Sex ({sex_val})',
-                'Count': f"{count:,}",
-                'Mean': f"{count/len(df)*100:.1f}%",
-                'Std Dev': '-',
-                'Min': '-',
-                'Max': '-',
-                'Median': '-'
-            }
-            summary_stats.append(stats)
-    return pd.DataFrame(summary_stats)
-def create_distribution_plots(df):
-    """Create distribution plots for key variables"""
-    fig = make_subplots(
-        rows=2, cols=3,
-        subplot_titles=('Age at Entry', 'Policy Term', 'Sum Assured',
-                        'Duration (Months)', 'Policy Count', 'Sex Distribution'),
-        specs=[[{'type': 'histogram'}, {'type': 'histogram'}, {'type': 'histogram'}],
-               [{'type': 'histogram'}, {'type': 'histogram'}, {'type': 'bar'}]]
-    )
-    # Age at Entry
-    fig.add_trace(
-        go.Histogram(x=df['age_at_entry'], name='Age at Entry', nbinsx=20),
-        row=1, col=1
-    )
-    # Policy Term
-    fig.add_trace(
-        go.Histogram(x=df['policy_term'], name='Policy Term', nbinsx=10),
-        row=1, col=2
-    )
-    # Sum Assured
-    fig.add_trace(
-        go.Histogram(x=df['sum_assured'], name='Sum Assured', nbinsx=30),
-        row=1, col=3
     )
-    # Duration in Months
-    fig.add_trace(
-        go.Histogram(x=df['duration_mth'], name='Duration (Months)', nbinsx=25),
-        row=2, col=1
-    )
-    # Policy Count
-    fig.add_trace(
-        go.Histogram(x=df['policy_count'], name='Policy Count', nbinsx=20),
-        row=2, col=2
-    )
-    # Sex Distribution
-    if 'sex' in df.columns:
-        sex_counts = df['sex'].value_counts()
-        fig.add_trace(
-            go.Bar(x=sex_counts.index, y=sex_counts.values, name='Sex Distribution'),
-            row=2, col=3
-        )
-    fig.update_layout(
-        height=800,
-        title_text="Model Points Distribution Analysis",
-        showlegend=False
-    )
-    return fig
-def create_correlation_heatmap(df):
-    """Create correlation heatmap for numeric variables"""
-    numeric_cols = ['age_at_entry', 'policy_term', 'sum_assured', 'duration_mth', 'policy_count']
-    available_cols = [col for col in numeric_cols if col in df.columns]
-    if len(available_cols) > 1:
-        corr_matrix = df[available_cols].corr()
-        fig = go.Figure(data=go.Heatmap(
-            z=corr_matrix.values,
-            x=corr_matrix.columns,
-            y=corr_matrix.columns,
-            colorscale='RdBu',
-            zmid=0,
-            text=corr_matrix.values.round(3),
-            texttemplate='%{text}',
-            textfont={"size": 12},
-            hoverongaps=False
-        ))
-        fig.update_layout(
-            title='Correlation Matrix of Model Point Variables',
-            width=600,
-            height=500
-        )
-        return fig
-    else:
-        return go.Figure().add_annotation(text="Not enough numeric variables for correlation analysis")
-def create_age_term_analysis(df):
-    """Create age vs policy term analysis"""
-    fig = px.box(df, x='policy_term', y='age_at_entry',
-                 title='Age at Entry Distribution by Policy Term',
-                 labels={'policy_term': 'Policy Term (Years)', 'age_at_entry': 'Age at Entry'})
-    fig.update_layout(height=400)
-    return fig
-def create_portfolio_metrics(df):
-    """Calculate portfolio-level metrics"""
-    metrics = {}
-    # Total exposure
-    metrics['Total Policies'] = f"{len(df):,}"
-    metrics['Total Sum Assured'] = f"${df['sum_assured'].sum():,.0f}"
-    metrics['Average Sum Assured'] = f"${df['sum_assured'].mean():,.0f}"
-    # Age metrics
-    metrics['Average Age at Entry'] = f"{df['age_at_entry'].mean():.1f} years"
-    metrics['Age Range'] = f"{df['age_at_entry'].min()}-{df['age_at_entry'].max()} years"
-    # Policy term metrics
-    metrics['Average Policy Term'] = f"{df['policy_term'].mean():.1f} years"
-    term_dist = df['policy_term'].value_counts().sort_index()
-    metrics['Policy Term Distribution'] = ', '.join([f"{term}Y: {count:,}" for term, count in term_dist.items()])
-    # Duration metrics
-    metrics['Average Duration'] = f"{df['duration_mth'].mean():.1f} months"
-    metrics['Duration Range'] = f"{df['duration_mth'].min()}-{df['duration_mth'].max()} months"
-    # Convert to DataFrame for display
-    metrics_df = pd.DataFrame(list(metrics.items()), columns=['Metric', 'Value'])
-    return metrics_df
-def export_to_csv(df):
-    """Export dataframe to CSV string"""
-    return df.to_csv()
-# Create the Gradio interface
-with gr.Blocks(title="Actuarial Model Points Generator") as demo:
-    gr.Markdown("""
-    # 📊 Actuarial Model Points Generator
-    Generate synthetic seriatim policy data for actuarial modeling, cluster analysis, and portfolio testing.
-    Perfect for creating realistic test datasets for insurance product development and risk analysis.
-    """)
     with gr.Row():
-        with gr.Column(scale=1):
-            gr.Markdown("### Generation Parameters")
-            # Basic parameters
-            mp_count = gr.Slider(
-                minimum=100, maximum=50000, value=10000, step=100,
-                label="Number of Model Points"
-            )
-            seed = gr.Number(
-                value=12345, precision=0,
-                label="Random Seed (for reproducibility)"
-            )
-            # Age parameters
-            gr.Markdown("#### Age Parameters")
-            age_min = gr.Slider(
-                minimum=18, maximum=40, value=20, step=1,
-                label="Minimum Age at Entry"
-            )
-            age_max = gr.Slider(
-                minimum=45, maximum=80, value=59, step=1,
-                label="Maximum Age at Entry"
-            )
-            # Sum Assured parameters
-            gr.Markdown("#### Sum Assured Parameters")
-            sum_assured_min = gr.Number(
-                value=10000,
-                label="Minimum Sum Assured ($)"
-            )
-            sum_assured_max = gr.Number(
-                value=1000000,
-                label="Maximum Sum Assured ($)"
-            )
-            # Policy options
-            gr.Markdown("#### Policy Options")
-            policy_terms = gr.CheckboxGroup(
-                choices=[5, 10, 15, 20, 25, 30],
-                value=[10, 15, 20],
-                label="Available Policy Terms (Years)"
-            )
-            include_sex = gr.Checkbox(
-                value=True,
-                label="Include Sex (M/F) in model points"
-            )
-            policy_count_fixed = gr.Checkbox(
-                value=True,
-                label="Fixed Policy Count = 1 (uncheck for variable 1-100)"
-            )
-            generate_btn = gr.Button("🎲 Generate Model Points", variant="primary")
-        with gr.Column(scale=2):
-            with gr.Tabs():
-                with gr.TabItem("📋 Data Table"):
-                    model_points_table = gr.Dataframe(
-                        label="Generated Model Points",
-                        # height=400, <-- This line was removed/commented out
-                        interactive=False
-                    )
-                    download_btn = gr.DownloadButton(
-                        label="📥 Download CSV",
-                        variant="secondary"
-                    )
-                with gr.TabItem("📊 Distributions"):
-                    distribution_plot = gr.Plot(label="Variable Distributions")
-                with gr.TabItem("📈 Analytics"):
-                    with gr.Row():
-                        correlation_plot = gr.Plot(label="Correlation Analysis")
-                        age_term_plot = gr.Plot(label="Age vs Policy Term")
-                with gr.TabItem("📋 Statistics"):
-                    with gr.Row():
-                        with gr.Column():
-                            portfolio_metrics = gr.Dataframe(label="Portfolio Metrics")
-                        with gr.Column():
-                            summary_stats = gr.Dataframe(label="Summary Statistics")
-    gr.Markdown("""
-    ### 🎯 Use Cases
-    **Actuarial Applications:**
-    - **Cluster Analysis**: Group similar policies for pricing and reserving
-    - **Portfolio Testing**: Stress test models with synthetic data
-    - **Product Development**: Analyze policy mix and profitability
-    - **Risk Management**: Understand exposure concentrations
-    **Key Features:**
-    - **Realistic Distributions**: Age, term, and sum assured follow typical insurance patterns
-    - **Existing Policies**: Duration > 0 represents in-force business
-    - **Flexible Parameters**: Customize age ranges, policy terms, and sum assured limits
-    - **Reproducible**: Fixed seed ensures consistent results
-    **Generated Variables:**
-    - `policy_id`: Unique identifier for each policy
-    - `age_at_entry`: Issue age (customizable range)
-    - `sex`: M/F indicator (optional)
-    - `policy_term`: Term in years (selectable options)
-    - `policy_count`: Number of policies (1 or variable)
-    - `sum_assured`: Coverage amount (customizable range)
-    - `duration_mth`: Months since issue (1 to term-1)
-    """)
-    # Event handlers
-    def generate_and_analyze(mp_count, age_min, age_max, sum_assured_min, sum_assured_max,
-                             policy_terms, include_sex, policy_count_fixed, seed):
-        """Generate model points and all analyses"""
-        if not policy_terms:
-            policy_terms = [10, 15, 20]  # Default if none selected
-        # Generate model points
-        df = generate_model_points(
-            mp_count=int(mp_count),
-            age_min=int(age_min),
-            age_max=int(age_max),
-            sum_assured_min=sum_assured_min,
-            sum_assured_max=sum_assured_max,
-            policy_terms=policy_terms,
-            include_sex=include_sex,
-            policy_count_fixed=policy_count_fixed,
-            seed=int(seed)
-        )
-        # Generate analyses
-        dist_plot = create_distribution_plots(df)
-        corr_plot = create_correlation_heatmap(df)
-        age_term_plot = create_age_term_analysis(df)
-        portfolio_metrics_df = create_portfolio_metrics(df)
-        summary_stats_df = create_summary_stats(df)
-        csv_data = export_to_csv(df)
-        return (df, dist_plot, corr_plot, age_term_plot,
-                portfolio_metrics_df, summary_stats_df, csv_data)
-    # Connect the generate button
     generate_btn.click(
-        fn=generate_and_analyze,
-        inputs=[mp_count, age_min, age_max, sum_assured_min, sum_assured_max,
-                policy_terms, include_sex, policy_count_fixed, seed],
-        outputs=[model_points_table, distribution_plot, correlation_plot,
-                 age_term_plot, portfolio_metrics, summary_stats, download_btn]
     )
-    # Initialize with default values
-    demo.load(
-        fn=generate_and_analyze,
-        inputs=[mp_count, age_min, age_max, sum_assured_min, sum_assured_max,
-                policy_terms, include_sex, policy_count_fixed, seed],
-        outputs=[model_points_table, distribution_plot, correlation_plot,
-                 age_term_plot, portfolio_metrics, summary_stats, download_btn]
     )
 if __name__ == "__main__":
     demo.launch()

 import gradio as gr
 import pandas as pd
 import numpy as np
 from numpy.random import default_rng
+import io  # For BytesIO to handle file in memory
+# 1. Data Generation Function (adapted from your script)
+def generate_cluster_model_points():
     """
+    Generates seriatim model points based on the specifications
+    from generate_model_points_for_cluster.py.
     """
+    rng = default_rng(12345)  # Fixed seed for reproducibility
+    MPCount = 10000  # Number of Model Points
+    # Issue Age (Integer): 20 - 59 year old
+    age_at_entry = rng.integers(low=20, high=60, size=MPCount)
+    # Sex (Char)
+    sex_options = ["M", "F"]
+    sex_col = np.fromiter(map(lambda i: sex_options[i], rng.integers(low=0, high=len(sex_options), size=MPCount)), np.dtype('<U1'))
+    # Policy Term (Integer): 10, 15, 20
+    policy_term_col = rng.integers(low=0, high=3, size=MPCount) * 5 + 10
+    # Sum Assured (Float): 10,000 - 1,000,000
+    sum_assured_col = np.round((1000000 - 10000) * rng.random(size=MPCount) + 10000, -3)
+    # Duration in month (Int): 0 < Duration(mth) < Policy Term in month
+    # Ensures duration_mth is at least 1 and less than policy_term_col in months.
+    duration_mth_col = np.floor((policy_term_col * 12 - 1) * rng.random(size=MPCount)).astype(int) + 1
+    # Policy Count (Integer): 1 for all model points
+    policy_count_col = 1
     # Create DataFrame
+    data_dict = {
+        "age_at_entry": age_at_entry,
+        "sex": sex_col,
+        "policy_term": policy_term_col,
+        "policy_count": policy_count_col,  # Pandas will broadcast this scalar to all rows
+        "sum_assured": sum_assured_col,
+        "duration_mth": duration_mth_col
+    }
+    # Create index named "policy_id" starting from 1
+    model_point_df = pd.DataFrame(data_dict, index=pd.RangeIndex(start=1, stop=MPCount + 1, name="policy_id"))
+    return model_point_df
+# 2. Gradio App Definition
+with gr.Blocks() as demo:  # Default theme and font
+    gr.Markdown("# Actuarial Model Points Generator (Cluster Version)")
+    gr.Markdown(
+        "This app generates 10,000 seriatim model points based on the logic from the "
+        "`generate_model_points_for_cluster.py` script.\n"
+        "Click 'Generate Model Points' to view the table, then 'Download Excel' to save the data."
     )
+    # State to store the generated DataFrame
+    df_state = gr.State()
+    # UI Elements
     with gr.Row():
+        generate_btn = gr.Button("Generate Model Points", variant="primary")
+    model_points_display = gr.Dataframe(label="Generated Model Points")
+    download_excel_btn = gr.DownloadButton(
+        label="Download Excel",
+        value="model_points.xlsx", # Sets the default filename for download
+        variant="secondary"
+    )
+    # 3. Event Handlers
+    def handle_generate_button_click():
+        """
+        Called when the 'Generate Model Points' button is clicked.
+        Generates data and updates the UI.
+        """
+        gr.Info("Generating model points... Please wait.")
+        df = generate_cluster_model_points()
+        gr.Info(f"{len(df)} model points generated successfully!")
+        return df, df  # Update both the Dataframe display and the state
+    def handle_download_button_click(current_df_to_download):
+        """
+        Called when the 'Download Excel' button is clicked.
+        Prepares the DataFrame for download as an Excel file.
+        """
+        if current_df_to_download is None or current_df_to_download.empty:
+            gr.Warning("No data available to download. Please generate model points first.")
+            # Provide an empty Excel file to prevent download error if button is clicked prematurely
+            empty_excel_output = io.BytesIO()
+            pd.DataFrame().to_excel(empty_excel_output, index=False)
+            empty_excel_output.seek(0)
+            return empty_excel_output
+        excel_output = io.BytesIO()
+        # The DataFrame's index (policy_id) will be included by default
+        current_df_to_download.to_excel(excel_output, sheet_name='ModelPoints', engine='xlsxwriter', index=True)
+        excel_output.seek(0)
+        return excel_output
+    # Wire the button clicks to their handler functions
     generate_btn.click(
+        fn=handle_generate_button_click,
+        inputs=None,  # No inputs from UI needed for generation
+        outputs=[model_points_display, df_state]
     )
+    download_excel_btn.click(
+        fn=handle_download_button_click,
+        inputs=[df_state],  # Takes the DataFrame stored in the state
+        outputs=[download_excel_btn] # The DownloadButton itself is the output for file streams
     )
+    # Optionally, load data when the app starts (or leave it empty until generate is clicked)
+    # demo.load(handle_generate_button_click, outputs=[model_points_display, df_state])
 if __name__ == "__main__":
     demo.launch()