"""
Spend Analyzer - Financial Intelligence and Analysis Module
"""
|
import calendar
import json
import logging
from collections import defaultdict
from dataclasses import asdict, dataclass, is_dataclass
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Tuple

import numpy as np
import pandas as pd
|
|
|
@dataclass
class SpendingInsight:
    """Aggregated spending statistics for a single transaction category."""

    # Category label taken from the transactions' ``category`` field.
    category: str
    # Sum of the absolute expense amounts in the category.
    total_amount: float
    # Number of expense transactions in the category.
    transaction_count: int
    # Mean absolute expense amount in the category.
    average_transaction: float
    # Share of overall spending, expressed on a 0-100 scale.
    percentage_of_total: float
    # One of 'increasing', 'decreasing' or 'stable'.
    trend: str
    # Most frequent transaction descriptions (at most three).
    top_merchants: List[str]
|
|
|
@dataclass
class BudgetAlert:
    """Budget status of one category for the current calendar month."""

    # Category the budget applies to.
    category: str
    # Configured spending limit for the category.
    budget_limit: float
    # Absolute amount spent in the category so far this month.
    current_spending: float
    # ``current_spending`` relative to ``budget_limit`` on a 0-100 scale.
    percentage_used: float
    # 'critical' (>= 100 %), 'warning' (>= 80 %) or 'info'.
    alert_level: str
    # Days left in the current calendar month.
    days_remaining: int
|
|
|
@dataclass
class FinancialSummary:
    """High-level overview of income, expenses and notable transactions."""

    # Sum of all positive transaction amounts.
    total_income: float
    # Absolute sum of all negative transaction amounts.
    total_expenses: float
    # ``total_income`` minus ``total_expenses``.
    net_cash_flow: float
    # Details of the single largest expense ('amount', 'description',
    # 'date', 'category'), or an empty dict when there are no expenses.
    largest_expense: Dict
    # Category that occurs most often among expenses ("" when none).
    most_frequent_category: str
    # Transactions flagged as unusual, each described by a dict.
    unusual_transactions: List[Dict]
    # Named monthly spending statistics.
    monthly_trends: Dict[str, float]
|
|
|
class SpendAnalyzer:
    """Analyze financial transactions held in a pandas DataFrame.

    Every transaction must provide ``date``, ``description``, ``amount`` and
    ``category`` fields. Negative amounts are treated as expenses and
    positive amounts as income.
    """

    # Absolute slope (currency units per month) beyond which a category's
    # spending is reported as increasing or decreasing.
    TREND_SLOPE_THRESHOLD = 0.1
    # Budget usage percentages at which alerts escalate.
    WARNING_PERCENTAGE = 80
    CRITICAL_PERCENTAGE = 100
    # Minimum number of expenses a category needs before outlier detection.
    MIN_OUTLIER_SAMPLE = 5
    # More than this many same-merchant transactions on one day is unusual.
    SAME_DAY_MERCHANT_LIMIT = 3
    # Upper bound on the number of unusual transactions reported.
    MAX_UNUSUAL_TRANSACTIONS = 10

    def __init__(self):
        self.logger = logging.getLogger(__name__)
        self.transactions_df = pd.DataFrame()
        self.budgets = {}

    @staticmethod
    def _to_record(trans):
        """Convert one transaction object into a row usable by pandas."""
        if isinstance(trans, dict):
            return trans
        if is_dataclass(trans) and not isinstance(trans, type):
            return asdict(trans)
        if hasattr(trans, '__dict__'):
            # Plain objects are not dataclasses, so asdict() would raise
            # TypeError; read their attributes directly instead.
            return dict(vars(trans))
        return trans

    def load_transactions(self, transactions: List) -> None:
        """Load transactions into a pandas DataFrame for analysis.

        Args:
            transactions: Dicts, dataclass instances or plain objects that
                expose the transaction fields.

        Raises:
            Exception: Any error raised while building the DataFrame is
                logged and re-raised. The previously loaded transactions are
                left untouched in that case.
        """
        try:
            frame = pd.DataFrame([self._to_record(trans) for trans in transactions])

            if not frame.empty:
                frame['date'] = pd.to_datetime(frame['date'])
                frame = frame.sort_values('date')

                # Derived columns used for grouping in the analyses.
                frame['month'] = frame['date'].dt.to_period('M')
                frame['week'] = frame['date'].dt.to_period('W')
                frame['day_of_week'] = frame['date'].dt.day_name()

            # Assign only after every step succeeded so that a failed load
            # never leaves a half-processed frame behind.
            self.transactions_df = frame
            self.logger.info(f"Loaded {len(self.transactions_df)} transactions")

        except Exception as e:
            self.logger.error(f"Error loading transactions: {e}")
            raise

    def set_budgets(self, budgets: Dict[str, float]) -> None:
        """Set budget limits for categories.

        A copy is stored so that later changes to the caller's dict do not
        silently alter the configured budgets.
        """
        self.budgets = dict(budgets)

    def analyze_spending_by_category(self, months_back: int = 6) -> "List[SpendingInsight]":
        """Analyze spending patterns by category.

        Args:
            months_back: Size of the look-back window, counted as 30-day
                months ending now.

        Returns:
            One insight per category with expenses in the window, sorted by
            total amount in descending order. Empty when there is no data or
            no spending.
        """
        if self.transactions_df.empty:
            return []

        df = self.transactions_df
        cutoff_date = datetime.now() - timedelta(days=months_back * 30)

        expenses_df = df[(df['date'] >= cutoff_date) & (df['amount'] < 0)].copy()
        expenses_df['amount'] = expenses_df['amount'].abs()

        total_spending = expenses_df['amount'].sum()
        if total_spending == 0:
            self.logger.warning("Total spending is zero; no insights can be generated.")
            return []

        insights = []
        for category, group in expenses_df.groupby('category'):
            amounts = group['amount']
            # Native Python numbers keep the results JSON-serializable.
            total = round(float(amounts.sum()), 2)
            insights.append(SpendingInsight(
                category=category,
                total_amount=total,
                transaction_count=int(amounts.count()),
                average_transaction=round(float(amounts.mean()), 2),
                percentage_of_total=float(total / total_spending * 100),
                trend=self._calculate_trend(expenses_df, category),
                top_merchants=group['description'].value_counts().head(3).index.tolist(),
            ))

        insights.sort(key=lambda insight: insight.total_amount, reverse=True)
        return insights

    def _calculate_trend(self, df: pd.DataFrame, category: str) -> str:
        """Calculate the spending trend for a category.

        Fits a straight line through the category's monthly totals and
        classifies its slope.

        Returns:
            'increasing', 'decreasing' or 'stable'. 'stable' is also returned
            when fewer than two months are available or the fit fails.
        """
        try:
            category_df = df[df['category'] == category]
            monthly_spending = category_df.groupby('month')['amount'].sum()

            if len(monthly_spending) < 2:
                return 'stable'

            x = np.arange(len(monthly_spending))
            slope = np.polyfit(x, monthly_spending.values, 1)[0]

        except Exception as exc:
            # Best effort: a failed fit must not break the whole analysis.
            self.logger.debug(f"Could not calculate trend for {category}: {exc}")
            return 'stable'

        if slope > self.TREND_SLOPE_THRESHOLD:
            return 'increasing'
        if slope < -self.TREND_SLOPE_THRESHOLD:
            return 'decreasing'
        return 'stable'

    def _alert_level(self, percentage_used: float) -> str:
        """Map a budget usage percentage to an alert level."""
        if percentage_used >= self.CRITICAL_PERCENTAGE:
            return 'critical'
        if percentage_used >= self.WARNING_PERCENTAGE:
            return 'warning'
        return 'info'

    def check_budget_alerts(self) -> "List[BudgetAlert]":
        """Check for budget alerts and overspending in the current month.

        Returns:
            One alert per configured budget category. Empty when no
            transactions or no budgets are loaded. For a non-positive budget
            limit the usage is reported as infinite when anything was spent
            and as 0 otherwise.
        """
        if self.transactions_df.empty or not self.budgets:
            return []

        now = datetime.now()
        # Reset the time of day as well; otherwise transactions made early
        # on the 1st would fall before the cutoff.
        month_start = now.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
        days_in_month = calendar.monthrange(now.year, now.month)[1]
        next_month_start = month_start + timedelta(days=days_in_month)
        days_remaining = days_in_month - now.day

        df = self.transactions_df
        month_df = df[
            (df['date'] >= month_start) &
            (df['date'] < next_month_start) &
            (df['amount'] < 0)
        ]
        spent_by_category = month_df['amount'].abs().groupby(month_df['category']).sum()

        alerts = []
        for category, budget_limit in self.budgets.items():
            current_spending = float(spent_by_category.get(category, 0.0))

            if budget_limit > 0:
                percentage_used = current_spending / budget_limit * 100
            elif current_spending > 0:
                # Any spending against a zero budget is an overrun.
                percentage_used = float('inf')
            else:
                percentage_used = 0.0

            alerts.append(BudgetAlert(
                category=category,
                budget_limit=budget_limit,
                current_spending=current_spending,
                percentage_used=percentage_used,
                alert_level=self._alert_level(percentage_used),
                days_remaining=days_remaining,
            ))

        return alerts

    def generate_financial_summary(self) -> "FinancialSummary":
        """Generate a comprehensive financial summary of all transactions.

        Returns:
            A summary with zero or empty values when no transactions are
            loaded.
        """
        if self.transactions_df.empty:
            return FinancialSummary(0, 0, 0, {}, "", [], {})

        df = self.transactions_df
        income_df = df[df['amount'] > 0]
        expense_df = df[df['amount'] < 0]

        total_income = float(income_df['amount'].sum())
        total_expenses = float(abs(expense_df['amount'].sum()))

        largest_expense = {}
        most_frequent_category = ""
        if not expense_df.empty:
            # Expenses are negative, so the minimum is the largest expense.
            largest_expense_row = expense_df.loc[expense_df['amount'].idxmin()]
            largest_expense = {
                'amount': float(abs(largest_expense_row['amount'])),
                'description': largest_expense_row['description'],
                'date': largest_expense_row['date'].strftime('%Y-%m-%d'),
                'category': largest_expense_row['category'],
            }

            # mode() is empty when every category value is missing.
            modes = expense_df['category'].mode()
            if not modes.empty:
                most_frequent_category = modes.iloc[0]

        return FinancialSummary(
            total_income=total_income,
            total_expenses=total_expenses,
            net_cash_flow=total_income - total_expenses,
            largest_expense=largest_expense,
            most_frequent_category=most_frequent_category,
            unusual_transactions=self._detect_unusual_transactions(),
            monthly_trends=self._calculate_monthly_trends(),
        )

    def _detect_unusual_transactions(self) -> List[Dict]:
        """Detect unusual transactions using statistical methods.

        Two checks are applied: per-category expense amounts outside
        1.5 x IQR of the quartiles, and more than three transactions with
        the same description on the same day.

        Returns:
            At most ten findings, amount outliers first.
        """
        if self.transactions_df.empty:
            return []

        df = self.transactions_df
        expenses = df[df['amount'] < 0]
        unusual = []

        for category in df['category'].unique():
            category_df = expenses[expenses['category'] == category]
            if len(category_df) < self.MIN_OUTLIER_SAMPLE:
                continue

            amounts = category_df['amount'].abs()
            q1 = amounts.quantile(0.25)
            q3 = amounts.quantile(0.75)
            iqr = q3 - q1
            lower_bound = q1 - 1.5 * iqr
            upper_bound = q3 + 1.5 * iqr

            outliers = category_df[(amounts < lower_bound) | (amounts > upper_bound)]
            for _, row in outliers.iterrows():
                amount = float(abs(row['amount']))
                # Report the actual direction; low outliers are flagged too.
                direction = 'higher' if amount > upper_bound else 'lower'
                unusual.append({
                    'date': row['date'].strftime('%Y-%m-%d'),
                    'description': row['description'],
                    'amount': amount,
                    'category': row['category'],
                    'reason': f'Amount significantly {direction} than usual for this category',
                })

        daily_merchant = df.groupby([df['date'].dt.date, 'description']).size()
        frequent_same_day = daily_merchant[daily_merchant > self.SAME_DAY_MERCHANT_LIMIT]

        for (date, merchant), count in frequent_same_day.items():
            unusual.append({
                'date': str(date),
                'description': merchant,
                'count': int(count),
                'reason': f'{count} transactions with same merchant on same day',
            })

        return unusual[:self.MAX_UNUSUAL_TRANSACTIONS]

    def _calculate_monthly_trends(self) -> Dict[str, float]:
        """Calculate monthly spending trends over the last six months with expenses.

        Returns:
            A dict with 'average_monthly', 'highest_month' and
            'lowest_month'; 'overall_trend' (fitted slope) and
            'month_over_month_change' (percent) are added when at least two
            months are available. Empty when there are no expenses.
        """
        if self.transactions_df.empty:
            return {}

        df = self.transactions_df
        expense_df = df[df['amount'] < 0]
        if expense_df.empty:
            # Without expenses every statistic would be NaN.
            return {}

        monthly_spending = expense_df['amount'].abs().groupby(expense_df['month']).sum()
        recent_months = monthly_spending.tail(6)

        trends = {}
        if len(recent_months) >= 2:
            x = np.arange(len(recent_months))
            trends['overall_trend'] = float(np.polyfit(x, recent_months.values, 1)[0])

            current_month = recent_months.iloc[-1]
            previous_month = recent_months.iloc[-2]
            if previous_month != 0:
                trends['month_over_month_change'] = float(
                    (current_month - previous_month) / previous_month * 100
                )

        trends['average_monthly'] = float(recent_months.mean())
        trends['highest_month'] = float(recent_months.max())
        trends['lowest_month'] = float(recent_months.min())

        return trends

    def predict_future_spending(self, months_ahead: int = 3) -> Dict[str, Dict[str, float]]:
        """Predict future spending based on historical trends.

        A straight line is fitted through each category's monthly totals and
        extrapolated; negative predictions are clamped to zero.

        Args:
            months_ahead: Number of future months to predict.

        Returns:
            Per category a dict with 'next_month', 'total_predicted' and
            'average_predicted'. Empty when fewer than three months of
            expenses are available.
        """
        if self.transactions_df.empty:
            return {}

        df = self.transactions_df
        expense_df = df[df['amount'] < 0].copy()
        expense_df['amount'] = expense_df['amount'].abs()

        monthly_category_spending = (
            expense_df.groupby(['month', 'category'])['amount'].sum().unstack(fill_value=0)
        )

        # Every category column shares the same months, so the history
        # length and the x values are loop-invariant.
        history_length = len(monthly_category_spending)
        if history_length < 3:
            return {}

        x = np.arange(history_length)
        future_x = np.arange(history_length, history_length + months_ahead)

        predictions = {}
        for category in monthly_category_spending.columns:
            slope, intercept = np.polyfit(x, monthly_category_spending[category].values, 1)
            forecast = np.clip(slope * future_x + intercept, 0, None)
            has_forecast = forecast.size > 0

            predictions[category] = {
                'next_month': float(forecast[0]) if has_forecast else 0.0,
                'total_predicted': float(forecast.sum()),
                'average_predicted': float(forecast.mean()) if has_forecast else 0.0,
            }

        return predictions

    def get_spending_recommendations(self) -> List[str]:
        """Generate spending recommendations based on the other analyses.

        Returns:
            Human-readable recommendations; a single positive message when
            nothing noteworthy was found.
        """
        if self.transactions_df.empty:
            return ["No transaction data available for analysis"]

        insights = self.analyze_spending_by_category()
        budget_alerts = self.check_budget_alerts()
        summary = self.generate_financial_summary()

        recommendations = []

        # Budgets that have been exceeded this month.
        for alert in budget_alerts:
            if alert.percentage_used > 100:
                recommendations.append(
                    f"You've exceeded your {alert.category} budget by "
                    f"${alert.current_spending - alert.budget_limit:.2f} this month. "
                    f"Consider reducing spending in this category."
                )

        # A single category dominating overall spending.
        if insights:
            top_category = insights[0]
            if top_category.percentage_of_total > 40:
                recommendations.append(
                    f"{top_category.category} accounts for {top_category.percentage_of_total:.1f}% "
                    f"of your spending. Consider if this allocation aligns with your priorities."
                )

        # Spending more than is earned.
        if summary.net_cash_flow < 0:
            recommendations.append(
                f"Your expenses (${summary.total_expenses:.2f}) exceed your income "
                f"(${summary.total_income:.2f}) by ${abs(summary.net_cash_flow):.2f}. "
                f"Focus on reducing expenses or increasing income."
            )

        # Largest category whose spending is trending upward.
        increasing_categories = [i for i in insights if i.trend == 'increasing']
        if increasing_categories:
            top_increasing = increasing_categories[0]
            recommendations.append(
                f"Your {top_increasing.category} spending is trending upward. "
                f"Monitor this category to avoid budget overruns."
            )

        if summary.unusual_transactions:
            recommendations.append(
                f"Found {len(summary.unusual_transactions)} unusual transactions. "
                f"Review these for potential errors or unauthorized charges."
            )

        # Positive reinforcement for categories trending downward.
        decreasing_categories = [i for i in insights if i.trend == 'decreasing']
        if decreasing_categories:
            recommendations.append(
                f"Great job reducing {decreasing_categories[0].category} spending! "
                f"This trend is helping improve your financial health."
            )

        if not recommendations:
            recommendations.append("Your spending patterns look healthy. Keep up the good work!")

        return recommendations

    def export_analysis_data(self) -> Dict:
        """Export all analysis data for Claude API integration.

        Returns:
            A dict combining insights, budget alerts, the financial summary,
            predictions and recommendations, together with the transaction
            count and the analysis timestamp.
        """
        return {
            'spending_insights': [asdict(insight) for insight in self.analyze_spending_by_category()],
            'budget_alerts': [asdict(alert) for alert in self.check_budget_alerts()],
            'financial_summary': asdict(self.generate_financial_summary()),
            'predictions': self.predict_future_spending(),
            'recommendations': self.get_spending_recommendations(),
            'transaction_count': len(self.transactions_df),
            'analysis_date': datetime.now().isoformat(),
        }
|
|
|
|
|
def main() -> None:
    """Run a small demonstration of the analyzer on sample transactions."""
    analyzer = SpendAnalyzer()

    # Sample the clock once so all sample dates share the same reference.
    now = datetime.now()
    sample_transactions = [
        {
            'date': now - timedelta(days=5),
            'description': 'Amazon Purchase',
            'amount': -45.67,
            'category': 'Shopping',
        },
        {
            'date': now - timedelta(days=10),
            'description': 'Grocery Store',
            'amount': -120.50,
            'category': 'Food & Dining',
        },
        {
            'date': now - timedelta(days=15),
            'description': 'Salary Deposit',
            'amount': 3000.00,
            'category': 'Income',
        },
    ]

    analyzer.load_transactions(sample_transactions)
    analyzer.set_budgets({'Shopping': 100, 'Food & Dining': 200})

    insights = analyzer.analyze_spending_by_category()
    print(f"Generated {len(insights)} spending insights")

    recommendations = analyzer.get_spending_recommendations()
    print(f"Generated {len(recommendations)} recommendations")


if __name__ == "__main__":
    main()