# testing / app.py
# pm6six's picture
# Update app.py
# edec0a9 verified
import streamlit as st
import pdfplumber
import pandas as pd
# Category -> keywords used to classify a transaction description.
#
# Matching is a case-insensitive substring test and the FIRST category (in
# insertion order) with a matching keyword wins, so ordering is significant:
#   * "Transport" is listed before "Utilities" so that "Gas Station" is not
#     swallowed by the broader "Gas" utility keyword.
#   * "Entertainment" is listed before "Shopping" so that "Amazon Prime" is
#     not swallowed by the broader "Amazon" keyword.
#   * "Walmart" is only listed under "Groceries"; a second entry under
#     "Shopping" could never match because "Groceries" is checked first.
CATEGORY_MAPPING = {
    "Groceries": ["Walmart", "Kroger", "Whole Foods", "Costco", "Trader Joe", "Safeway"],
    "Dining": ["McDonald's", "Starbucks", "Chipotle", "Subway", "Domino", "Pizza", "Burger", "Restaurant"],
    "Transport": ["Uber", "Lyft", "MetroCard", "Gas Station", "Shell", "Chevron"],
    "Utilities": ["Verizon", "AT&T", "T-Mobile", "Sprint", "Comcast", "Xfinity", "Con Edison", "Electric", "Water", "Gas"],
    "Rent": ["Apartment", "Rent", "Landlord", "Lease"],
    "Entertainment": ["Netflix", "Spotify", "Amazon Prime", "Hulu", "Disney", "Cinema"],
    "Healthcare": ["Pharmacy", "CVS", "Walgreens", "Doctor", "Hospital", "Dental"],
    "Shopping": ["Amazon", "Best Buy", "Target", "Ebay", "Retail"],
    # Fallback bucket; intentionally has no keywords.
    "Other": [],
}
def classify_transaction(description, mapping=None):
    """Return the spending category for a transaction description.

    The description is converted to a string, lower-cased, and tested against
    each category's keywords with a case-insensitive substring match. The
    first category (in the mapping's iteration order) with a matching keyword
    is returned.

    Args:
        description: Transaction description; any value is accepted and is
            passed through ``str()`` before matching.
        mapping: Optional ``{category: [keywords, ...]}`` dict. Defaults to
            the module-level ``CATEGORY_MAPPING``.

    Returns:
        The matching category name, or ``"Other"`` when no keyword matches.
    """
    if mapping is None:
        mapping = CATEGORY_MAPPING

    text = str(description).lower()
    for category, keywords in mapping.items():
        # Skip empty keywords: "" is a substring of every string and would
        # otherwise make its category match every description.
        if any(keyword and keyword.lower() in text for keyword in keywords):
            return category
    return "Other"
def process_pdf(file):
    """Extract transactions from a PDF statement and summarize them by category.

    Every text line that contains at least one digit and at least three
    whitespace-separated tokens is treated as a transaction, parsed as
    ``<date> <description words...> <amount>``: the first token is the date,
    the last token is the amount, and everything in between is the
    description. This layout is a heuristic; adjust it to the statement
    format being processed.

    Args:
        file: A path or file-like object accepted by ``pdfplumber.open``,
            or ``None``.

    Returns:
        A ``(transactions, category_summary)`` pair of DataFrames.
        ``transactions`` has the columns ``Date``, ``Description``,
        ``Amount`` and ``Category``; ``category_summary`` has ``Category``
        and the summed ``Amount``. When ``file`` is ``None`` an error is
        shown via ``st.error`` and ``(None, None)`` is returned.
    """
    if file is None:
        st.error("No file uploaded.")
        # Return a pair so callers that unpack two values do not crash.
        return None, None

    # Extract text once per page; pages without text yield None or "".
    with pdfplumber.open(file) as pdf:
        page_texts = [page.extract_text() for page in pdf.pages]
    text = "\n".join(page_text for page_text in page_texts if page_text)

    rows = []
    for line in text.split("\n"):
        if not any(char.isdigit() for char in line):
            continue
        tokens = line.split()
        # A transaction needs at least a date, one description word and an
        # amount; shorter lines (e.g. page numbers) are skipped, which also
        # keeps every row at exactly three fields for the DataFrame.
        if len(tokens) < 3:
            continue
        # Drop currency symbols / thousands separators so "$1,234.56" parses.
        amount = tokens[-1].replace("$", "").replace(",", "")
        # Keep the full multi-word description so keywords such as
        # "Whole Foods" or "Best Buy" can match.
        rows.append([tokens[0], " ".join(tokens[1:-1]), amount])

    df = pd.DataFrame(rows, columns=["Date", "Description", "Amount"])

    # Non-numeric amounts become NaN and are ignored by the summary sum.
    df["Amount"] = pd.to_numeric(df["Amount"], errors="coerce")

    # Ensure no missing descriptions
    df["Description"] = df["Description"].fillna("Unknown")

    # Apply classification
    df["Category"] = df["Description"].apply(classify_transaction)

    # Summarize total spending per category
    category_summary = df.groupby("Category")["Amount"].sum().reset_index()

    return df, category_summary
# Streamlit UI: upload a PDF statement, then show the classified transactions
# and the per-category spending summary.
st.title("📄 Credit Card Statement Classifier")
st.write(
    "Upload a **PDF bank/credit card statement**, and this app will "
    "categorize transactions and show your spending summary."
)

uploaded_file = st.file_uploader("Upload PDF", type=["pdf"])

if uploaded_file is not None:
    st.success("✅ File uploaded successfully!")

    # Process the statement. Guard the unpacking so a bare None result
    # (no data) does not raise a TypeError.
    result = process_pdf(uploaded_file)
    df_result, category_summary = result if result is not None else (None, None)

    if df_result is not None:
        st.write("### 📊 Classified Transactions:")
        st.dataframe(df_result)  # Display detailed transactions

        st.write("### 💰 Spending Summary by Category:")
        st.dataframe(category_summary)  # Display spending summary