Spaces:

pm6six
/

testing

Sleeping

App Files Files Community

testing / app.py

pm6six

Update app.py

edec0a9 verified about 1 month ago

raw

history blame contribute delete

3.06 kB

	import streamlit as st
	import pdfplumber
	import pandas as pd

	# Keyword table used to classify transactions.
	#
	# Each category maps to merchant/keyword fragments that are matched
	# case-insensitively as substrings of the transaction description.
	#
	# Ordering matters for first-match lookups: a category holding a more
	# specific keyword must come before a category holding a generic keyword it
	# contains. "Transport" ("Gas Station") therefore precedes "Utilities"
	# ("Gas"), and "Entertainment" ("Amazon Prime") precedes "Shopping"
	# ("Amazon"). A keyword should appear in one category only; "Walmart" is
	# kept under "Groceries".
	CATEGORY_MAPPING = {
	    "Groceries": ["Walmart", "Kroger", "Whole Foods", "Costco", "Trader Joe", "Safeway"],
	    "Dining": ["McDonald's", "Starbucks", "Chipotle", "Subway", "Domino", "Pizza", "Burger", "Restaurant"],
	    "Transport": ["Uber", "Lyft", "MetroCard", "Gas Station", "Shell", "Chevron"],
	    "Utilities": ["Verizon", "AT&T", "T-Mobile", "Sprint", "Comcast", "Xfinity", "Con Edison", "Electric", "Water", "Gas"],
	    "Rent": ["Apartment", "Rent", "Landlord", "Lease"],
	    "Entertainment": ["Netflix", "Spotify", "Amazon Prime", "Hulu", "Disney", "Cinema"],
	    "Healthcare": ["Pharmacy", "CVS", "Walgreens", "Doctor", "Hospital", "Dental"],
	    "Shopping": ["Amazon", "Best Buy", "Target", "Ebay", "Retail"],
	    # Fallback bucket; intentionally has no keywords.
	    "Other": [],
	}

	# Function to classify transactions based on description
	def classify_transaction(description, mapping=None):
	    """Return the spending category whose keyword best matches *description*.

	    Matching is a case-insensitive substring test. When several keywords
	    match, the longest one wins, so a specific keyword such as
	    "Gas Station" is preferred over a generic one such as "Gas" no matter
	    which category is listed first. Ties go to the category that appears
	    first in the mapping.

	    Args:
	        description: Transaction text. Any value is accepted; it is
	            converted with ``str()`` before matching.
	        mapping: Optional ``{category: [keyword, ...]}`` dict. Defaults to
	            the module-level ``CATEGORY_MAPPING``.

	    Returns:
	        The best-matching category name, or ``"Other"`` if nothing matches.
	    """
	    if mapping is None:
	        mapping = CATEGORY_MAPPING

	    text = str(description).lower()
	    best_category = "Other"
	    best_length = 0
	    for category, keywords in mapping.items():
	        for keyword in keywords:
	            needle = keyword.lower()
	            # Strictly longer only: on equal length the earlier category
	            # is kept, and empty keywords can never match.
	            if len(needle) > best_length and needle in text:
	                best_category = category
	                best_length = len(needle)
	    return best_category

	def _parse_transaction_line(line):
	    """Split one statement line into ``(date, description, amount)`` strings.

	    The first whitespace-separated token is taken as the date, the last as
	    the amount, and everything in between as the description. ``$`` and
	    thousands separators are removed from the amount so it can be parsed
	    as a number. Returns ``None`` for lines with fewer than two tokens; the
	    description is ``None`` when the line has exactly two tokens.
	    """
	    tokens = line.split()
	    if len(tokens) < 2:
	        return None
	    amount = tokens[-1].replace("$", "").replace(",", "")
	    description = " ".join(tokens[1:-1]) or None
	    return tokens[0], description, amount


	# Function to process uploaded PDF and categorize transactions
	def process_pdf(file):
	    """Extract transactions from a PDF statement and categorize them.

	    Line parsing is a heuristic (date first, amount last) and may need
	    adjusting for a particular statement layout. Every text line that
	    contains a digit is treated as a candidate transaction, so
	    non-transaction lines can appear in the result with a NaN amount.

	    Args:
	        file: A path or file-like object accepted by ``pdfplumber.open``,
	            or ``None``.

	    Returns:
	        ``(df, category_summary)`` where ``df`` has the columns ``Date``,
	        ``Description``, ``Amount`` and ``Category``, and
	        ``category_summary`` holds the summed ``Amount`` per ``Category``.
	        ``(None, None)`` is returned when *file* is ``None`` so callers can
	        always unpack two values.
	    """
	    if file is None:
	        st.error("No file uploaded.")
	        return None, None

	    # Extract text from PDF. Each page is extracted once; pages for which
	    # extract_text() returns nothing are skipped.
	    page_texts = []
	    with pdfplumber.open(file) as pdf:
	        for page in pdf.pages:
	            page_text = page.extract_text()
	            if page_text:
	                page_texts.append(page_text)
	    text = "\n".join(page_texts)

	    # Extract transactions (Modify based on statement format)
	    rows = []
	    for line in text.split("\n"):
	        if not any(char.isdigit() for char in line):
	            continue
	        parsed = _parse_transaction_line(line)
	        if parsed is not None:
	            rows.append(parsed)

	    # Convert to DataFrame (an empty `rows` yields an empty frame with
	    # the expected columns).
	    df = pd.DataFrame(rows, columns=["Date", "Description", "Amount"])

	    # Ensure amount column is numeric; unparseable values become NaN
	    df["Amount"] = pd.to_numeric(df["Amount"], errors="coerce")

	    # Ensure no missing descriptions
	    df["Description"] = df["Description"].fillna("Unknown")

	    # Apply classification
	    df["Category"] = df["Description"].apply(classify_transaction)

	    # Summarize total spending per category
	    category_summary = df.groupby("Category")["Amount"].sum().reset_index()

	    return df, category_summary  # Return full transactions and summary

	# Streamlit UI: page header, PDF uploader, and the two result tables.
	st.title("📄 Credit Card Statement Classifier")
	st.write(
	    "Upload a PDF bank/credit card statement, and this app will "
	    "categorize transactions and show your spending summary."
	)

	uploaded_file = st.file_uploader("Upload PDF", type=["pdf"])

	# Nothing below runs until the user has supplied a file.
	if uploaded_file is not None:
	    st.success("✅ File uploaded successfully!")

	    # Parse the statement into detailed rows plus a per-category summary.
	    df_result, category_summary = process_pdf(uploaded_file)

	    if df_result is not None:
	        # Render each table under its heading: detail first, then summary.
	        sections = (
	            ("### 📊 Classified Transactions:", df_result),
	            ("### 💰 Spending Summary by Category:", category_summary),
	        )
	        for heading, table in sections:
	            st.write(heading)
	            st.dataframe(table)