Spaces:

satyam007
/

invoice_extraction

Runtime error

App Files Files Community

invoice_extraction / app.py

satyam007

Update app.py

3cf7dad verified 29 days ago

raw

history blame contribute delete

3.68 kB

	import gradio as gr
	from transformers import AutoModelForConditionalGeneration, AutoProcessor
	import torch
	import pandas as pd
	import pytesseract
	import cv2

	# Point pytesseract at the system Tesseract binary (only works if Tesseract is
	# already installed on the hosting server). The setting lives on the inner
	# `pytesseract.pytesseract` module as `tesseract_cmd`; assigning
	# `pytesseract.pytesseract_cmd` merely creates an unused attribute on the package.
	pytesseract.pytesseract.tesseract_cmd = r'/usr/bin/tesseract'

	# Initialize the model and processor from Hugging Face Hub
	model_name = "Qwen/Qwen2-VL-2B-Instruct-AWQ"

	# NOTE(review): `AutoModelForConditionalGeneration` does not appear to be a
	# class exported by transformers; Qwen2-VL checkpoints are documented as
	# loading through `Qwen2VLForConditionalGeneration` -- confirm and fix the
	# import at the top of the file.
	# NOTE(review): this is an AWQ-quantized checkpoint placed on CPU; presumably
	# that needs extra quantization support at load time -- confirm it works here.
	model = AutoModelForConditionalGeneration.from_pretrained(
	    model_name,
	    torch_dtype="auto",
	)
	model.to("cpu")

	processor = AutoProcessor.from_pretrained(model_name)

	# Preprocessing image for OCR
	def preprocess_image(image_path):
	    """Load an image from disk and binarize it for OCR.

	    Args:
	        image_path: Path of the image file to read.

	    Returns:
	        The result of a fixed binary threshold (cut-off 150, maximum 255)
	        applied to the grayscale version of the image.

	    Raises:
	        ValueError: If the file is missing or cannot be decoded as an image.
	    """
	    image = cv2.imread(image_path)
	    if image is None:
	        # cv2.imread reports failure by returning None instead of raising,
	        # which would otherwise surface as an opaque error inside cvtColor.
	        raise ValueError(f"Could not read image: {image_path}")

	    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
	    _, binary = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY)
	    return binary

	# OCR-based text extraction
	def ocr_extract_text(image_path):
	    """Return the text Tesseract reads from the preprocessed image at ``image_path``."""
	    return pytesseract.image_to_string(preprocess_image(image_path))

	# Model-based image processing
	def process_image(image_path):
	    """Extract invoice fields from one image, falling back to OCR on failure.

	    The vision-language model is asked for the invoice fields first. If any
	    step of that path raises, the error is printed and the text obtained
	    from ``ocr_extract_text`` is parsed instead.

	    Args:
	        image_path: Path of the invoice image on disk.

	    Returns:
	        The dictionary produced by ``parse_details``.
	    """
	    try:
	        messages = [{
	            "role": "user",
	            "content": [
	                {"type": "image", "image": image_path},
	                {"type": "text", "text": (
	                    "Extract the following details from the invoice:\n"
	                    "- 'invoice_number'\n"
	                    "- 'date'\n"
	                    "- 'place'\n"
	                    "- 'amount' (monetary value in the relevant currency)\n"
	                    "- 'category' (based on the invoice type)"
	                )},
	            ],
	        }]

	        text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

	        # The chat template only inserts image placeholders into the prompt;
	        # the pixel data itself must be handed to the processor, otherwise
	        # the model never sees the invoice.
	        bgr_image = cv2.imread(image_path)
	        if bgr_image is None:
	            raise ValueError(f"Could not read image: {image_path}")
	        rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)

	        inputs = processor(text=[text], images=[rgb_image], padding=True, return_tensors="pt")
	        inputs = inputs.to(model.device)

	        generated_ids = model.generate(**inputs, max_new_tokens=128)

	        # generate() returns prompt + completion. Drop the prompt tokens so the
	        # parser does not match the instruction lines ("invoice", "date", ...)
	        # instead of the model's answer.
	        prompt_length = inputs.input_ids.shape[1]
	        completion_ids = generated_ids[:, prompt_length:]
	        output_text = processor.batch_decode(completion_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)

	        return parse_details(output_text[0])

	    except Exception as e:
	        # Deliberate catch-all: any model-side failure degrades to plain OCR.
	        print(f"Model failed, falling back to OCR: {e}")
	        ocr_text = ocr_extract_text(image_path)
	        return parse_details(ocr_text)

	# Parsing details from text
	def parse_details(details):
	    """Pull the invoice fields out of free-form ``label: value`` text.

	    Each line is split at its first colon; the part before it is matched
	    (case-insensitively) against the known field keywords and the part after
	    it becomes the value. Lines without a colon are matched and stored as a
	    whole. Lines with an empty value are ignored so they cannot overwrite a
	    value found elsewhere; when several lines match the same field the last
	    one wins.

	    Args:
	        details: Text produced by the model or by OCR.

	    Returns:
	        A dict with the keys "Invoice Number", "Date", "Place", "Amount" and
	        "Category". Fields that were not found are None, except "Category",
	        which defaults to "General".
	    """
	    parsed_data = {
	        "Invoice Number": None,
	        "Date": None,
	        "Place": None,
	        "Amount": None,
	        "Category": None,
	    }

	    for line in details.split("\n"):
	        # Split on the FIRST colon only so values such as "10:30" or URLs
	        # are kept intact.
	        label, separator, value = line.partition(":")
	        if not separator:
	            value = line
	        label = label.lower()
	        value = value.strip()
	        if not value:
	            continue

	        # "date" is tested before "invoice" so that "Invoice Date: ..." is
	        # stored as the date rather than as the invoice number.
	        if "date" in label:
	            field = "Date"
	        elif "invoice" in label:
	            field = "Invoice Number"
	        elif "place" in label:
	            field = "Place"
	        elif any(keyword in label for keyword in ("total", "amount", "cost")):
	            field = "Amount"
	        elif "category" in label:
	            field = "Category"
	        else:
	            continue
	        parsed_data[field] = value

	    # Fall back to a generic category only when none was extracted.
	    if parsed_data["Category"] is None:
	        parsed_data["Category"] = "General"

	    return parsed_data

	# Gradio Interface
	def gradio_interface(image_files):
	    """Run invoice extraction over the uploaded files and tabulate the results.

	    Args:
	        image_files: The value delivered by the file-upload component. May be
	            None (nothing uploaded), a single upload, or a list of uploads;
	            each upload may be a path string or an object exposing the path
	            as ``.name``.

	    Returns:
	        A pandas DataFrame with one row per processed file (empty when
	        nothing was uploaded).
	    """
	    # Normalize to a list so a missing or single upload does not crash the loop.
	    if image_files is None:
	        image_files = []
	    elif not isinstance(image_files, (list, tuple)):
	        image_files = [image_files]

	    results = [
	        process_image(upload if isinstance(upload, str) else upload.name)
	        for upload in image_files
	    ]
	    return pd.DataFrame(results)

	# Launch Gradio App
	# Web UI: upload invoice images, get an editable table of extracted fields.
	grpc_interface = gr.Interface(
	    fn=gradio_interface,
	    inputs=gr.File(
	        label="Upload Invoice Images",
	        file_types=["image"],
	        # The callback iterates over its input, so allow several files per
	        # submission instead of the component's single-file default.
	        file_count="multiple",
	    ),
	    outputs=gr.Dataframe(interactive=True),
	    title="Invoice Extraction System",
	)

	# Start the server only when executed as a script, not when imported.
	# NOTE(review): share=True is presumably unnecessary when the app is hosted
	# on Spaces -- confirm before removing.
	if __name__ == "__main__":
	    grpc_interface.launch(share=True)