Spaces:

Huy0502
/

Reciept_Analyzer

Running

App Files Files Community

Reciept_Analyzer / utils.py

Huy0502

Upload 3 files

8aca528 verified 26 days ago

raw

history blame contribute delete

6.44 kB

	import numpy as np
	from datasets import load_metric
	from PIL import ImageDraw, ImageFont
	import pandas as pd


	# Load the "seqeval" metric once, at import time, via `datasets.load_metric`.
	# Nothing in the visible part of this module reads `metric`.
	# NOTE(review): `load_metric` is deprecated upstream in favour of the separate
	# `evaluate` package -- confirm the pinned `datasets` version still ships it.
	metric = load_metric("seqeval")


	def unnormalize_box(bbox, width, height):
	    """Scale a box expressed on a 0-1000 grid to the given width/height.

	    Args:
	        bbox: Indexable with at least four numbers. Entries 0 and 2 are
	            scaled by ``width``; entries 1 and 3 are scaled by ``height``.
	        width: Horizontal extent to scale to.
	        height: Vertical extent to scale to.

	    Returns:
	        A new four-element list; ``bbox`` itself is not modified.
	    """
	    scaled = []
	    for extent, index in ((width, 0), (height, 1), (width, 2), (height, 3)):
	        fraction = bbox[index] / 1000
	        scaled.append(extent * fraction)
	    return scaled


	def normalize_box(bbox, width, height):
	    """Map a box onto an integer 0-1000 grid relative to width/height.

	    Args:
	        bbox: Indexable with at least four numbers. Entries 0 and 2 are
	            divided by ``width``; entries 1 and 3 by ``height``.
	        width: Horizontal extent the box is measured against.
	        height: Vertical extent the box is measured against.

	    Returns:
	        A new list of four ints; each value is truncated by ``int()``.
	    """
	    extents = (width, height, width, height)
	    return [
	        int((bbox[index] / extent) * 1000)
	        for index, extent in enumerate(extents)
	    ]


	def draw_output(image, true_predictions, true_boxes):
	    """Draw each predicted label and its box onto ``image`` in red.

	    Args:
	        image: Image handed to ``ImageDraw.Draw``; it is drawn on in place.
	        true_predictions: Iterable of label strings, paired positionally
	            with ``true_boxes``. Falsy labels are rendered as 'other'.
	        true_boxes: Iterable of boxes; each is passed to ``rectangle`` and
	            its first two entries position the caption.

	    Returns:
	        The same ``image`` object that was passed in.
	    """
	    canvas = ImageDraw.Draw(image)
	    default_font = ImageFont.load_default()

	    for label, rect in zip(true_predictions, true_boxes):
	        # Missing/empty labels fall back to 'other'; captions are lower-cased.
	        caption = (label or 'other').lower()
	        canvas.rectangle(rect, outline='red')
	        # Caption is offset 10 units right of and 10 units above the box origin.
	        anchor = (rect[0] + 10, rect[1] - 10)
	        canvas.text(anchor, text=caption, fill='red', font=default_font)

	    return image


	def create_df(true_texts,
	              true_predictions,
	              chosen_labels=(
	                  'SHOP_NAME', 'ADDR', 'TITLE', 'PHONE',
	                  'PRODUCT_NAME', 'AMOUNT', 'UNIT', 'UPRICE', 'SUB_TPRICE',
	                  'UDISCOUNT', 'TAMOUNT', 'TPRICE', 'FPRICE', 'TDISCOUNT',
	                  'RECEMONEY', 'REMAMONEY',
	                  'BILLID', 'DATETIME', 'CASHIER',
	              )):
	    """Build a DataFrame of the recognised fields of one receipt.

	    Texts and predictions are paired positionally; pairs whose label is not
	    in ``chosen_labels`` are dropped.

	    Args:
	        true_texts: Iterable of extracted text fragments.
	        true_predictions: Iterable of predicted labels, one per fragment.
	        chosen_labels: Container of labels to keep. The default is an
	            immutable tuple so it cannot be altered between calls; any
	            container supporting ``in`` (list, set, ...) is accepted.

	    Returns:
	        A DataFrame with columns ``text``, ``class_label`` and
	        ``product_id``. ``product_id`` is a running counter that starts at 0
	        and advances on every 'PRODUCT_NAME' row; it is recorded for
	        'PRODUCT_NAME', 'AMOUNT', 'UPRICE' and 'SUB_TPRICE' rows and is the
	        empty string for every other row. Item rows seen before the first
	        'PRODUCT_NAME' therefore carry -1.
	    """
	    # Labels whose text is passed through ``reformat`` before being stored.
	    reformatted_labels = (
	        'AMOUNT', 'UNIT', 'UDISCOUNT', 'UPRICE', 'SUB_TPRICE',
	        'TAMOUNT', 'TPRICE', 'FPRICE', 'TDISCOUNT',
	        'RECEMONEY', 'REMAMONEY',
	    )
	    # Labels that belong to a single product line and receive its id.
	    per_product_labels = ('AMOUNT', 'SUB_TPRICE', 'UPRICE', 'PRODUCT_NAME')

	    data = {'text': [], 'class_label': [], 'product_id': []}
	    product_id = -1
	    for text, prediction in zip(true_texts, true_predictions):
	        if prediction not in chosen_labels:
	            continue

	        if prediction == 'PRODUCT_NAME':
	            product_id += 1

	        if prediction in reformatted_labels:
	            text = reformat(text)

	        data['product_id'].append(
	            product_id if prediction in per_product_labels else ''
	        )
	        data['class_label'].append(prediction)
	        data['text'].append(text)

	    return pd.DataFrame(data)


	# Punctuation deleted before attempting integer conversion:
	# . , : / | backslash ) ( - ; _
	_REFORMAT_NOISE = str.maketrans('', '', '.,:/|\\)(-;_')


	def reformat(text: str):
	    """Strip separator punctuation from ``text`` and convert it to an int.

	    The characters ``. , : / | ) ( - ; _`` and the backslash are removed in
	    a single pass. A bare ``|`` is stripped as well: the previous
	    ``replace('\\|', '')`` call only removed a pipe that directly followed
	    a backslash.

	    Args:
	        text: Raw text of a numeric field. Non-string values are returned
	            unchanged.

	    Returns:
	        The integer value when the cleaned text parses with ``int()``;
	        otherwise the cleaned string (punctuation already removed). Because
	        ``-`` is stripped, a leading minus sign is lost.
	    """
	    if not isinstance(text, str):
	        # Best-effort contract: anything that is not text passes through.
	        return text

	    cleaned = text.translate(_REFORMAT_NOISE)
	    try:
	        return int(cleaned)
	    except ValueError:
	        return cleaned

	def find_product(product_name, df):
	    """Look up a product by name and summarise its receipt line.

	    The search is a case-insensitive *literal* substring match against the
	    'PRODUCT_NAME' rows of ``df``; characters such as ``(`` or ``.`` in
	    ``product_name`` are not interpreted as a regular expression. The first
	    matching product wins.

	    Args:
	        product_name: Text to search for.
	        df: DataFrame with ``text``, ``class_label`` and ``product_id``
	            columns.

	    Returns:
	        A Vietnamese summary string with the product name, amount, unit
	        price and sub-total (missing fields are left blank and reported on
	        stdout), or a fixed "not found" message when nothing matches.
	    """
	    not_found = "Không tìm thấy item nào phù hợp."

	    def first_text(rows, label, error_message):
	        """Return the first ``text`` labelled ``label``, or '' if there is none."""
	        try:
	            return rows.loc[rows['class_label'] == label, 'text'].iloc[0]
	        except IndexError:
	            # No row with this label for the selected product.
	            print(error_message)
	            return ''

	    needle = product_name.lower()
	    product_df = df[df['class_label'] == 'PRODUCT_NAME']
	    if product_df.empty:
	        # Nothing to search; also avoids using .str on an empty non-text column.
	        return not_found

	    # Both sides are lower-cased, so a plain substring test is sufficient.
	    mask = product_df['text'].str.lower().str.contains(needle, regex=False, na=False)
	    if not mask.any():
	        return not_found

	    product_id = product_df.loc[mask, 'product_id'].iloc[0]
	    product_info = df[df['product_id'] == product_id]

	    prod_name = product_info.loc[product_info['class_label'] == 'PRODUCT_NAME', 'text'].iloc[0]
	    amount = first_text(product_info, 'AMOUNT', "Error: cannot find amount")
	    uprice = first_text(product_info, 'UPRICE', "Error: cannot find unit price")
	    sub_tprice = first_text(product_info, 'SUB_TPRICE', "Error: cannot find sub total price")

	    return f"Sản phẩm: {prod_name}\n Số lượng: {amount}\n Đơn giá: {uprice}\n Thành tiền: {sub_tprice}"


	def get_info(df):
	    """Collect the receipt header fields and format them as text.

	    For each of SHOP_NAME, ADDR, BILLID, DATETIME and CASHIER the first
	    matching ``text`` value in ``df`` is used. A field that cannot be found
	    is reported on stdout and left blank; every field is also echoed to
	    stdout as it is resolved.

	    Args:
	        df: DataFrame with ``class_label`` and ``text`` columns.

	    Returns:
	        A Vietnamese multi-line summary string, one field per line, ending
	        with a newline.
	    """
	    # (class label, message printed when missing, caption used in the output)
	    fields = (
	        ('SHOP_NAME', "Error: cannot find shop name", "Tên siêu thị: "),
	        ('ADDR', "Error: cannot find address", "Địa chỉ: "),
	        ('BILLID', "Error: cannot find bill id", "ID hóa đơn: "),
	        ('DATETIME', "Error: cannot find date and time", "Ngày: "),
	        ('CASHIER', "Error: cannot find cashier", "Nhân viên: "),
	    )

	    lines = []
	    for label, error_message, caption in fields:
	        try:
	            value = df.loc[df['class_label'] == label, 'text'].iloc[0]
	        except (IndexError, KeyError):
	            # IndexError: no row with this label; KeyError: column missing.
	            # Lookups stay best-effort, but unrelated errors now propagate.
	            print(error_message)
	            value = ''
	        print(caption, value)
	        lines.append(f"{caption}{value}")

	    return "\n ".join(lines) + "\n"