# Reciept_Analyzer / utils.py
# Huy0502's picture
# Upload 3 files
# 8aca528 verified
import numpy as np
from datasets import load_metric
from PIL import ImageDraw, ImageFont
import pandas as pd
# seqeval metric object, loaded once as a side effect of importing this module.
# None of the functions visible in this file read `metric`.
# NOTE(review): `datasets.load_metric` is deprecated in newer `datasets`
# releases in favour of `evaluate.load("seqeval")` -- confirm against the
# pinned `datasets` version before upgrading.
metric = load_metric("seqeval")
def unnormalize_box(bbox, width, height):
    """Scale a box from the 0-1000 normalized grid to image units.

    Entries 0 and 2 of ``bbox`` are multiplied by ``width / 1000`` and
    entries 1 and 3 by ``height / 1000``. Only the first four entries of
    ``bbox`` are read.

    Returns:
        A new four-element list of scaled coordinates.
    """
    # Index i of the box is scaled by extents[i].
    extents = (width, height, width, height)
    return [extent * (bbox[idx] / 1000) for idx, extent in enumerate(extents)]
def normalize_box(bbox, width, height):
    """Map a box in image units onto the 0-1000 integer grid.

    Entries 0 and 2 of ``bbox`` are divided by ``width`` and entries 1 and 3
    by ``height``; each ratio is multiplied by 1000 and truncated with
    ``int``. Only the first four entries of ``bbox`` are read.

    Returns:
        A new four-element list of ints.
    """
    # Index i of the box is divided by extents[i].
    extents = (width, height, width, height)
    return [int((bbox[idx] / extent) * 1000) for idx, extent in enumerate(extents)]
def draw_output(image, true_predictions, true_boxes):
    """Annotate ``image`` with one red box and caption per prediction.

    Predictions and boxes are paired positionally with ``zip``, so any surplus
    entries in the longer sequence are ignored. Each box is outlined in red
    and its label is written in lowercase, in red, at
    ``(box[0] + 10, box[1] - 10)`` using PIL's default font. A falsy label
    (e.g. an empty string) is shown as ``'other'``.

    Returns:
        The same ``image`` object that was passed in, after drawing on it.
    """
    canvas = ImageDraw.Draw(image)
    default_font = ImageFont.load_default()

    for label, box in zip(true_predictions, true_boxes):
        # Empty/None labels fall back to the generic 'other' caption.
        caption = (label or 'other').lower()
        canvas.rectangle(box, outline='red')
        anchor = (box[0] + 10, box[1] - 10)
        canvas.text(anchor, text=caption, fill='red', font=default_font)

    return image
def create_df(true_texts,
              true_predictions,
              chosen_labels=('SHOP_NAME', 'ADDR', 'TITLE', 'PHONE',
                             'PRODUCT_NAME', 'AMOUNT', 'UNIT', 'UPRICE', 'SUB_TPRICE', 'UDISCOUNT',
                             'TAMOUNT', 'TPRICE', 'FPRICE', 'TDISCOUNT',
                             'RECEMONEY', 'REMAMONEY',
                             'BILLID', 'DATETIME', 'CASHIER')
              ):
    """Build a DataFrame of the recognised receipt fields.

    Args:
        true_texts: Iterable of text snippets.
        true_predictions: Iterable of predicted labels, paired positionally
            with ``true_texts`` via ``zip``.
        chosen_labels: Container of labels to keep; any pair whose label is
            not in it is skipped. The default is an immutable tuple so it
            cannot be mutated across calls.

    Returns:
        A ``pandas.DataFrame`` with columns ``text``, ``class_label`` and
        ``product_id``. ``product_id`` starts at -1 and is incremented each
        time a ``PRODUCT_NAME`` row is kept; it is recorded for
        ``PRODUCT_NAME``, ``AMOUNT``, ``UPRICE`` and ``SUB_TPRICE`` rows and
        is the empty string for every other row. Text for the numeric-style
        labels is passed through ``reformat`` first.
    """
    # Labels whose text is cleaned/parsed by ``reformat`` (hoisted out of the
    # loop; the original list repeated 'UDISCOUNT').
    reformatted_labels = ('AMOUNT', 'UNIT', 'UDISCOUNT', 'UPRICE', 'SUB_TPRICE',
                          'TAMOUNT', 'TPRICE', 'FPRICE', 'TDISCOUNT',
                          'RECEMONEY', 'REMAMONEY')
    # Labels that belong to a product line and therefore carry its id.
    product_labels = ('AMOUNT', 'SUB_TPRICE', 'UPRICE', 'PRODUCT_NAME')

    data = {'text': [], 'class_label': [], 'product_id': []}
    product_id = -1
    for text, prediction in zip(true_texts, true_predictions):
        if prediction not in chosen_labels:
            continue
        if prediction == 'PRODUCT_NAME':
            # Every kept product name opens a new product group.
            product_id += 1
        if prediction in reformatted_labels:
            text = reformat(text)
        data['product_id'].append(product_id if prediction in product_labels else '')
        data['class_label'].append(prediction)
        data['text'].append(text)
    return pd.DataFrame(data)
# Punctuation/separator characters removed before the integer parse.
_REFORMAT_STRIP_TABLE = str.maketrans('', '', '.,:/|\\)(-;_')


def reformat(text: str):
    """Strip separator punctuation from ``text`` and parse it as an integer.

    The characters ``. , : / | \\ ) ( - ; _`` are removed in a single pass.

    Returns:
        * ``int`` when the cleaned text parses as an integer;
        * the cleaned string when it does not (the punctuation stays removed);
        * ``text`` itself, untouched, when it is not a ``str`` (e.g. ``None``
          or an ``int``).
    """
    try:
        cleaned = text.translate(_REFORMAT_STRIP_TABLE)
    except (AttributeError, TypeError):
        # Not a str: nothing to clean, hand the value back unchanged.
        return text
    try:
        return int(cleaned)
    except ValueError:
        return cleaned
def _first_product_field(product_info, label, missing_msg):
    """Return the first ``text`` value labelled ``label``; print ``missing_msg`` and return '' if none."""
    matches = product_info.loc[product_info['class_label'] == label, 'text']
    if matches.empty:
        print(missing_msg)
        return ''
    return matches.iloc[0]


def find_product(product_name, df):
    """Look up a product by name and summarise its receipt line.

    The search is a case-insensitive *literal* substring match against the
    ``text`` of rows whose ``class_label`` is ``PRODUCT_NAME``. The first
    matching row's ``product_id`` selects every row of ``df`` that belongs
    to that product.

    Args:
        product_name: Text to search for. It is matched literally, so
            characters such as ``(`` or ``+`` need no escaping.
        df: DataFrame with ``text``, ``class_label`` and ``product_id``
            columns.

    Returns:
        A four-line summary string (name, amount, unit price, sub-total), or
        a fixed "not found" message when no product name matches. A missing
        amount / unit price / sub-total is reported with ``print`` and left
        blank in the summary.
    """
    not_found = "Không tìm thấy item nào phù hợp."
    needle = product_name.lower()

    product_df = df[df['class_label'] == 'PRODUCT_NAME']
    if product_df.empty:
        # No product rows at all; also avoids using the .str accessor on a
        # text column that may hold only numbers.
        return not_found

    # regex=False: the query is user text, not a pattern.
    mask = product_df['text'].str.lower().str.contains(needle, regex=False, na=False)
    if not mask.any():
        return not_found

    product_id = product_df.loc[mask, 'product_id'].iloc[0]
    product_info = df[df['product_id'] == product_id]
    prod_name = product_info.loc[product_info['class_label'] == 'PRODUCT_NAME', 'text'].iloc[0]

    amount = _first_product_field(product_info, 'AMOUNT', "Error: cannot find amount")
    uprice = _first_product_field(product_info, 'UPRICE', "Error: cannot find unit price")
    sub_tprice = _first_product_field(product_info, 'SUB_TPRICE', "Error: cannot find sub total price")

    return f"Sản phẩm: {prod_name}\n Số lượng: {amount}\n Đơn giá: {uprice}\n Thành tiền: {sub_tprice}"
def _first_receipt_field(df, label, missing_msg):
    """Return the first ``text`` value labelled ``label``; print ``missing_msg`` and return '' if unavailable."""
    try:
        return df.loc[df['class_label'] == label, 'text'].iloc[0]
    except (IndexError, KeyError):
        # IndexError: no row carries this label.
        # KeyError: the frame lacks the expected columns.
        print(missing_msg)
        return ''


def get_info(df):
    """Print and return the receipt header fields found in ``df``.

    For each of shop name, address, bill id, date/time and cashier, the first
    row of ``df`` with the matching ``class_label`` supplies the value. A
    field with no matching row is reported with ``print`` and left blank.
    Each value is also printed as it is resolved.

    Args:
        df: DataFrame with ``text`` and ``class_label`` columns.

    Returns:
        A five-line summary string terminated by a newline.
    """
    shop_name = _first_receipt_field(df, 'SHOP_NAME', "Error: cannot find shop name")
    print("Tên siêu thị: ", shop_name)

    addr = _first_receipt_field(df, 'ADDR', "Error: cannot find address")
    print("Địa chỉ: ", addr)

    bill_id = _first_receipt_field(df, 'BILLID', "Error: cannot find bill id")
    print("ID hóa đơn: ", bill_id)

    date_time = _first_receipt_field(df, 'DATETIME', "Error: cannot find date and time")
    print("Ngày: ", date_time)

    cashier = _first_receipt_field(df, 'CASHIER', "Error: cannot find cashier")
    print("Nhân viên: ", cashier)

    return f"Tên siêu thị: {shop_name}\n Địa chỉ: {addr}\n ID hóa đơn: {bill_id}\n Ngày: {date_time}\n Nhân viên: {cashier}\n"