# Retrieved from a hosted "Spaces" file viewer (revision 8aca528, file size 6,435 bytes); the viewer's line-number gutter has been removed.
import numpy as np
from datasets import load_metric
from PIL import ImageDraw, ImageFont
import pandas as pd
# Load the "seqeval" metric once, at import time, via `datasets.load_metric`.
# NOTE(review): `metric` is not referenced by any function visible in this
# section — confirm it is still needed before keeping the import-time load.
metric = load_metric("seqeval")
def unnormalize_box(bbox, width, height):
    """Scale a box from the 0-1000 grid to the given width and height.

    Args:
        bbox: Indexable holding at least four numbers; indices 0 and 2 are
            scaled by ``width``, indices 1 and 3 by ``height``.
        width: Target extent for the values at indices 0 and 2.
        height: Target extent for the values at indices 1 and 3.

    Returns:
        A new four-element list with each value divided by 1000 and
        multiplied by the matching extent. Values are not rounded.
    """
    # One extent per coordinate, in the same order as the bbox entries.
    extents = (width, height, width, height)
    return [extent * (bbox[i] / 1000) for i, extent in enumerate(extents)]
def normalize_box(bbox, width, height):
    """Map a box onto the integer 0-1000 grid relative to width and height.

    Args:
        bbox: Indexable holding at least four numbers; indices 0 and 2 are
            divided by ``width``, indices 1 and 3 by ``height``.
        width: Extent used for the values at indices 0 and 2 (must be
            non-zero, otherwise the division raises).
        height: Extent used for the values at indices 1 and 3 (must be
            non-zero, otherwise the division raises).

    Returns:
        A new four-element list of ints; each value is the ratio times 1000,
        truncated toward zero by ``int``.
    """
    # One extent per coordinate, in the same order as the bbox entries.
    extents = (width, height, width, height)
    return [int((bbox[i] / extent) * 1000) for i, extent in enumerate(extents)]
def draw_output(image, true_predictions, true_boxes):
    """Draw each predicted box and its label onto ``image`` in red.

    Args:
        image: Image object accepted by ``ImageDraw.Draw``; it is drawn on
            in place.
        true_predictions: Iterable of label strings, one per box. A falsy
            label (e.g. ``''`` or ``None``) is rendered as ``'other'``.
        true_boxes: Iterable of boxes paired positionally with the labels.
            Each box is passed unchanged to ``ImageDraw.rectangle``, and its
            first two entries position the caption — presumably pixel
            coordinates ``(x0, y0, x1, y1)``; confirm against the caller.

    Returns:
        The same ``image`` object, after annotation.
    """
    def iob_to_label(label):
        # Falsy labels fall back to the generic 'other' class.
        return label if label else 'other'

    canvas = ImageDraw.Draw(image)
    default_font = ImageFont.load_default()

    for tag, rect in zip(true_predictions, true_boxes):
        caption = iob_to_label(tag).lower()
        canvas.rectangle(rect, outline='red')
        # Offset the caption 10 units right and 10 units up from the box's
        # first corner.
        canvas.text(
            (rect[0] + 10, rect[1] - 10),
            text=caption,
            fill='red',
            font=default_font,
        )

    return image
# Labels kept by default when assembling the receipt table.
_DEFAULT_CHOSEN_LABELS = (
    'SHOP_NAME', 'ADDR', 'TITLE', 'PHONE',
    'PRODUCT_NAME', 'AMOUNT', 'UNIT', 'UPRICE', 'SUB_TPRICE', 'UDISCOUNT',
    'TAMOUNT', 'TPRICE', 'FPRICE', 'TDISCOUNT',
    'RECEMONEY', 'REMAMONEY',
    'BILLID', 'DATETIME', 'CASHIER',
)

# Labels whose text is passed through ``reformat`` before being stored.
_NUMERIC_LABELS = (
    'AMOUNT', 'UNIT', 'UDISCOUNT', 'UPRICE', 'SUB_TPRICE',
    'TAMOUNT', 'TPRICE', 'FPRICE', 'TDISCOUNT',
    'RECEMONEY', 'REMAMONEY',
)

# Labels whose rows are tagged with the id of the current product.
_PRODUCT_SCOPED_LABELS = ('AMOUNT', 'SUB_TPRICE', 'UPRICE', 'PRODUCT_NAME')


def create_df(true_texts, true_predictions, chosen_labels=None):
    """Build a table of recognised receipt fields from texts and labels.

    Texts and predictions are paired positionally. Rows whose label is not in
    ``chosen_labels`` are dropped. Every ``'PRODUCT_NAME'`` row starts a new
    product (ids count up from 0); subsequent ``AMOUNT`` / ``UPRICE`` /
    ``SUB_TPRICE`` rows receive that same id, while every other row gets an
    empty string as its ``product_id``. Rows of those three labels that appear
    before the first product name carry the id ``-1``.

    Args:
        true_texts: Iterable of text snippets.
        true_predictions: Iterable of label strings, aligned with
            ``true_texts``.
        chosen_labels: Container of labels to keep. ``None`` (the default)
            selects the built-in label set; an immutable module constant is
            used instead of a mutable list default so the default can never
            be altered between calls.

    Returns:
        A ``pandas.DataFrame`` with columns ``text``, ``class_label`` and
        ``product_id`` (in that order), one row per kept snippet.
    """
    if chosen_labels is None:
        chosen_labels = _DEFAULT_CHOSEN_LABELS

    data = {'text': [], 'class_label': [], 'product_id': []}
    product_id = -1  # becomes 0 when the first PRODUCT_NAME is seen

    for text, prediction in zip(true_texts, true_predictions):
        if prediction not in chosen_labels:
            continue

        if prediction == 'PRODUCT_NAME':
            product_id += 1

        if prediction in _NUMERIC_LABELS:
            # `reformat` is the sibling helper in this module that strips
            # punctuation and converts to int when possible.
            text = reformat(text)

        data['product_id'].append(
            product_id if prediction in _PRODUCT_SCOPED_LABELS else ''
        )
        data['class_label'].append(prediction)
        data['text'].append(text)

    return pd.DataFrame(data)
# Deletes the punctuation characters . , : / | \ ) ( - ; _ in one pass.
_REFORMAT_STRIP_TABLE = str.maketrans('', '', '.,:/|\\)(-;_')


def reformat(text: str):
    """Strip separator punctuation from ``text`` and parse it as an int.

    The characters ``. , : / | \\ ) ( - ; _`` are removed, then the result is
    passed to ``int``. Note that ``-`` is removed too, so a leading minus sign
    is lost.

    Args:
        text: The raw text. Non-string values are tolerated and returned
            unchanged.

    Returns:
        * an ``int`` when the cleaned text parses as one;
        * the *cleaned* string (punctuation already removed) when it does
          not parse;
        * the original value untouched when it is not a string.
    """
    try:
        cleaned = text.translate(_REFORMAT_STRIP_TABLE)
    except (AttributeError, TypeError):
        # Not a str (e.g. already an int, None, bytes): leave it alone.
        return text

    try:
        return int(cleaned)
    except ValueError:
        # Not numeric; callers still get the punctuation-free text.
        return cleaned
def find_product(product_name, df):
    """Look up a product by name and describe its quantity and prices.

    The search is a case-insensitive *literal* substring match against the
    ``text`` of rows labelled ``'PRODUCT_NAME'``. The search term is not
    interpreted as a regular expression, so names containing characters such
    as ``(``, ``+`` or ``.`` are matched verbatim. The first matching product
    wins; its ``product_id`` is then used to collect the related ``AMOUNT``,
    ``UPRICE`` and ``SUB_TPRICE`` rows.

    Args:
        product_name: Text to search for.
        df: DataFrame with ``text``, ``class_label`` and ``product_id``
            columns.

    Returns:
        A multi-line summary string for the first matching product, with an
        empty value for any field that has no row, or a fixed "not found"
        message when no product name matches.

    Side effects:
        Prints an ``"Error: cannot find ..."`` line for each missing field.
    """
    not_found = "Không tìm thấy item nào phù hợp."

    needle = product_name.lower()
    product_df = df[df['class_label'] == 'PRODUCT_NAME']
    if product_df.empty:
        # Nothing to search; also avoids the `.str` accessor failing when the
        # text column holds only numbers.
        return not_found

    # Both sides are lower-cased, and regex=False keeps the term literal.
    mask = product_df['text'].str.lower().str.contains(
        needle, regex=False, na=False
    )
    if not mask.any():
        return not_found

    product_id = product_df.loc[mask, 'product_id'].iloc[0]
    product_info = df[df['product_id'] == product_id]

    def first_text(label, description):
        # First `text` value with the given label for this product, or ''.
        values = product_info.loc[product_info['class_label'] == label, 'text']
        if values.empty:
            print(f"Error: cannot find {description}")
            return ''
        return values.iloc[0]

    prod_name = product_info.loc[
        product_info['class_label'] == 'PRODUCT_NAME', 'text'
    ].iloc[0]
    amount = first_text('AMOUNT', 'amount')
    uprice = first_text('UPRICE', 'unit price')
    sub_tprice = first_text('SUB_TPRICE', 'sub total price')

    return f"Sản phẩm: {prod_name}\n Số lượng: {amount}\n Đơn giá: {uprice}\n Thành tiền: {sub_tprice}"
def get_info(df):
    """Summarise the receipt header fields found in ``df``.

    For each of the labels ``SHOP_NAME``, ``ADDR``, ``BILLID``, ``DATETIME``
    and ``CASHIER`` the first matching row's ``text`` is used; a missing
    field becomes an empty string.

    Args:
        df: DataFrame with ``text`` and ``class_label`` columns.

    Returns:
        A multi-line string listing shop name, address, bill id, date and
        cashier, in that order, ending with a newline.

    Side effects:
        Prints each field as it is resolved, preceded by an
        ``"Error: cannot find ..."`` line when the field is missing.
    """
    def first_text(label, description):
        # IndexError: no row carries this label. KeyError: an expected column
        # is absent. Both are reported as "missing"; any other exception is
        # left to propagate instead of being swallowed.
        try:
            return df.loc[df['class_label'] == label, 'text'].iloc[0]
        except (IndexError, KeyError):
            print(f"Error: cannot find {description}")
            return ''

    shop_name = first_text('SHOP_NAME', 'shop name')
    print("Tên siêu thị: ", shop_name)

    addr = first_text('ADDR', 'address')
    print("Địa chỉ: ", addr)

    bill_id = first_text('BILLID', 'bill id')
    print("ID hóa đơn: ", bill_id)

    date_time = first_text('DATETIME', 'date and time')
    print("Ngày: ", date_time)

    cashier = first_text('CASHIER', 'cashier')
    print("Nhân viên: ", cashier)

    return f"Tên siêu thị: {shop_name}\n Địa chỉ: {addr}\n ID hóa đơn: {bill_id}\n Ngày: {date_time}\n Nhân viên: {cashier}\n"