# NOTE(review): the three lines below are a scraping artifact from the hosting
# page, not Python — commented out so the file parses.
# Spaces:
# Sleeping
# Sleeping
import cv2
import pytesseract
from paddleocr import PaddleOCR
from scipy import ndimage
import supervision as sv
import numpy as np
import math
from src.categories import CATEGORIES as categories
# Geometric-characteristic symbols (ASME Y14.5) -> human-readable name.
# NOTE(review): the original literals were mojibake (UTF-8 bytes mis-decoded
# with the middle byte lost), which collapsed several entries onto identical
# keys ("β", "β₯") so most lookups could never match. Restored to the standard
# Unicode GD&T symbols — confirm they match what the custom Tesseract
# `eng_gdt` traineddata actually emits.
symbol_map = {
    "⏤": 'Straightness',          # U+23E4
    "⏥": 'Flatness',              # U+23E5
    "⌭": 'Cylindricity',          # U+232D
    "○": 'Circularity',           # U+25CB
    "⌯": 'Symmetry',              # U+232F
    "⌖": 'Position',              # U+2316
    "◎": 'Concentricity',         # U+25CE
    "⊥": 'Perpendicularity',      # U+22A5
    "∥": 'Parallelism',           # U+2225
    "∠": 'Angularity',            # U+2220
    "⌓": 'Profile of a surface',  # U+2313
    "⌒": 'Profile of a line',     # U+2312
    "⌰": 'Total run-out',         # U+2330
    "↗": 'Circular run-out'       # U+2197
}
# Material-condition / feature modifiers (circled letters) -> readable label.
# NOTE(review): restored from mojibake — the surviving bytes (e.g. "β»" =
# E2 [92] BB) identify these as the circled letters U+24BB..U+24CA.
feature_symbol_map = {
    'Ⓕ': '(Free state)',               # U+24BB
    'Ⓛ': '(LMC)',                      # U+24C1
    'Ⓜ': '(MMC)',                      # U+24C2
    'Ⓟ': '(Projected tolerance zone)', # U+24C5
    'Ⓢ': '(RFS)',                      # U+24C8
    'Ⓣ': '(Tangent plane)',            # U+24C9
    'Ⓤ': '(Unequal bilateral)'         # U+24CA
}
class DatumOCR:
    """Reads GD&T feature-control frames and surface labels out of
    engineering-drawing crops.

    Regions come in as YOLO boxes; each region is cropped, de-rotated,
    split into its table cells, cleaned of table borders, and OCR'd with
    either Tesseract (custom ``eng_gdt`` / ``eng_math`` traineddata) or
    PaddleOCR.
    """

    def __init__(self):
        # Recognition-only PaddleOCR on CPU; the angle classifier helps with
        # slightly rotated text inside labels.
        self.ocr = PaddleOCR(use_angle_cls=True, lang='en', show_log=False, use_gpu=False)

    def crop_img(self, img: np.ndarray, box: any, rotation: int = 0):
        """Crop one YOLO box out of `img`, rotated by `rotation` degrees.

        `box` is a single YOLO result box (exposes an `.xyxy` tensor).
        """
        crop = sv.crop_image(image=img, xyxy=box.xyxy[0].detach().cpu().numpy())
        # identity rotation would only waste a spline interpolation pass
        if rotation % 360 != 0:
            crop = ndimage.rotate(crop, rotation)
        return crop

    def crop_by_id(self, img: np.ndarray, id: int, boxes: any, rotation: int = 0):
        """Return crops of every box whose YOLO class equals `id`."""
        return [self.crop_img(img, box, rotation) for box in boxes if box.cls.item() == id]

    def split_contures(self, img: np.ndarray):
        """Locate the rectangular cells of a feature-control frame.

        Returns the bounding rects (x, y, w, h) of all sufficiently large
        quadrilateral contours, sorted left-to-right (reading order).
        """
        # Preprocessing: grayscale + Otsu binarization
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
        # findContours returns 2 values on OpenCV 4.x and 3 on 3.x — use the
        # same version-agnostic pattern as clense_lines for consistency.
        cnts = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        cnts = cnts[0] if len(cnts) == 2 else cnts[1]
        contours = []
        for c in cnts:
            peri = cv2.arcLength(c, True)
            approx = cv2.approxPolyDP(c, 0.04 * peri, True)
            # keep only quadrilaterals big enough to be a frame cell
            if len(approx) == 4 and cv2.contourArea(c) > 200:
                contours.append(cv2.boundingRect(c))
        contours.sort(key=lambda rect: rect[0])  # left-to-right
        return contours

    def clense_lines(self, img: np.ndarray, linesize: int = 10):
        """Paint the table borders of a GD&T label white so OCR sees only text.

        Input is the full label image, e.g.::

            _______________
            | o | 0.2 | A |
            '-------------'

        Returns a cleaned copy; `linesize` is the minimum run length that
        counts as a line.
        """
        clensed = img.copy()
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
        self._paint_lines(clensed, thresh, (linesize, 1))  # horizontal lines
        self._paint_lines(clensed, thresh, (1, linesize))  # vertical lines
        return clensed

    def _paint_lines(self, clensed: np.ndarray, thresh: np.ndarray, kernel_size: tuple):
        """Detect straight lines of one orientation in `thresh` (binary, ink=white)
        and paint them white onto `clensed` in place."""
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, kernel_size)
        detected = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=2)
        cnts = cv2.findContours(detected, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        cnts = cnts[0] if len(cnts) == 2 else cnts[1]
        for c in cnts:
            cv2.drawContours(clensed, [c], -1, (255, 255, 255), 2)

    def read_contures(self, rect, clensed: np.ndarray, math_recognition: bool = True):
        """OCR every cell of a feature-control frame.

        Input:
            rect: list of (x, y, w, h) cell rectangles (see split_contures)
            clensed: preprocessed image to read from
            math_recognition: when True the first cell (the geometric symbol)
                is read with the ``eng_gdt`` traineddata and the remaining
                cells with ``eng_math``; when False every cell uses PaddleOCR.
        Returns (texts, rois): OCR results and the cropped cell images.
        """
        pix = []
        text = []
        # only the very first cell holds the characteristic symbol; without
        # math recognition paddle is used for everything
        first = math_recognition
        for x, y, w, h in rect:
            roi = clensed[y:y + h, x:x + w]
            if first:
                first = False
                gdt = self.ocr_gdt(roi, r'--oem 3 -l eng_gdt --psm 6')
            elif math_recognition:
                gdt = self.ocr_gdt(roi, r'--oem 3 -l eng_math --psm 6')
            else:
                gdt = self.ocr_paddle(roi)
            text.append(gdt)
            pix.append(roi)
        return text, pix

    def ocr_gdt(self, img: np.ndarray, custom_config: str, debug: bool = False):
        """Run Tesseract on one cell and translate known GD&T symbols.

        Returns a list containing the raw recognized token, followed by its
        mapped meaning when it is a known characteristic or modifier symbol.
        Cells where Tesseract finds anything other than exactly one token
        yield ['not readable'].
        """
        gdt = []
        data = pytesseract.image_to_data(img, config=custom_config, output_type='data.frame')
        data = data[data.conf != -1]  # drop structural rows that carry no text
        if len(data['text']) == 1:
            item = data['text'].item()
            gdt.append(str(item))
            if item in symbol_map:
                gdt.append(symbol_map[item])
            elif item in feature_symbol_map:
                gdt.append(feature_symbol_map[item])
            if debug:
                print('gdt - ' + item)
        else:
            gdt.append('not readable')
        return gdt

    def ocr_paddle(self, roi, debug: bool = False):
        """Recognition-only PaddleOCR of one cell; returns recognized strings."""
        gdt = []
        results = self.ocr.ocr(roi, cls=False, det=False, rec=True)
        for res in results:
            if res is not None:
                for line in res:
                    gdt.append(str(line[0]))
                    if debug:
                        print('txt - ' + str(line[1][0]))
        return gdt

    def read_rois(self, sv_image: np.ndarray, classes_to_detect: list[int], boxes: any, rotation: int):
        """Split up the result regions and try to read them.

        sv_image: the full image to analyze
        classes_to_detect: YOLO class ids, e.g. 4 (GD&T) or 6 (surface)
        boxes: resulting boxes from YOLO (mostly ~ results[0].boxes)
        rotation: angle the image needs to be rotated

        Returns a list of "<category> : <lines>" strings, one per region.
        """
        res = []
        for class_to_detect in classes_to_detect:
            # only GD&T frames (class 4) carry a table structure to strip
            remove_table_structure = class_to_detect == 4
            for b in self.crop_by_id(sv_image, class_to_detect, boxes, rotation):
                if min(b.shape) == 0:  # degenerate / empty crop
                    continue
                lines = self.read_roi(b, remove_table_structure, rotation)
                res.append(f"{categories[class_to_detect]} : {lines}")
        return res

    def read_roi(self, b: np.ndarray, remove_table_structure: bool, rotation: int):
        """Preprocess and OCR one cropped label; returns the OCR'd lines."""
        # turn 90 degrees if the crop is clearly taller than wide (mis-aligned)
        h, w, _ = b.shape
        threshold = 1.1
        if h > w * threshold:
            rot = -90
            if rotation == 180:
                rot += 180
            b = ndimage.rotate(b, rot)
        if remove_table_structure:
            rect = self.split_contures(b)
            # line kernel scales with the label size
            linesize = math.ceil(max(b.shape) * 0.10) - 1
            clensed = self.clense_lines(b, linesize)
        else:
            # a single cell spanning the whole crop; shape is (rows, cols, _)
            height, width, _ = b.shape
            rect = [(0, 0, width, height)]
            clensed = b
        # sharpen, then binarize, to help both OCR engines
        kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
        sharpened_image = cv2.filter2D(clensed, -1, kernel)
        # FIX: the original passed an extra positional argument
        # (threshold(..., 0, cv2.THRESH_BINARY)), which put THRESH_BINARY into
        # the `dst` output slot; THRESH_BINARY alone as the `type` argument is
        # the intended call.
        _, thresh_img = cv2.threshold(sharpened_image, 128, 255, cv2.THRESH_BINARY)
        lines, pix = self.read_contures(rect, thresh_img, remove_table_structure)
        return lines