import math

import cv2
import numpy as np
import pytesseract
import supervision as sv
from paddleocr import PaddleOCR
from scipy import ndimage

from src.categories import CATEGORIES as categories

# GD&T geometric-characteristic symbols -> human-readable names.
symbol_map = {
    "⏤": 'Straightness',
    "⏥": 'Flatness',
    "⌭": 'Cylindricity',
    "○": 'Circularity',
    "⌯": 'Symmetry',
    "⌖": 'Position',
    "◎": 'Concentricity',
    "⟂": 'Perpendicularity',
    "∥": 'Parallelism',
    "∠": 'Angularity',
    "⌓": 'Profile of a surface',
    "⌒": 'Profile of a line',
    "⌰": 'Total run-out',
    "↗": 'Circular run-out'
}

# GD&T feature / material-condition modifier symbols -> spelled-out meaning.
feature_symbol_map = {
    'Ⓕ': '(Free state)',
    'Ⓛ': '(LMC)',
    'Ⓜ': '(MMC)',
    'Ⓟ': '(Projected tolerance zone)',
    'Ⓢ': '(RFS)',
    'Ⓣ': '(Tangent plane)',
    'Ⓤ': '(Unequal bilateral)'
}


class DatumOCR:
    """OCR pipeline for reading GD&T feature-control frames and surface labels.

    Takes YOLO detection boxes, crops the regions of interest out of the
    drawing, strips table grid lines, and reads each cell with tesseract
    (custom eng_gdt / eng_math models) or PaddleOCR as a plain-text fallback.
    """

    def __init__(self):
        # CPU-only PaddleOCR instance used for plain-text recognition.
        self.ocr = PaddleOCR(use_angle_cls=True, lang='en', show_log=False, use_gpu=False)

    def crop_img(self, img: np.ndarray, box: any, rotation: int = 0):
        """Crop one detection box out of `img`, rotated by `rotation` degrees.

        `box` is a single YOLO box (provides `.xyxy` as a tensor).
        """
        crop = sv.crop_image(image=img, xyxy=box.xyxy[0].detach().cpu().numpy())
        return ndimage.rotate(crop, rotation)

    def crop_by_id(self, img: np.ndarray, id: int, boxes: any, rotation: int = 0):
        """Return crops of every box whose class id equals `id`."""
        return [self.crop_img(img, box, rotation)
                for box in boxes if box.cls.item() == id]

    def split_contures(self, img: np.ndarray):
        """Find the rectangular cells of a feature-control frame.

        Returns a list of (x, y, w, h) bounding rects sorted left-to-right.
        Only quadrilateral contours larger than 200 px² count as cells.
        """
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
        # OpenCV 3 returns (image, contours, hierarchy); OpenCV 4 returns
        # (contours, hierarchy) — handle both, like clense_lines below.
        found = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        cnts = found[0] if len(found) == 2 else found[1]
        contours = []
        for c in cnts:
            peri = cv2.arcLength(c, True)
            approx = cv2.approxPolyDP(c, 0.04 * peri, True)
            # Keep only rectangle/square-like shapes of meaningful size.
            if len(approx) == 4 and cv2.contourArea(c) > 200:
                contours.append(cv2.boundingRect(c))
        contours.sort(key=lambda rect: rect[0])
        return contours

    def clense_lines(self, img: np.ndarray, linesize: int = 10):
        """Paint the table grid lines of a GD&T frame white.

        Input is the full GD&T label image, e.g.::

             _______________
            | o | 0.2 | A  |
            '---------------'

        `linesize` is the minimum run length (in px) treated as a grid line.
        Returns a copy of `img` with horizontal and vertical lines removed.
        """
        clensed = img.copy()
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
        # One pass with a horizontal kernel, one with a vertical kernel;
        # morphological opening isolates long straight runs (the grid).
        for shape in ((linesize, 1), (1, linesize)):
            kernel = cv2.getStructuringElement(cv2.MORPH_RECT, shape)
            removed = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=2)
            found = cv2.findContours(removed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
            cnts = found[0] if len(found) == 2 else found[1]
            for c in cnts:
                # Overdraw each detected line in white so OCR ignores it.
                cv2.drawContours(clensed, [c], -1, (255, 255, 255), 2)
        return clensed

    def read_contures(self, rect, clensed: np.ndarray, math_recognition: bool = True):
        """OCR each rectangular cell of a preprocessed frame image.

        Args:
            rect: list of (x, y, w, h) cell coordinates, ordered left-to-right.
            clensed: preprocessed image the coordinates index into.
            math_recognition: when True the first cell is read with the
                eng_gdt tesseract model (symbol cell) and the remaining cells
                with eng_math; when False every cell goes through PaddleOCR.

        Returns:
            (text, pix): per-cell token lists and the cropped cell images.
        """
        text = []
        pix = []
        # The leading cell holds the GD&T symbol; it gets the dedicated
        # model only when math recognition is enabled at all.
        first = math_recognition
        for x, y, w, h in rect:
            roi = clensed[y:y + h, x:x + w]
            if first:
                first = False
                gdt = self.ocr_gdt(roi, r'--oem 3 -l eng_gdt --psm 6')
            elif math_recognition:
                gdt = self.ocr_gdt(roi, r'--oem 3 -l eng_math --psm 6')
            else:
                gdt = self.ocr_paddle(roi)
            text.append(gdt)
            pix.append(roi)
        return text, pix

    def ocr_gdt(self, img: np.ndarray, custom_config: str, debug: bool = False):
        """Run tesseract on one cell image; expect exactly one token.

        Returns a list: the token (plus its spelled-out GD&T meaning when
        the token is a known symbol), or ['not readable'] when tesseract
        does not yield exactly one confident token.
        """
        gdt = []
        data = pytesseract.image_to_data(img, config=custom_config, output_type='data.frame')
        # conf == -1 marks structural rows (blocks/lines) without text.
        data = data[data.conf != -1]
        if len(data['text']) == 1:
            item = data['text'].item()
            gdt.append(str(item))
            # Append the human-readable meaning of recognised symbols.
            if item in symbol_map:
                gdt.append(symbol_map[item])
            elif item in feature_symbol_map:
                gdt.append(feature_symbol_map[item])
            if debug:
                # str() guards against pandas handing back a non-str token.
                print('gdt - ' + str(item))
        else:
            gdt.append('not readable')
        return gdt

    def ocr_paddle(self, roi, debug: bool = False):
        """Plain-text fallback recognition via PaddleOCR (rec only, no det)."""
        gdt = []
        for res in self.ocr.ocr(roi, cls=False, det=False, rec=True):
            if res is None:
                continue
            for line in res:
                gdt.append(str(line[0]))
                if debug:
                    print('txt - ' + str(line[1][0]))
        return gdt

    def read_rois(self, sv_image: np.ndarray, classes_to_detect: list[int], boxes: any, rotation: int):
        """Crop every detection of the requested classes and OCR each one.

        Args:
            sv_image: the full image to analyze.
            classes_to_detect: YOLO class ids, e.g. 4 (GD&T) or 6 (surface).
            boxes: resulting boxes from YOLO (mostly ~ results[0].boxes).
            rotation: angle the image needs to be rotated.

        Returns:
            list of "<category> : <tokens>" strings, one per readable crop.
        """
        res = []
        for class_to_detect in classes_to_detect:
            # Only GD&T frames (class 4) carry a table grid to strip out.
            remove_table_structure = class_to_detect == 4
            for b in self.crop_by_id(sv_image, class_to_detect, boxes, rotation):
                if min(b.shape) == 0:
                    # Degenerate (zero-area) crop — nothing to read.
                    continue
                lines = self.read_roi(b, remove_table_structure, rotation)
                res.append(f"{categories[class_to_detect]} : {lines}")
        return res

    def read_roi(self, b: np.ndarray, remove_table_structure: bool, rotation: int):
        """Normalize orientation, strip grid, sharpen, threshold and OCR one crop.

        Returns the list of per-cell token lists from read_contures.
        """
        h, w, _ = b.shape
        # Portrait crops are rotated back to reading orientation; the 1.1
        # tolerance keeps near-square crops untouched.
        if h > w * 1.1:
            rot = -90
            if rotation == 180:
                rot += 180
            b = ndimage.rotate(b, rot)
        if remove_table_structure:
            rect = self.split_contures(b)
            # Kernel length ≈ 10% of the larger side catches full grid lines.
            linesize = math.ceil(max(b.shape) * 0.10) - 1
            clensed = self.clense_lines(b, linesize)
        else:
            # Single cell covering the whole crop, in (x, y, w, h) order.
            img_h, img_w = b.shape[:2]
            rect = [(0, 0, img_w, img_h)]
            clensed = b
        # Sharpen before thresholding to crisp up thin glyph strokes.
        kernel = np.array([[0, -1, 0],
                           [-1, 5, -1],
                           [0, -1, 0]])
        sharpened = cv2.filter2D(clensed, -1, kernel)
        # BUG FIX: the original passed the threshold type positionally as 0
        # and cv2.THRESH_BINARY into the dst (output array) slot; the type
        # flag belongs in the fourth argument.
        _, thresh_img = cv2.threshold(sharpened, 128, 255, cv2.THRESH_BINARY)
        lines, _ = self.read_contures(rect, thresh_img, remove_table_structure)
        return lines