CadExtractor / src /utils /rotation.py
Martin Krockert
Demo with tesseract / paddle and finetuned yolo 12
fa54254
raw
history blame
2.36 kB
from PIL import Image
def predict(yolo_result, img: "Image.Image | None" = None) -> int:
    """
    Predicts the orientation based on the position of the largest table
    annotation and returns the angle (in degrees) the image should be rotated.

    input:
        yolo_result : detection result for one image; its ``boxes`` are read
            and, when ``img`` is None, its ``orig_shape``
        img : PIL Image, default None
            if None the image size is taken from the result

    returns:
        180 if the table sits in the top left corner,
        -90 if it sits in the top right corner,
        90 if it sits in the bottom left corner,
        0 otherwise (also when no table was detected).
        The angle is meant to be applied by the caller, e.g.
        ``img.rotate(rotation_angle, expand=True)``.
    """
    boxes = yolo_result.boxes  # Boxes object for bounding box outputs
    if boxes is None:
        return 0  # the result carries no box output at all

    reference_table = get_reference_table(boxes)
    if reference_table is None:
        return 0  # early return if nothing found.

    if img is None:
        # Only the image size is needed, so read it from the result instead
        # of rendering an annotated plot; orig_shape is (height, width).
        height, width = yolo_result.orig_shape[:2]
    else:
        width, height = img.width, img.height

    # Coordinates of the largest table box
    x1, y1, x2, y2 = reference_table.xyxy[0].tolist()

    # Distances of the box to the image borders
    dist_top = y1
    dist_left = x1
    dist_right = width - x2
    dist_bottom = height - y2

    # Determine the rotation angle based on the distances to the borders.
    # NOTE: a horizontal tie counts as "right"; an exact vertical tie matches
    # none of the first three branches and therefore yields 0.
    if dist_top < dist_bottom and dist_left < dist_right:  # top left corner
        rotation_angle = 180
    elif dist_top < dist_bottom and dist_left >= dist_right:  # top right corner
        rotation_angle = -90
    elif dist_top > dist_bottom and dist_left < dist_right:  # bottom left corner
        rotation_angle = 90
    else:
        rotation_angle = 0  # do nothing
    return rotation_angle
def get_reference_table(boxes, table_class_id=1):
    """
    Returns the reference table from result, i.e. the largest table box.

    input:
        boxes : iterable of detected boxes, or None; for every box
            ``cls.item()`` and ``xywhn[0]`` are read
        table_class_id : class id that marks a box as a table, default 1

    returns:
        the table box with the largest area (the first one on ties),
        or None if ``boxes`` is None or contains no table
    """
    if boxes is None:
        return None

    def box_area(box):
        # entries 2 and 3 of xywhn[0] are multiplied; presumably these are
        # the normalized width and height of the box
        size = box.xywhn[0]
        return size[2].item() * size[3].item()

    # keep only the tables (selected by class id) and pick the biggest one
    tables = (box for box in boxes if box.cls.item() == table_class_id)
    return max(tables, key=box_area, default=None)