File size: 4,475 Bytes
fa0f216 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
import numpy as np
import cv2
def detect_text_bounds(image: np.ndarray) -> "tuple[int, int]":
    """
    Find the lower and upper bounding lines in an image of a word.

    The image is binarized (dark pixels -> 1), row sums are smoothed, and the
    steepest falling/rising edges of the row-sum profile are taken as the
    bottom and top of the text.

    :param image: word image; HxW grayscale, HxWx1, or HxWx3 RGB.
                  NOTE(review): the 0.8 threshold assumes pixel values are
                  normalized to [0, 1] — confirm against callers.
    :return: (bottom_index, top_index) row indices of the text band.
    :raises ValueError: via np.max/np.min if no row crosses the derivative
                        thresholds (empty np.where result).
    """
    # Collapse to a single channel so row sums are well defined.
    if len(image.shape) >= 3 and image.shape[2] == 3:
        image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    elif len(image.shape) >= 3 and image.shape[2] == 1:
        image = np.squeeze(image, axis=-1)
    # Inverse binary threshold: pixels <= 0.8 (ink) become 1, background 0.
    _, threshold = cv2.threshold(image, 0.8, 1, cv2.THRESH_BINARY_INV)
    # Ink mass per row, smoothed with a 5-tap moving average to suppress noise.
    line_sums = np.sum(threshold, axis=1).astype(float)
    line_sums = np.convolve(line_sums, np.ones(5) / 5, mode='same')
    # First derivative of the row profile: large negative = text ends,
    # large positive = text begins.
    line_sums_d = np.diff(line_sums)
    std_factor = 0.5
    # Bottom bound: last row whose derivative is an outlier among the drops.
    min_threshold = np.mean(line_sums_d[line_sums_d <= 0]) - std_factor * np.std(line_sums_d[line_sums_d <= 0])
    bottom_index = np.max(np.where(line_sums_d < min_threshold))
    # Top bound: first row whose derivative is an outlier among the rises.
    max_threshold = np.mean(line_sums_d[line_sums_d >= 0]) + std_factor * np.std(line_sums_d[line_sums_d >= 0])
    top_index = np.min(np.where(line_sums_d > max_threshold))
    return bottom_index, top_index
def dist(p_one, p_two) -> float:
    """Return the Euclidean distance between the points *p_one* and *p_two*."""
    delta = p_two - p_one
    return np.sqrt(np.dot(delta, delta))
def crop(image: np.ndarray, ratio: float = None, pixels: int = None) -> np.ndarray:
    """
    Crop a uniform border from all four sides of an image.

    :param image: array whose first two axes are (rows, cols).
    :param ratio: fraction of each dimension to remove from each side.
    :param pixels: absolute number of pixels to remove from each side
                   (used only when ``ratio`` is None).
    :return: the cropped view of ``image``.
    :raises ValueError: if neither ``ratio`` nor ``pixels`` is given.
    """
    # Input validation raises instead of assert: asserts vanish under -O.
    if ratio is None and pixels is None:
        raise ValueError("Please specify either pixels or a ratio to crop")
    # BUG FIX: numpy shape order is (rows, cols); the original unpacked it as
    # (width, height) and then sliced the row axis with column-derived bounds,
    # producing a wrong crop on any non-square image.
    rows, cols = image.shape[:2]
    if ratio is not None:
        row_crop = int(ratio * rows)
        col_crop = int(ratio * cols)
    else:
        row_crop = pixels
        col_crop = pixels
    return image[row_crop:rows - row_crop, col_crop:cols - col_crop]
def find_target_points(top_left, top_right, bottom_left, bottom_right):
    """
    Build the axis-aligned destination rectangle for a perspective warp.

    The rectangle's width/height are the longer of the two opposing edge
    lengths of the source quadrilateral; corners come back ordered via
    ``order_points``.
    """
    bottom_width = int(dist(bottom_right, bottom_left))
    top_width = int(dist(top_right, top_left))
    max_width = max(bottom_width, top_width)

    right_height = int(dist(top_right, bottom_right))
    left_height = int(dist(top_left, bottom_left))
    max_height = max(right_height, left_height)

    destination_corners = [
        [0, 0],
        [max_width, 0],
        [max_width, max_height],
        [0, max_height],
    ]
    return order_points(destination_corners)
def order_points(points: np.ndarray) -> tuple:
    """
    Order four corner points as (top_left, top_right, bottom_left, bottom_right).

    Top-left has the smallest x+y, bottom-right the largest; top-right has the
    smallest y-x difference, bottom-left the largest.

    inspired by: https://learnopencv.com/automatic-document-scanner-using-opencv/
    """
    # Renamed from `sum`, which shadowed the builtin.
    coord_sums = np.sum(points, axis=1)
    top_left = points[np.argmin(coord_sums)]
    bottom_right = points[np.argmax(coord_sums)]
    # np.diff over axis 1 yields y - x for each point.
    coord_diffs = np.diff(points, axis=1)
    top_right = points[np.argmin(coord_diffs)]
    bottom_left = points[np.argmax(coord_diffs)]
    return top_left, top_right, bottom_left, bottom_right
def get_page(image: np.ndarray) -> np.ndarray:
    """
    Locate the largest convex quadrilateral contour (the page) in a BGR image
    and warp it to a fronto-parallel crop; return the input unchanged when no
    such contour is found.

    inspired by: https://github.com/Kakaranish/OpenCV-paper-detection
    """
    filtered = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    filtered = cv2.medianBlur(filtered, 11)
    # FIX: the fourth positional argument of cv2.Canny is the `edges` output
    # array, not apertureSize — the original `cv2.Canny(..., 3)` did not set
    # the Sobel aperture. Pass it by keyword (3 is also the documented default).
    canny = cv2.Canny(filtered, 30, 50, apertureSize=3)
    contours, _ = cv2.findContours(canny, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)
    # Keep the convex 4-gon approximation with the largest perimeter.
    max_perimeter = 0
    max_contour = None
    for contour in contours:
        contour = np.array(contour)
        perimeter = cv2.arcLength(contour, True)
        contour_approx = cv2.approxPolyDP(contour, 0.02 * perimeter, True)
        if perimeter > max_perimeter and cv2.isContourConvex(contour_approx) and len(contour_approx) == 4:
            max_perimeter = perimeter
            max_contour = contour_approx
    if max_contour is not None:
        max_contour = np.squeeze(max_contour)
        points = order_points(max_contour)
        target_points = find_target_points(*points)
        M = cv2.getPerspectiveTransform(np.float32(points), np.float32(target_points))
        # target_points[3] is the bottom-right corner: (max_width, max_height).
        final = cv2.warpPerspective(image, M, (target_points[3][0], target_points[3][1]), flags=cv2.INTER_LINEAR)
        # Trim a small border to hide warp artifacts along the page edge.
        final = crop(final, pixels=10)
        return final
    return image
def get_words(page: np.ndarray, dilation_size: int = 3) -> "tuple[list, list]":
    """
    Segment a BGR page image into word crops.

    Dark ink is thresholded, dilated with an elliptical kernel so letters of a
    word merge into one blob, and each external contour becomes one word box.

    :param page: BGR page image with values in [0, 255].
    :param dilation_size: radius of the elliptical structuring element.
    :return: (words, boxes) — word sub-images of ``page`` and their
             [x, y, w, h] bounding boxes, in contour order.
    """
    gray = cv2.cvtColor(page, cv2.COLOR_BGR2GRAY)
    # Ink (gray <= 125) -> 1, background -> 0.
    _, thresholded = cv2.threshold(gray, 125, 1, cv2.THRESH_BINARY_INV)
    # (Removed a dead `dilation_size = dilation_size` self-assignment.)
    element = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (2 * dilation_size + 1, 2 * dilation_size + 1),
                                        (dilation_size, dilation_size))
    thresholded = cv2.dilate(thresholded, element, iterations=3)
    contours, _ = cv2.findContours(thresholded, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    words = []
    boxes = []
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        # Drop extremely elongated blobs (lines, rules) — not plausible words.
        ratio = w / h
        if ratio <= 0.1 or ratio >= 10.0:
            continue
        boxes.append([x, y, w, h])
        words.append(page[y:y+h, x:x+w])
    return words, boxes