# vatrpp/models/util/vision.py
# Author: vittoriopippi — "Change imports" (commit 9c772c4)
import numpy as np
import cv2
def detect_text_bounds(image: np.ndarray) -> tuple:
    """
    Find the lower and upper bounding row indices of the text in a word image.

    Thresholds the image, sums the ink per row, smooths the resulting profile
    and locates the strongest downward/upward transitions of its derivative.

    :param image: word image of shape HxW, HxWx1 or HxWx3 (RGB).
        NOTE(review): the threshold value 0.8 assumes pixel values in [0, 1]
        (float image) — confirm callers do not pass uint8 images.
    :return: (bottom_index, top_index) row indices.
    """
    # Collapse to a single channel so per-row sums are well defined.
    if len(image.shape) >= 3 and image.shape[2] == 3:
        image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    elif len(image.shape) >= 3 and image.shape[2] == 1:
        image = np.squeeze(image, axis=-1)
    # Binary-inverse: ink (pixels <= 0.8) becomes 1, background becomes 0.
    _, threshold = cv2.threshold(image, 0.8, 1, cv2.THRESH_BINARY_INV)
    line_sums = np.sum(threshold, axis=1).astype(float)
    # Smooth the per-row ink profile with a 5-tap moving average.
    line_sums = np.convolve(line_sums, np.ones(5) / 5, mode='same')
    line_sums_d = np.diff(line_sums)
    std_factor = 0.5
    # Bottom bound: last row whose profile drop is an outlier among the decreases.
    min_threshold = np.mean(line_sums_d[line_sums_d <= 0]) - std_factor * np.std(line_sums_d[line_sums_d <= 0])
    bottom_index = np.max(np.where(line_sums_d < min_threshold))
    # Top bound: first row whose profile rise is an outlier among the increases.
    max_threshold = np.mean(line_sums_d[line_sums_d >= 0]) + std_factor * np.std(line_sums_d[line_sums_d >= 0])
    top_index = np.min(np.where(line_sums_d > max_threshold))
    # NOTE(review): a blank image yields empty selections and np.max/np.min
    # raise ValueError here — confirm callers guarantee some ink is present.
    return bottom_index, top_index
def dist(p_one, p_two) -> float:
    """Euclidean distance between two points given as numpy arrays."""
    delta = p_two - p_one
    return np.linalg.norm(delta)
def crop(image: np.ndarray, ratio: float = None, pixels: int = None) -> np.ndarray:
    """
    Symmetrically crop a border off an image.

    :param image: array of shape (height, width[, channels]).
    :param ratio: fraction of each dimension removed from every side.
    :param pixels: fixed number of pixels removed from every side
        (used only when ``ratio`` is None).
    :return: the cropped view of ``image``.
    """
    assert ratio is not None or pixels is not None, "Please specify either pixels or a ratio to crop"
    # BUG FIX: numpy image shape is (rows, cols) == (height, width); the
    # original unpacked the two the other way round, so non-square images
    # were cropped by the wrong amount along each axis.
    height, width = image.shape[:2]
    if ratio is not None:
        width_crop = int(ratio * width)
        height_crop = int(ratio * height)
    else:
        width_crop = pixels
        height_crop = pixels
    return image[height_crop:height - height_crop, width_crop:width - width_crop]
def find_target_points(top_left, top_right, bottom_left, bottom_right):
    """
    Build the axis-aligned destination rectangle for a perspective warp.

    The rectangle is sized by the longer of each pair of opposite edges of
    the input quadrilateral, and returned in (tl, tr, bl, br) order.
    """
    bottom_edge = int(dist(bottom_right, bottom_left))
    top_edge = int(dist(top_right, top_left))
    right_edge = int(dist(top_right, bottom_right))
    left_edge = int(dist(top_left, bottom_left))
    max_width = max(bottom_edge, top_edge)
    max_height = max(right_edge, left_edge)
    corners = [[0, 0], [max_width, 0], [max_width, max_height], [0, max_height]]
    return order_points(corners)
def order_points(points: np.ndarray) -> tuple:
    """
    Order four corner points as (top_left, top_right, bottom_left, bottom_right).

    inspired by: https://learnopencv.com/automatic-document-scanner-using-opencv/

    :param points: 4x2 array-like of (x, y) corner coordinates.
    :return: tuple (top_left, top_right, bottom_left, bottom_right).
    """
    # FIX: renamed local `sum`, which shadowed the builtin of the same name.
    # Top-left has the smallest x+y, bottom-right the largest.
    coord_sums = np.sum(points, axis=1)
    top_left = points[np.argmin(coord_sums)]
    bottom_right = points[np.argmax(coord_sums)]
    # Top-right has the smallest y-x, bottom-left the largest.
    coord_diffs = np.diff(points, axis=1)
    top_right = points[np.argmin(coord_diffs)]
    bottom_left = points[np.argmax(coord_diffs)]
    return top_left, top_right, bottom_left, bottom_right
def get_page(image: np.array) -> np.array:
    """
    Detect a page in a BGR image and return it warped to a fronto-parallel
    view; the input is returned unchanged when no page-like contour is found.

    inspired by: https://github.com/Kakaranish/OpenCV-paper-detection
    """
    # Edge map computed on a blurred grayscale copy to suppress texture noise.
    filtered = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    filtered = cv2.medianBlur(filtered, 11)
    canny = cv2.Canny(filtered, 30, 50, 3)
    contours, _ = cv2.findContours(canny, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)
    # Keep the longest-perimeter contour whose polygonal approximation is a
    # convex quadrilateral — taken to be the page outline.
    max_perimeter = 0
    max_contour = None
    for contour in contours:
        contour = np.array(contour)
        perimeter = cv2.arcLength(contour, True)
        contour_approx = cv2.approxPolyDP(contour, 0.02 * perimeter, True)
        if perimeter > max_perimeter and cv2.isContourConvex(contour_approx) and len(contour_approx) == 4:
            max_perimeter = perimeter
            max_contour = contour_approx
    if max_contour is not None:
        max_contour = np.squeeze(max_contour)
        points = order_points(max_contour)
        target_points = find_target_points(*points)
        # Map the detected corners onto the axis-aligned target rectangle.
        M = cv2.getPerspectiveTransform(np.float32(points), np.float32(target_points))
        # target_points[3] is bottom_right == (max_width, max_height),
        # i.e. the pixel size of the rectified output.
        final = cv2.warpPerspective(image, M, (target_points[3][0], target_points[3][1]), flags=cv2.INTER_LINEAR)
        # Trim a fixed 10-pixel border to drop warp artifacts at the page edge.
        final = crop(final, pixels=10)
        return final
    return image
def get_words(page: np.ndarray, dilation_size: int = 3):
    """
    Segment a page image into individual word crops.

    Thresholds the page, dilates the ink so the letters of a word merge into
    one blob, then returns the bounding-box crops of the external contours.

    :param page: BGR page image. NOTE(review): the fixed threshold of 125
        assumes uint8 pixel values — confirm against callers.
    :param dilation_size: radius of the elliptical dilation kernel.
    :return: (words, boxes) where words are crops of ``page`` and boxes are
        the matching [x, y, w, h] lists, in contour order.
    """
    gray = cv2.cvtColor(page, cv2.COLOR_BGR2GRAY)
    # Binary-inverse: ink becomes 1, background becomes 0.
    _, thresholded = cv2.threshold(gray, 125, 1, cv2.THRESH_BINARY_INV)
    # FIX: dropped the redundant self-assignment `dilation_size = dilation_size`.
    element = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (2 * dilation_size + 1, 2 * dilation_size + 1),
                                        (dilation_size, dilation_size))
    thresholded = cv2.dilate(thresholded, element, iterations=3)
    contours, _ = cv2.findContours(thresholded, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    words = []
    boxes = []
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        # Discard blobs with extreme aspect ratios (rules, margins, noise).
        ratio = w / h
        if ratio <= 0.1 or ratio >= 10.0:
            continue
        boxes.append([x, y, w, h])
        words.append(page[y:y + h, x:x + w])
    return words, boxes