# testapi / manga_translator / rendering / text_render_eng.py
# Listing header carried over from the hosting page: Sunday01 · "up" · 9dce458
import cv2
import numpy as np
from PIL import Image
from typing import List, Tuple
from .text_render import get_char_glyph, put_char_horizontal, add_color
from .ballon_extractor import extract_ballon_region
from ..utils import TextBlock, rect_distance
# Colour triples (three 0-255 components).
WHITE = (255, 255, 255)
BLACK = (0, 0, 0)
# Punctuation after which seg_eng() inserts a space when the very next
# character is a letter or a digit (e.g. "WAIT.WHAT" -> "WAIT. WHAT").
PUNSET_RIGHT_ENG = {'.', '?', '!', ':', ';', ')', '}', "\""}
class Textline:
    """One horizontal run of text together with its layout geometry.

    Attributes:
        text: the characters of the line.
        pos_x, pos_y: position of the line (top-left corner as used by the
            layout code in this module).
        length: width of the line as an ``int``; includes the padding added
            by :meth:`add_spacing` on both sides.
        num_words: number of non-empty words that were put into the line.
        spacing: padding currently applied to each side of the line.
    """

    def __init__(self, text: str = '', pos_x: int = 0, pos_y: int = 0, length: float = 0, spacing: int = 0) -> None:
        self.text = text
        self.pos_x = pos_x
        self.pos_y = pos_y
        self.length = int(length)
        # A non-empty initial text counts as the first word.
        self.num_words = 1 if text else 0
        self.spacing = 0
        self.add_spacing(spacing)

    def _account_for(self, word: str, w_len: int) -> None:
        """Update the word counter and the width after ``word`` was attached."""
        if word:
            self.num_words += 1
        self.length += w_len

    def append_right(self, word: str, w_len: int, delimiter: str = ''):
        """Attach ``word`` after the current text, widening the line by ``w_len``."""
        self.text = ''.join((self.text, delimiter, word))
        self._account_for(word, w_len)

    def append_left(self, word: str, w_len: int, delimiter: str = ''):
        """Attach ``word`` before the current text, widening the line by ``w_len``."""
        self.text = ''.join((word, delimiter, self.text))
        self._account_for(word, w_len)

    def add_spacing(self, spacing: int):
        """Pad the line by ``spacing`` on both sides (shifts ``pos_x`` left)."""
        self.spacing = spacing
        self.pos_x, self.length = self.pos_x - spacing, self.length + 2 * spacing

    def strip_spacing(self):
        """Undo the padding recorded in ``self.spacing`` and reset it to zero."""
        pad = self.spacing
        self.length -= pad * 2
        self.pos_x += pad
        self.spacing = 0
def render_lines(
    textlines: List[Textline],
    canvas_h: int,
    canvas_w: int,
    font_size: int,
    stroke_width: int,
    line_spacing: float = 0.01,
    fg: Tuple[int, int, int] = (0, 0, 0),
    bg: Tuple[int, int, int] = (255, 255, 255)) -> Image.Image:
    """Rasterise ``textlines`` into a single cropped image.

    Args:
        textlines: lines to draw, top to bottom. Only ``text``, ``pos_x`` and
            ``length`` are read; lines are stacked at a fixed pitch, so
            ``pos_y`` is not used here.
        canvas_h: ignored; the canvas height is recomputed from the lines.
        canvas_w: ignored; the canvas width is recomputed from the lines.
        font_size: glyph size passed to ``put_char_horizontal``; also the
            base of the vertical line pitch and of the canvas margin.
        stroke_width: border size passed to ``put_char_horizontal``.
        line_spacing: extra gap between lines as a fraction of ``font_size``
            (a falsy value falls back to ``0.01``).
        fg: colour handed to ``add_color`` for the text layer.
        bg: colour handed to ``add_color`` for the border layer.

    Returns:
        The coloured text, cropped to the bounding rectangle of the border
        layer.
    """
    bg_size = stroke_width
    spacing_y = int(font_size * (line_spacing or 0.01))
    margin = font_size + bg_size

    # Oversized scratch canvas: widest line / all lines plus a margin on
    # every side so glyphs and their border never touch the edge.
    canvas_w = max(line.length for line in textlines) + margin * 2
    canvas_h = font_size * len(textlines) + spacing_y * (len(textlines) - 1) + margin * 2
    canvas_text = np.zeros((canvas_h, canvas_w), dtype=np.uint8)
    canvas_border = canvas_text.copy()

    # Pen position (x, y) of the first glyph of the current line.
    pen_orig = [margin, margin]
    for line in textlines:
        pen_line = pen_orig.copy()
        pen_line[0] += line.pos_x  # horizontal placement chosen by the layout
        for c in line.text:
            offset_x = put_char_horizontal(font_size, c, pen_line, canvas_text, canvas_border, border_size=bg_size)
            pen_line[0] += offset_x
        pen_orig[1] += spacing_y + font_size

    # Colourise, then crop to the area actually covered by the border layer.
    canvas_border = np.clip(canvas_border, 0, 255)
    line_box = add_color(canvas_text, fg, canvas_border, bg)
    x, y, width, height = cv2.boundingRect(canvas_border)
    return Image.fromarray(line_box[y:y+height, x:x+width])
def seg_eng(text: str) -> List[str]:
    """Split ``text`` into upper-cased layout chunks.

    The text is upper-cased, whitespace runs (spaces, tabs, CR, LF) are
    collapsed to single spaces, a space before ``.`` is removed, and a space
    is inserted after any character of ``PUNSET_RIGHT_ENG`` that is directly
    followed by a letter or digit.  Words shorter than three characters are
    then glued to a neighbour so that they are never laid out alone:

    * a one-character word always merges;
    * a two-character word merges only with a neighbour of at most four
      characters;
    * when both neighbours exist the shorter one is preferred (ties go to
      the previous chunk).

    Args:
        text: the text to segment.

    Returns:
        The list of chunks (possibly containing a single space inside a
        merged chunk).  Empty or whitespace-only input yields ``[]``.
    """
    # Normalise whitespace first so that splitting never yields empty tokens.
    for ws in ('\n', '\r', '\t'):
        text = text.replace(ws, ' ')
    text = ' '.join(tok for tok in text.upper().split(' ') if tok)
    text = text.replace(' .', '.')
    if not text:
        return []

    # Make sure closing punctuation is followed by a space before a new word.
    last = len(text) - 1
    chars = []
    for ii, c in enumerate(text):
        chars.append(c)
        if ii < last and c in PUNSET_RIGHT_ENG:
            next_c = text[ii + 1]
            if next_c.isalpha() or next_c.isnumeric():
                chars.append(' ')
    word_list = ''.join(chars).split(' ')

    word_num = len(word_list)
    if word_num <= 1:
        return word_list

    words: List[str] = []
    skip_next = False
    for ii, word in enumerate(word_list):
        if skip_next:
            # This word was already consumed by the previous short word.
            skip_next = False
            continue

        len_word = len(word)
        if len_word >= 3:
            words.append(word)
            continue

        # -1 marks "no such neighbour".  len_prev measures the previous
        # *chunk*, which may itself be the result of an earlier merge.
        len_next = len(word_list[ii + 1]) if ii < word_num - 1 else -1
        len_prev = len(words[-1]) if words else -1
        cond_next = len_word == 1 or (len_word == 2 and len_next <= 4)
        cond_prev = len_word == 1 or (len_word == 2 and len_prev <= 4)

        append_left = append_right = False
        if len_next > 0 and len_prev > 0:
            if len_next < len_prev:
                append_right = cond_next
            else:
                append_left = cond_prev
        elif len_next > 0:
            append_right = cond_next
        elif len_prev > 0:
            # Explicit "> 0": the -1 sentinel must not count as a neighbour.
            append_left = cond_prev

        if append_left:
            words[-1] = words[-1] + ' ' + word
        elif append_right:
            words.append(word + ' ' + word_list[ii + 1])
            skip_next = True
        else:
            words.append(word)
    return words
def layout_lines_aligncenter(
    mask: np.ndarray,
    words: List[str],
    word_lengths: List[int],
    delimiter_len: int,
    line_height: int,
    spacing: int = 0,
    delimiter: str = ' ',
    max_central_width: float = np.inf,
    word_break: bool = False)->List[Textline]:
    """Arrange ``words`` into centre-aligned lines around the centroid of ``mask``.

    The word whose midpoint is closest to the middle of the whole text seeds
    a central line placed at the mask centroid.  Neighbouring words are then
    pulled into that line from the left or right for as long as both ends of
    the widened line stay on free pixels.  Words left over on the right fill
    new lines below the central one, words left over on the left fill new
    lines above it.

    Args:
        mask: single-channel image used both for the centroid (via
            ``cv2.moments``) and for the fit test.  It is inverted with
            ``255 - mask`` before the fit test, so input pixels equal to 255
            are the ones a line end may land on (presumably a uint8 balloon
            mask with 255 inside the balloon; confirm with the caller).
        words: chunks to lay out, in reading order.
        word_lengths: width of each entry of ``words`` (same order).
        delimiter_len: width added for the delimiter between two words.
        line_height: vertical pitch of the lines.
        spacing: padding handed to every new ``Textline``; it is stripped
            again when the line is finished.
        delimiter: string inserted between two words of a line.
        max_central_width: a line is closed as soon as it becomes wider
            than this value.
        word_break: not used by this function.

    Returns:
        The lines ordered from top to bottom.
    """
    m = cv2.moments(mask)
    # From here on non-zero means "blocked": the fit tests below look for sum() == 0.
    mask = 255 - mask
    # NOTE(review): an all-zero input mask would give m00 == 0 and make these divisions fail.
    centroid_y = int(m['m01'] / m['m00'])
    centroid_x = int(m['m10'] / m['m00'])

    # layout the central line, the center word is approximately aligned with the centroid of the mask
    num_words = len(words)
    len_left, len_right = [], []
    wlst_left, wlst_right = [], []
    sum_left, sum_right = 0, 0
    if num_words > 1:
        wl_array = np.array(word_lengths, dtype=np.float64)
        wl_cumsums = np.cumsum(wl_array)
        # Signed distance of every word's midpoint from the middle of the text.
        wl_cumsums = wl_cumsums - wl_cumsums[-1] / 2 - wl_array / 2
        central_index = np.argmin(np.abs(wl_cumsums))

        # The slices below are new lists, so popping from them leaves the arguments untouched.
        if central_index > 0:
            wlst_left = words[:central_index]
            len_left = word_lengths[:central_index]
            sum_left = np.sum(len_left)
        if central_index < num_words - 1:
            wlst_right = words[central_index + 1:]
            len_right = word_lengths[central_index + 1:]
            sum_right = np.sum(len_right)
    else:
        central_index = 0

    pos_y = centroid_y - line_height // 2
    pos_x = centroid_x - word_lengths[central_index] // 2

    bh, bw = mask.shape[:2]
    central_line = Textline(words[central_index], pos_x, pos_y, word_lengths[central_index], spacing)
    line_bottom = pos_y + line_height
    # Greedily widen the central line while a candidate on either side still fits.
    while sum_left > 0 or sum_right > 0:
        left_valid, right_valid = False, False

        # Would the line still fit (centred on the centroid) with the nearest left word added?
        if sum_left > 0:
            new_len_l = central_line.length + len_left[-1] + delimiter_len
            new_x_l = centroid_x - new_len_l // 2
            new_r_l = new_x_l + new_len_l
            if (new_x_l > 0 and new_r_l < bw):
                if mask[pos_y: line_bottom, new_x_l].sum()==0 and mask[pos_y: line_bottom, new_r_l].sum() == 0:
                    left_valid = True
        # Same test for the nearest right word.
        if sum_right > 0:
            new_len_r = central_line.length + len_right[0] + delimiter_len
            new_x_r = centroid_x - new_len_r // 2
            new_r_r = new_x_r + new_len_r
            if (new_x_r > 0 and new_r_r < bw):
                if mask[pos_y: line_bottom, new_x_r].sum()==0 and mask[pos_y: line_bottom, new_r_r].sum() == 0:
                    right_valid = True

        # When both sides fit, take from the side with more remaining width (ties go right).
        insert_left = False
        if left_valid and right_valid:
            if sum_left > sum_right:
                insert_left = True
        elif left_valid:
            insert_left = True
        elif not right_valid:
            break

        if insert_left:
            central_line.append_left(wlst_left.pop(-1), len_left[-1] + delimiter_len, delimiter)
            sum_left -= len_left.pop(-1)
            central_line.pos_x = new_x_l
        else:
            central_line.append_right(wlst_right.pop(0), len_right[0] + delimiter_len, delimiter)
            sum_right -= len_right.pop(0)
            central_line.pos_x = new_x_r
        if central_line.length > max_central_width:
            break

    central_line.strip_spacing()
    lines = [central_line]

    # layout bottom half
    if sum_right > 0:
        # The first remaining right word opens the line directly below the central one.
        w, wl = wlst_right.pop(0), len_right.pop(0)
        pos_x = centroid_x - wl // 2
        pos_y = centroid_y + line_height // 2
        line_bottom = pos_y + line_height
        line = Textline(w, pos_x, pos_y, wl, spacing)
        lines.append(line)
        sum_right -= wl
        while sum_right > 0:
            w, wl = wlst_right.pop(0), len_right.pop(0)
            sum_right -= wl
            new_len = line.length + wl + delimiter_len
            new_x = centroid_x - new_len // 2
            right_x = new_x + new_len
            if new_x <= 0 or right_x >= bw:
                line_valid = False
            elif mask[pos_y: line_bottom, new_x].sum() > 0 or\
                mask[pos_y: line_bottom, right_x].sum() > 0:
                line_valid = False
            else:
                line_valid = True
            if line_valid:
                line.append_right(w, wl+delimiter_len, delimiter)
                line.pos_x = new_x
                if new_len > max_central_width:
                    # The line is full after this word: the next word (if any) must open a new line.
                    line_valid = False
                    if sum_right > 0:
                        w, wl = wlst_right.pop(0), len_right.pop(0)
                        sum_right -= wl
                    else:
                        line.strip_spacing()
                        break
            if not line_valid:
                # Start a new line one pitch further down with the pending word.
                pos_x = centroid_x - wl // 2
                pos_y = line_bottom
                line_bottom += line_height
                line.strip_spacing()
                line = Textline(w, pos_x, pos_y, wl, spacing)
                lines.append(line)

    # layout top half
    if sum_left > 0:
        # Mirror image of the bottom half: words are taken from the end and lines grow upwards.
        w, wl = wlst_left.pop(-1), len_left.pop(-1)
        pos_x = centroid_x - wl // 2
        pos_y = centroid_y - line_height // 2 - line_height
        line_bottom = pos_y + line_height
        line = Textline(w, pos_x, pos_y, wl, spacing)
        lines.insert(0, line)
        sum_left -= wl
        while sum_left > 0:
            w, wl = wlst_left.pop(-1), len_left.pop(-1)
            sum_left -= wl
            new_len = line.length + wl + delimiter_len
            new_x = centroid_x - new_len // 2
            right_x = new_x + new_len
            if new_x <= 0 or right_x >= bw:
                line_valid = False
            elif mask[pos_y: line_bottom, new_x].sum() > 0 or\
                mask[pos_y: line_bottom, right_x].sum() > 0:
                line_valid = False
            else:
                line_valid = True
            if line_valid:
                line.append_left(w, wl+delimiter_len, delimiter)
                line.pos_x = new_x
                if new_len > max_central_width:
                    # The line is full after this word: the next word (if any) must open a new line.
                    line_valid = False
                    if sum_left > 0:
                        w, wl = wlst_left.pop(-1), len_left.pop(-1)
                        sum_left -= wl
                    else:
                        line.strip_spacing()
                        break
            if not line_valid:
                # Start a new line one pitch further up with the pending word.
                pos_x = centroid_x - wl // 2
                pos_y -= line_height
                line_bottom = pos_y + line_height
                line.strip_spacing()
                line = Textline(w, pos_x, pos_y, wl, spacing)
                lines.insert(0, line)

    # Debug visualisation of the layout (disabled):
    # rbgmsk = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR)
    # cv2.circle(rbgmsk, (centroid_x, centroid_y), 10, (255, 0, 0))
    # for line in lines:
    # cv2.rectangle(rbgmsk, (line.pos_x, line.pos_y), (line.pos_x + line.length, line.pos_y + line_height), (0, 255, 0))
    # cv2.imshow('mask', rbgmsk)
    # cv2.waitKey(0)

    return lines
def render_textblock_list_eng(
    img: np.ndarray,
    text_regions: List[TextBlock],
    font_color = (0, 0, 0),
    stroke_color = (255, 255, 255),
    delimiter: str = ' ',
    line_spacing: float = 0.01,
    stroke_width: float = 0.1,
    size_tol: float = 1.0,
    ballonarea_thresh: float = 2,
    downscale_constraint: float = 0.7,
    original_img: np.ndarray = None,
    disable_font_border: bool = False
) -> np.ndarray:
    r"""
    Render the translation of every text region onto ``img``.

    For each region the translation is segmented with ``seg_eng``, a balloon
    mask is extracted around the region, the words are laid out inside that
    mask with ``layout_lines_aligncenter``, rasterised with ``render_lines``
    and pasted onto a PIL copy of ``img``.

    Args:
        img (np.ndarray): image to draw on; it is converted with ``Image.fromarray``.
        text_regions (List[TextBlock]): regions to render. ``translation``, ``font_size``,
            ``xyxy``, ``xywh`` and ``angle`` are read; ``enlarge_ratio`` and ``enlarged_xyxy``
            are written, and ``angle`` is reduced modulo 360 for regions rotated by more
            than 3 degrees.
        font_color: colour passed to ``render_lines`` as ``fg``.
        stroke_color: colour passed to ``render_lines`` as ``bg``.
        delimiter (str, optional): string placed between words; its glyph advance is the gap width.
        line_spacing (float, optional): forwarded to ``render_lines``.
        stroke_width (float, optional): stroke size as a fraction of the font size.
        size_tol (float, optional): not used by this function.
        ballonarea_thresh (float, optional): minimum ratio of balloon area to estimated text area;
            below it the balloon mask is scaled up.
        downscale_constraint (float, optional): minimum scaling down ratio, prevent rendered text from being too small
        original_img (np.ndarray, optional): original image used to extract text balloons.
        disable_font_border (bool, optional): not used by this function.

    Returns:
        np.ndarray: the image with all translations pasted in.
    """

    def calculate_font_values(font_size: int, words: List[str]):
        # Derive every size-dependent quantity from a font size:
        # stroke width, line height, delimiter width, widest word and per-word widths.
        font_size = int(font_size)
        sw = int(font_size * stroke_width)
        line_height = int(font_size * 0.8)
        delimiter_glyph = get_char_glyph(delimiter, font_size, 0)
        # ">> 6" drops six fractional bits (presumably FreeType 26.6 fixed point -> pixels; confirm).
        delimiter_len = delimiter_glyph.advance.x >> 6
        base_length = -1
        word_lengths = []
        for word in words:
            word_length = 0
            for cdpt in word:
                glyph = get_char_glyph(cdpt, font_size, 0)
                char_offset_x = glyph.metrics.horiAdvance >> 6
                word_length += char_offset_x
            word_lengths.append(word_length)
            if word_length > base_length:
                base_length = word_length
        return font_size, sw, line_height, delimiter_len, base_length, word_lengths

    img_pil = Image.fromarray(img)

    # Initialize enlarge ratios
    for region in text_regions:
        region.enlarge_ratio = 1
        region.enlarged_xyxy = region.xyxy.copy()

    def update_enlarged_xyxy(region):
        # Grow the region's box symmetrically around its centre by enlarge_ratio.
        region.enlarged_xyxy = region.xyxy.copy()
        w_diff, h_diff = ((region.xywh[2:] * region.enlarge_ratio) - region.xywh[2:].astype(np.float64)) // 2
        region.enlarged_xyxy[0] -= w_diff
        region.enlarged_xyxy[2] += w_diff
        region.enlarged_xyxy[1] -= h_diff
        region.enlarged_xyxy[3] += h_diff

    # Adjust enlarge ratios relative to each other to reduce intersections
    for region in text_regions:
        # If it wasn't changed below already
        if region.enlarge_ratio == 1:
            # The larger the aspect ratio the more it should try to enlarge the bubble
            region.enlarge_ratio = min(max(region.xywh[2] / region.xywh[3], region.xywh[3] / region.xywh[2]) * 1.5, 3)
            update_enlarged_xyxy(region)

        for region2 in text_regions:
            if region is region2:
                continue
            if rect_distance(*region.enlarged_xyxy, *region2.enlarged_xyxy) == 0: # if intersect
                # Get prior distance and adjust both enlargement ratios accordingly
                d = rect_distance(*region.xyxy, *region2.xyxy)
                l1 = (region.xywh[2] + region.xywh[3]) / 2
                l2 = (region2.xywh[2] + region2.xywh[3]) / 2
                region.enlarge_ratio = d / (2 * l1) + 1
                region2.enlarge_ratio = d / (2 * l2) + 1
                update_enlarged_xyxy(region)
                update_enlarged_xyxy(region2)
                # print('Reducing enlarge ratio to prevent intersection')
                # print(region.translation, region.enlarged_xyxy, region.enlarge_ratio)
                # print('>->', region2.translation, region2.enlarged_xyxy, region2.enlarge_ratio)

    for region in text_regions:
        words = seg_eng(region.translation)
        if not words:
            continue

        font_size, sw, line_height, delimiter_len, base_length, word_lengths = calculate_font_values(region.font_size, words)

        # non-dl textballon segmentation
        # Extract ballon region
        ballon_mask, xyxy = extract_ballon_region(original_img, region.xywh, enlarge_ratio=region.enlarge_ratio)
        ballon_area = (ballon_mask > 0).sum()

        # rx / ry: offset subtracted from the text centre further down
        # (presumably the origin shift introduced by rotating the mask with expand=True; confirm).
        rotated, rx, ry = False, 0, 0
        if abs(region.angle) > 3:
            rotated = True
            region_angle_rad = np.deg2rad(region.angle)
            region_angle_sin = np.sin(region_angle_rad)
            region_angle_cos = np.cos(region_angle_rad)
            rotated_ballon_mask = Image.fromarray(ballon_mask).rotate(region.angle, expand=True)
            rotated_ballon_mask = np.array(rotated_ballon_mask)

            # NOTE: this writes the normalised angle back to the region (sin/cos above use the original value).
            region.angle %= 360
            if region.angle > 0 and region.angle <= 90:
                ry = abs(ballon_mask.shape[1] * region_angle_sin)
            elif region.angle > 90 and region.angle <= 180:
                rx = abs(ballon_mask.shape[1] * region_angle_cos)
                ry = rotated_ballon_mask.shape[0]
            elif region.angle > 180 and region.angle <= 270:
                ry = abs(ballon_mask.shape[0] * region_angle_cos)
                rx = rotated_ballon_mask.shape[1]
            else:
                rx = abs(ballon_mask.shape[0] * region_angle_sin)
            ballon_mask = rotated_ballon_mask

        # Rough area the text would need if written as a single line.
        line_width = sum(word_lengths) + delimiter_len * (len(word_lengths) - 1)
        region_area = line_width * line_height + delimiter_len * (len(words) - 1) * line_height
        # NOTE(review): region_area is 0 when every word has zero width; verify that cannot reach here.
        area_ratio = ballon_area / region_area
        resize_ratio = 1

        # if ballon_area is smaller than 2*region_area
        if area_ratio < ballonarea_thresh:
            # resize so that it is 2*region_area
            resize_ratio = ballonarea_thresh / area_ratio
            ballon_area = int(resize_ratio * ballon_area) # = ballonarea_thresh * line_area
            # Area ratio -> linear scale, capped at (1 / downscale_constraint) ** 2.
            resize_ratio = min(np.sqrt(resize_ratio), (1/downscale_constraint)**2)
            rx *= resize_ratio
            ry *= resize_ratio
            ballon_mask = cv2.resize(ballon_mask, (int(resize_ratio * ballon_mask.shape[1]), int(resize_ratio * ballon_mask.shape[0])))

        # new region bbox
        region_x, region_y, region_w, region_h = cv2.boundingRect(cv2.findNonZero(ballon_mask))

        # Widest word; an empty one means there is nothing to draw for this region.
        base_length_word = words[max(enumerate(word_lengths), key = lambda x: x[1])[0]]
        if len(base_length_word) == 0 :
            continue
        # Estimate how many lines are needed vs. how many fit, and shrink the font accordingly
        # (never below downscale_constraint).
        lines_needed = len(region.translation) / len(base_length_word)
        lines_available = abs(xyxy[3] - xyxy[1]) // line_height + 1
        font_size_multiplier = max(min(region_w / (base_length + 2*sw), lines_available / lines_needed), downscale_constraint)
        # print(region.translation, font_size, font_size_multiplier, int(font_size * font_size_multiplier))
        if font_size_multiplier < 1:
            font_size = int(font_size * font_size_multiplier)
            font_size, sw, line_height, delimiter_len, base_length, word_lengths = calculate_font_values(font_size, words)

        textlines = layout_lines_aligncenter(ballon_mask, words, word_lengths, delimiter_len, line_height, delimiter=delimiter)

        # Vertical correction that moves the text block towards the balloon's bounding-box
        # centre, limited to one line height in either direction.
        line_cy = np.array([line.pos_y for line in textlines]).mean() + line_height / 2
        region_cy = region_y + region_h / 2
        y_offset = int(round(np.clip(region_cy - line_cy, -line_height, line_height)))

        # Bounding box of all lines (plus stroke) in mask coordinates.
        lines_x1, lines_x2 = [], []
        for line in textlines:
            lines_x1.append(line.pos_x)
            lines_x2.append(max(line.pos_x, 0) + line.length)
        lines_x1 = np.array(lines_x1)
        lines_x2 = np.array(lines_x2)
        canvas_x1, canvas_x2 = lines_x1.min() - sw, lines_x2.max() + sw
        canvas_y1, canvas_y2 = textlines[0].pos_y - sw, textlines[-1].pos_y + line_height + sw
        canvas_h = int(canvas_y2 - canvas_y1)
        canvas_w = int(canvas_x2 - canvas_x1)
        # Footprint of the lines, drawn as filled rectangles, then positions are made canvas-relative.
        lines_map = np.zeros_like(ballon_mask, dtype=np.uint8)
        for line in textlines:
            # line.pos_y += y_offset
            # NOTE(review): y_offset is applied to the top corner only; confirm this is intended.
            cv2.rectangle(lines_map, (line.pos_x - sw, line.pos_y + y_offset), (line.pos_x + line.length + sw, line.pos_y + line_height), 255, -1)
            line.pos_x -= canvas_x1
            line.pos_y -= canvas_y1

        textlines_image = render_lines(textlines, canvas_h, canvas_w, font_size, sw, line_spacing, font_color, stroke_color)
        # Centre of the text block, mapped back to the scale of the unresized mask.
        rel_cx = ((canvas_x1 + canvas_x2) / 2 - rx) / resize_ratio
        rel_cy = ((canvas_y1 + canvas_y2) / 2 - ry + y_offset) / resize_ratio

        # Text footprint (plus the part of the canvas sticking out of the balloon's bounding box)
        # relative to the footprint that actually lies on the balloon mask.
        lines_area = np.sum(lines_map)
        lines_area += (max(0, region_y - canvas_y1) + max(0, canvas_y2 - region_h - region_y)) * canvas_w * 255 \
            + (max(0, region_x - canvas_x1) + max(0, canvas_x2 - region_w - region_x)) * canvas_h * 255
        # NOTE(review): the denominator is 0 when the lines do not overlap the mask at all.
        valid_lines_ratio = lines_area / np.sum(cv2.bitwise_and(lines_map, ballon_mask))
        if valid_lines_ratio > 1: # text bbox > ballon area
            resize_ratio = min(resize_ratio * valid_lines_ratio, (1 / downscale_constraint) ** 2)

        if rotated:
            # Rotate the centre by the region angle and the rendered text by the opposite angle.
            rcx = rel_cx * region_angle_cos - rel_cy * region_angle_sin
            rcy = rel_cx * region_angle_sin + rel_cy * region_angle_cos
            rel_cx = rcx
            rel_cy = rcy
            textlines_image = textlines_image.rotate(-region.angle, expand=True, resample=Image.BILINEAR)
            textlines_image = textlines_image.crop(textlines_image.getbbox())
        # xyxy is the box returned by extract_ballon_region; its top-left corner anchors the text.
        abs_cx = rel_cx + xyxy[0]
        abs_cy = rel_cy + xyxy[1]

        # The text was laid out on a mask scaled by resize_ratio, so shrink it by the same factor.
        if resize_ratio != 1:
            textlines_image = textlines_image.resize((int(textlines_image.width / resize_ratio), int(textlines_image.height / resize_ratio)))
        abs_x = int(abs_cx - textlines_image.width / 2)
        abs_y = int(abs_cy - textlines_image.height / 2)
        # The rendered image doubles as its own paste mask.
        img_pil.paste(textlines_image, (abs_x, abs_y), mask=textlines_image)

        # cv2.imshow('ballon_region', ballon_region)
        # cv2.imshow('cropped', original_img[xyxy[1]:xyxy[3], xyxy[0]:xyxy[2]])
        # cv2.imshow('raw_lines', np.array(raw_lines))
        # cv2.waitKey(0)

    return np.array(img_pil)