File size: 7,888 Bytes
9dce458 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 |
import os
import cv2
import numpy as np
from typing import List
from shapely import affinity
from shapely.geometry import Polygon
from tqdm import tqdm
# from .ballon_extractor import extract_ballon_region
from . import text_render
from .text_render_eng import render_textblock_list_eng
from ..utils import (
BASE_PATH,
TextBlock,
color_difference,
get_logger,
rotate_polygons,
)
# Module-level logger shared by the functions below, obtained from the
# project's get_logger helper under the name 'render'.
logger = get_logger('render')
def parse_font_paths(path: str, default: List[str] = None) -> List[str]:
    """Split a comma-separated font path string into the entries that exist on disk.

    Args:
        path: Comma-separated font file paths. Whitespace around each entry
            is ignored, so ``"a.ttf, b.ttf"`` is accepted.
        default: Returned unchanged when ``path`` is empty or ``None``.

    Returns:
        The entries of ``path`` that are existing files, in their original
        order. When ``path`` is empty or ``None``, ``default`` is returned
        (or an empty list if ``default`` is falsy). Note that a non-empty
        ``path`` whose entries are all missing yields an empty list, not
        ``default``.
    """
    if not path:
        return default or []

    # Strip before the existence check: a space after the comma would
    # otherwise make a perfectly valid font path look like a missing file.
    # Empty entries (e.g. from a trailing comma) fail isfile() and drop out.
    candidates = (entry.strip() for entry in path.split(','))
    return [entry for entry in candidates if os.path.isfile(entry)]
def fg_bg_compare(fg, bg):
    """Return ``(fg, bg)``, replacing ``bg`` when it is too close to ``fg``.

    If ``color_difference(fg, bg)`` is below 30, the background is swapped
    for white when the mean of ``fg`` is at most 127, and for black
    otherwise. The foreground is always returned unchanged.
    """
    fg_brightness = np.mean(fg)
    if color_difference(fg, bg) < 30:
        # Pick whichever extreme contrasts with the foreground brightness.
        if fg_brightness <= 127:
            return fg, (255, 255, 255)
        return fg, (0, 0, 0)
    return fg, bg
def resize_regions_to_font_size(img: np.ndarray, text_regions: List[TextBlock], font_size_fixed: int, font_size_offset: int, font_size_minimum: int):
    """Settle each region's font size and return the quad it should be rendered into.

    For every region, in order:

    1. If the stripped translation has more characters than the source text,
       ``region.font_size`` is lowered one step at a time until a grid of
       font-size cells over ``region.unrotated_size`` has room for every
       translated character. If no positive size fits, the font size is
       left as it was.
    2. A target size is chosen: ``font_size_fixed`` when it is not ``None``,
       otherwise the region's size raised to ``font_size_minimum`` if it is
       below it. ``font_size_offset`` is then added.
    3. If the target differs from the region's size, the unrotated bounding
       quad is scaled by ``target / region.font_size``, rotated by
       ``-region.angle`` about ``region.center``, clipped to the image
       extent, and ``region.font_size`` is set to ``int(target)``.
       Otherwise ``region.min_rect`` is used as-is.

    Args:
        img: Image whose ``shape[0]`` / ``shape[1]`` bound the clipped
            coordinates (and seed the automatic minimum size).
        text_regions: Regions to process; their ``font_size`` is mutated.
        font_size_fixed: Size forced on every region, or ``None``.
        font_size_offset: Amount added to the chosen target size.
        font_size_minimum: Lower bound for the target size; ``-1`` derives
            it from the image as ``round((height + width) / 200)``.

    Returns:
        A list with one destination-point array per region, in the same
        order as ``text_regions``. Rescaled entries have shape
        ``(-1, 4, 2)``; entries taken from ``region.min_rect`` are
        presumably the same layout (TODO confirm against TextBlock).
    """
    if font_size_minimum == -1:
        # Automatically determine font_size by image size
        font_size_minimum = round((img.shape[0] + img.shape[1]) / 200)
    logger.debug(f'font_size_minimum {font_size_minimum}')

    dst_points_list = []
    for region in text_regions:
        char_count_orig = len(region.text)
        char_count_trans = len(region.translation.strip())
        if char_count_trans > char_count_orig:
            # More characters were added, have to reduce fontsize to fit
            # the allotted area. The loop condition keeps the divisor
            # positive, so a zero font size cannot raise here.
            rescaled_font_size = region.font_size
            while rescaled_font_size > 0:
                rows = region.unrotated_size[0] // rescaled_font_size
                cols = region.unrotated_size[1] // rescaled_font_size
                if rows * cols >= char_count_trans:
                    region.font_size = rescaled_font_size
                    break
                rescaled_font_size -= 1
        # Otherwise no need to increase fontsize

        # Infer the target fontsize
        target_font_size = region.font_size
        if font_size_fixed is not None:
            target_font_size = font_size_fixed
        elif target_font_size < font_size_minimum:
            target_font_size = font_size_minimum
        target_font_size += font_size_offset

        # Rescale dst_points accordingly
        if target_font_size != region.font_size:
            target_scale = target_font_size / region.font_size
            poly = Polygon(region.unrotated_min_rect[0])
            poly = affinity.scale(poly, xfact=target_scale, yfact=target_scale)
            # The exterior ring repeats its first vertex at the end; keep
            # only the four corners.
            dst_points = np.array(poly.exterior.coords[:4])
            dst_points = rotate_polygons(region.center, dst_points.reshape(1, -1), -region.angle).reshape(-1, 4, 2)

            # Clip to img width and height
            dst_points[..., 0] = dst_points[..., 0].clip(0, img.shape[1])
            dst_points[..., 1] = dst_points[..., 1].clip(0, img.shape[0])

            region.font_size = int(target_font_size)
        else:
            dst_points = region.min_rect

        dst_points_list.append(dst_points)
    return dst_points_list
async def dispatch(
    img: np.ndarray,
    text_regions: List[TextBlock],
    font_path: str = '',
    font_size_fixed: int = None,
    font_size_offset: int = 0,
    font_size_minimum: int = 0,
    hyphenate: bool = True,
    render_mask: np.ndarray = None,
    line_spacing: int = None,
    disable_font_border: bool = False
    ) -> np.ndarray:
    """Render the translation of every region onto ``img`` and return the result.

    Sets the font via ``text_render.set_font(font_path)``, drops regions
    whose ``translation`` is falsy, lets ``resize_regions_to_font_size``
    pick a font size and destination quad per region, then calls
    ``render`` for each region in turn. When ``render_mask`` is given,
    each destination quad is additionally filled with 1 in that mask
    (in place) before the region is rendered.
    """
    text_render.set_font(font_path)

    # Only regions that actually carry a translation are rendered.
    translated_regions = [region for region in text_regions if region.translation]

    # Resize regions that are too small
    quads = resize_regions_to_font_size(
        img,
        translated_regions,
        font_size_fixed,
        font_size_offset,
        font_size_minimum,
    )

    # TODO: Maybe remove intersections

    # Render text
    progress = tqdm(zip(translated_regions, quads), '[render]', total=len(translated_regions))
    for region, quad in progress:
        if render_mask is not None:
            # set render_mask to 1 for the region that is inside the quad
            cv2.fillConvexPoly(render_mask, quad.astype(np.int32), 1)
        img = render(img, region, quad, hyphenate, line_spacing, disable_font_border)
    return img
def render(
    img,
    region: TextBlock,
    dst_points,
    hyphenate,
    line_spacing,
    disable_font_border
):
    """Draw one region's translated text into ``img`` inside ``dst_points``.

    The text is first rasterised into an RGBA box by ``text_render``
    (horizontal or vertical depending on ``region.horizontal``). That box
    is padded with transparent pixels until its width/height ratio matches
    the destination quad, warped onto the quad with a homography, and
    alpha-blended into ``img`` over the quad's bounding rectangle.

    Args:
        img: Target image; modified in place and also returned.
        region: Text block providing colours, font size, text and layout.
        dst_points: Destination quad(s); indexed here as ``[:, 4 corners, 2]``
            (presumably corners ordered around the quad -- TODO confirm).
        hyphenate: Forwarded to ``put_text_horizontal``.
        line_spacing: Forwarded to the text rasteriser.
        disable_font_border: When truthy, the background colour is replaced
            by ``None`` before rasterising.

    Returns:
        ``img`` with the text blended in.
    """
    fg, bg = region.get_font_colors()
    fg, bg = fg_bg_compare(fg, bg)
    if disable_font_border:
        bg = None

    # Midpoints of the four edges; opposite midpoints give the quad's
    # extent along each axis, and their ratio the aspect to reproduce.
    edge_mids = (dst_points[:, [1, 2, 3, 0]] + dst_points) / 2
    norm_h = np.linalg.norm(edge_mids[:, 1] - edge_mids[:, 3], axis=1)
    norm_v = np.linalg.norm(edge_mids[:, 2] - edge_mids[:, 0], axis=1)
    target_ratio = np.mean(norm_h / norm_v)

    if region.horizontal:
        text_box = text_render.put_text_horizontal(
            region.font_size,
            region.get_translation_for_rendering(),
            round(norm_h[0]),
            round(norm_v[0]),
            region.alignment,
            region.direction == 'hr',
            fg,
            bg,
            region.target_lang,
            hyphenate,
            line_spacing,
        )
    else:
        text_box = text_render.put_text_vertical(
            region.font_size,
            region.get_translation_for_rendering(),
            round(norm_v[0]),
            region.alignment,
            fg,
            bg,
            line_spacing,
        )

    # Extend the rasterised box so that it has the same ratio as the quad:
    # too wide -> pad top and bottom, otherwise pad left and right.
    text_h, text_w, _ = text_box.shape
    if text_w / text_h > target_ratio:
        pad_y = int(text_w / (2 * target_ratio) - text_h / 2)
        pad_x = 0
    else:
        pad_y = 0
        pad_x = int((text_h * target_ratio - text_w) / 2)
    box = np.zeros((text_h + pad_y * 2, text_w + pad_x * 2, 4), dtype=np.uint8)
    box[pad_y:pad_y + text_h, pad_x:pad_x + text_w] = text_box

    # Map the corners of the padded box onto the destination quad.
    box_h, box_w = box.shape[0], box.shape[1]
    src_points = np.array(
        [[0, 0], [box_w, 0], [box_w, box_h], [0, box_h]],
        dtype=np.float32,
    )
    M, _ = cv2.findHomography(src_points, dst_points, cv2.RANSAC, 5.0)
    rgba_region = cv2.warpPerspective(
        box,
        M,
        (img.shape[1], img.shape[0]),
        flags=cv2.INTER_LINEAR,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=0,
    )

    # Blend only inside the quad's bounding rectangle; channel 3 of the
    # warped box acts as the per-pixel opacity.
    x, y, rect_w, rect_h = cv2.boundingRect(dst_points.astype(np.int32))
    roi = (slice(y, y + rect_h), slice(x, x + rect_w))
    warped_patch = rgba_region[roi]
    canvas_region = warped_patch[..., :3].astype(np.float32)
    mask_region = warped_patch[..., 3:4].astype(np.float32) / 255.0
    background = img[roi].astype(np.float32)
    blended = background * (1 - mask_region) + canvas_region * mask_region
    img[roi] = np.clip(blended, 0, 255).astype(np.uint8)
    return img
async def dispatch_eng_render(img_canvas: np.ndarray, original_img: np.ndarray, text_regions: List[TextBlock], font_path: str = '', line_spacing: int = 0, disable_font_border: bool = False) -> np.ndarray:
    """Render text regions through ``render_textblock_list_eng``.

    Returns ``img_canvas`` untouched when ``text_regions`` is empty.
    Otherwise the font is set to ``font_path`` -- or, when that is falsy,
    to ``fonts/comic shanns 2.ttf`` under ``BASE_PATH`` -- and the result
    of ``render_textblock_list_eng`` is returned, called with
    ``size_tol=1.2`` and ``downscale_constraint=0.8``.
    """
    if not len(text_regions):
        return img_canvas

    # Fall back to the bundled font only when no font path was supplied.
    text_render.set_font(font_path or os.path.join(BASE_PATH, 'fonts/comic shanns 2.ttf'))

    return render_textblock_list_eng(
        img_canvas,
        text_regions,
        line_spacing=line_spacing,
        size_tol=1.2,
        original_img=original_img,
        downscale_constraint=0.8,
        disable_font_border=disable_font_border,
    )
|