# NOTE: non-code residue from page extraction ("Spaces: Restarting Restarting"); not part of the script.
import sys | |
import traceback | |
import pickle | |
import os | |
import concurrent.futures | |
from tqdm import tqdm | |
from font_dataset.font import load_fonts | |
from font_dataset.layout import generate_font_image | |
from font_dataset.text import CorpusGeneratorManager | |
from font_dataset.background import background_image_generator | |
# Weight of a CJK font relative to any other font: the per-split image counts
# for CJK fonts below are the base counts multiplied by this ratio.
cjk_ratio = 3

# Base number of images per font for each dataset split.
train_cnt = 100
val_cnt = 5
test_cnt = 30

# Per-split image counts for CJK fonts. int() keeps the counts integral even
# if cjk_ratio is later changed to a non-integer value.
train_cnt_cjk = int(train_cnt * cjk_ratio)
val_cnt_cjk = int(val_cnt * cjk_ratio)
test_cnt_cjk = int(test_cnt * cjk_ratio)

# Root directory of the generated dataset. It is created eagerly so the
# directory checks further down never fail on a missing parent.
dataset_path = "./dataset/font_img"
os.makedirs(dataset_path, exist_ok=True)
# load_fonts() returns the font collection together with a predicate that
# tells whether a given font must be left out of the dataset.
fonts, exclusion_rule = load_fonts()

# Weighted number of usable fonts: a CJK font contributes `cjk_ratio`, any
# other font contributes 1, so `<split>_cnt * cnt` is the image total of a
# split.
cnt = 0
for font in fonts:
    if exclusion_rule(font):
        print(f"Excluded font: {font.path}")
        continue
    cnt += cjk_ratio if font.language == "CJK" else 1

# NOTE(review): indentation was lost in the copy this was restored from; the
# summary is assumed to be printed once, after the loop.
print("Total training images:", train_cnt * cnt)
print("Total validation images:", val_cnt * cnt)
print("Total testing images:", test_cnt * cnt)
def _count_entries(directory):
    """Return how many entries sit directly inside *directory*.

    A directory that does not exist yet counts as 0. Every entry returned by
    os.listdir is counted, whatever its type or name.
    """
    if not os.path.exists(directory):
        return 0
    return len(os.listdir(directory))


# Number of files already present in each split directory of the dataset.
num_file_train = _count_entries(os.path.join(dataset_path, "train"))
num_file_val = _count_entries(os.path.join(dataset_path, "val"))
num_file_test = _count_entries(os.path.join(dataset_path, "test"))
# Files found on disk across the three split directories.
total_generated = num_file_train + num_file_val + num_file_test

# Expected number of files once generation has finished.
# NOTE(review): the factor 2 presumably means two files are written per image
# (e.g. the image plus a label file) - confirm against the generator script.
total_target = (train_cnt + val_cnt + test_cnt) * cnt * 2

print("Total files generated:", total_generated)
print("Total files target:", total_target)
if total_target:
    print(f"{total_generated / total_target * 100:.2f}% completed")
else:
    # cnt is 0 when no font was loaded or every font was excluded; dividing
    # by the target would raise ZeroDivisionError, so report that instead.
    print("No target files (no usable fonts); completion percentage unavailable")