# encoding=utf8
"""Gradio front end for the JoyType multilingual visual-text generation demo.

The script builds a ``gr.Blocks`` UI (prompt and sampling settings, a canvas
with up to ``BBOX_MAX_NUM`` editable text boxes, examples, a result gallery)
and wires the widgets to callbacks that are pulled in by
``from functions import *`` (``process``, ``update_box_num``,
``load_box_list``, ``change_settings``, ``re_edit``, ``resize_w``,
``resize_h``, ``create_canvas``, ``model_list``, ``font_list``,
``BBOX_MAX_NUM``, ``BBOX_INI_NUM``).
"""
import os
import cv2
import gradio as gr
import numpy as np
import re
import json
from huggingface_hub import login

from functions import *
from gradio.components import Component

# NOTE(review): os.getenv returns None when LOGIN_TOKEN is unset; confirm that
# login(token=None) behaves acceptably for headless deployments.
login(token=os.getenv('LOGIN_TOKEN'))

css = './css/style.css'

# Initialise a Gradio Blocks app with a specific theme and enable queuing.
block = gr.Blocks(
    theme=gr.themes.Base(),
    css=css
).queue()

# Load the JavaScript plugin source; it is injected into the page on load.
with open('javascript/bboxHint.js', 'r', encoding="utf-8") as file:
    value = file.read()
# json.dumps yields a quoted, escaped JS string literal for safe embedding.
escaped_value = json.dumps(value)

with block:
    # Client-side only hook (fn=None): append the plugin as a <script> tag.
    block.load(
        fn=None,
        _js=f"""() => {{
        const script = document.createElement("script");
        const text = document.createTextNode({escaped_value});
        script.appendChild(text);
        document.head.appendChild(script);
        }}"""
    )

    # Page title.
    gr.HTML(
        '<div style="text-align: center; margin: 20px auto;"> \
        <h1 style="font-size:5em">JoyType</h1> \
        <h1 style="font-size:2.5em">A Robust Design for Multilingual Visual Text Creation</h1> \
        </div>'
    )

    with gr.Row():
        # ------------------------------------------------------------------
        # Left column: settings, text-box editor, examples.
        # ------------------------------------------------------------------
        with gr.Column(scale=3):
            with gr.Accordion('Basic Settings(基础设置)', open=True):
                with gr.Row(variant='compact'):
                    usr_prompt = gr.Textbox(label='Prompt(提示词)', elem_id='usr_prompt')
                with gr.Row(variant='compact'):
                    base_model = gr.Dropdown(
                        value='JoyType.v1.0',
                        choices=model_list,
                        label='Base Model(基模型)',
                        elem_id='base_model',
                        allow_custom_value=False
                    )

            with gr.Accordion('Advanced Settings(高级设置)', open=False):
                with gr.Row(variant='compact'):
                    image_width = gr.Slider(label='Image Width(宽度)', minimum=256, maximum=768, value=512, step=32)
                    image_height = gr.Slider(label='Image Height(高度)', minimum=256, maximum=768, value=512, step=32)
                with gr.Row(variant='compact'):
                    num_samples = gr.Slider(label='Samples(生成数量)', minimum=1, maximum=4, value=2, step=1)
                    inference_steps = gr.Slider(label='Steps(推理步数)', minimum=10, maximum=50, value=20, step=1)
                with gr.Row(variant='compact'):
                    conditioning_scale = gr.Slider(label='Text Strength(文字强度)', minimum=0.1, maximum=2., value=1., step=0.1)
                    cfg_scale = gr.Slider(label='CFG Scale(CFG制强度)', minimum=1, maximum=20, value=7.5, step=0.5)
                with gr.Row(variant='compact'):
                    seed = gr.Slider(label='Seed(随机种子)', minimum=-1, maximum=2147483647, value=-1, step=1)
                    scheduler_name = gr.Dropdown(
                        value='PNDM',
                        choices=[
                            'PNDM',
                            'LMS',
                            'Euler',
                            'DPM',
                            'DDIM',
                            'Heun',
                            'Euler-Ancestral'
                        ],
                        label='Scheduler(采样器)',
                        allow_custom_value=False
                    )
                with gr.Row(variant='compact'):
                    # NOTE(review): 'supper' looks like a typo for 'super'; it is
                    # left untouched because it is part of the prompt text.
                    a_prompt = gr.Textbox(
                        label='Added Prompt(附加提示词)',
                        max_lines=2,
                        value='best quality, extremely detailed, supper legible text, '
                              'clear text edges, clear strokes, neat writing, no watermarks'
                    )
                with gr.Row(variant='compact'):
                    n_prompt = gr.Textbox(
                        label='Negative Prompt(负向提示词)',
                        max_lines=2,
                        value='low-res, bad anatomy, extra digit, fewer digits, cropped, worst quality, '
                              'low quality, watermark, unreadable text, messy words, distorted text, '
                              'disorganized writing, advertising picture'
                    )

            # Changing the base model updates steps, CFG scale and scheduler.
            base_model.change(
                fn=change_settings,
                inputs=base_model,
                outputs=[inference_steps, cfg_scale, scheduler_name]
            )

            with gr.Row():
                with gr.Tab('Text Editing(文字编辑)', elem_id='MD-tab-t2i'):
                    with gr.Row(variant='compact'):
                        choice = gr.Slider(
                            label='Text Boxes(可编辑文字框)',
                            minimum=0,
                            maximum=8,
                            step=1,
                            value=BBOX_INI_NUM
                        )
                    with gr.Row():
                        with gr.Column(scale=2):
                            rect_img = gr.Image(
                                value=create_canvas(),
                                label='Rect Position',
                                elem_id='MD-bbox-rect-t2i',
                                show_label=False,
                                visible=True,
                                height=300
                            )
                        with gr.Column(scale=3):
                            # Per-box widgets, collected so they can be passed
                            # to the event listeners below as flat lists.
                            rect_cb_list: list[Component] = []
                            rect_box_list: list[Component] = []
                            rect_font_name_list: list[Component] = []
                            rect_usr_text_list: list[Component] = []
                            with gr.Column():
                                # Header row above the font/text inputs.
                                with gr.Row(elem_id='row_show'):
                                    with gr.Column(scale=1, min_width=20):
                                        gr.Markdown('<p align="center">Font(字体)</p>', elem_id='markdown_1')
                                    with gr.Column(scale=2, min_width=20):
                                        gr.Markdown('<p align="center">Text(文字内容)</p>', elem_id='markdown_2')

                                # All rows are created up front, then filled.
                                row_layout = [gr.Row() for _ in range(BBOX_MAX_NUM)]
                                for i in range(BBOX_MAX_NUM):
                                    # Only the first BBOX_INI_NUM boxes start visible/enabled.
                                    visible = i < BBOX_INI_NUM
                                    with row_layout[i]:
                                        fn = gr.Dropdown(
                                            choices=font_list,
                                            label='Font(字体)',
                                            value='CHN-华文行楷',
                                            visible=visible,
                                            show_label=False,
                                            scale=1,
                                            allow_custom_value=False,
                                            min_width=90,
                                            elem_id=f'font_input_{i}',
                                            container=False
                                        )
                                        ut = gr.Textbox(
                                            label='Text(文字内容)',
                                            visible=visible,
                                            scale=2,
                                            show_label=False,
                                            elem_id=f'text_input_{i}',
                                            container=False,
                                            max_lines=1
                                        )
                                        # Hidden state widgets: enable flag and the
                                        # box geometry (x, y, w, h) in the 0..1 range.
                                        e = gr.Checkbox(label=f'{i}', value=visible, visible=False, min_width=10)
                                        x = gr.Slider(label='x', value=0.4, minimum=0.0, maximum=1.0, step=0.0001,
                                                      elem_id=f'MD-t2i-{i}-x', visible=False)
                                        y = gr.Slider(label='y', value=0.4, minimum=0.0, maximum=1.0, step=0.0001,
                                                      elem_id=f'MD-t2i-{i}-y', visible=False)
                                        w = gr.Slider(label='w', value=0.2, minimum=0.0, maximum=1.0, step=0.0001,
                                                      elem_id=f'MD-t2i-{i}-w', visible=False)
                                        h = gr.Slider(label='h', value=0.2, minimum=0.0, maximum=1.0, step=0.0001,
                                                      elem_id=f'MD-t2i-{i}-h', visible=False)

                                    # Client-side only listeners (fn=None). The index is
                                    # baked into the JS via the f-string at definition
                                    # time, so there is no late-binding issue.
                                    x.change(fn=None, inputs=x, outputs=x, _js=f'v => onBoxChange({i}, "x", v)',
                                             show_progress=False, queue=False)
                                    y.change(fn=None, inputs=y, outputs=y, _js=f'v => onBoxChange({i}, "y", v)',
                                             show_progress=False, queue=False)
                                    w.change(fn=None, inputs=w, outputs=w, _js=f'v => onBoxChange({i}, "w", v)',
                                             show_progress=False, queue=False)
                                    h.change(fn=None, inputs=h, outputs=h, _js=f'v => onBoxChange({i}, "h", v)',
                                             show_progress=False, queue=False)
                                    e.change(fn=None, inputs=e, outputs=e, _js=f'e => onBoxEnableClick({i}, e)',
                                             queue=False)

                                    rect_cb_list.append(e)
                                    rect_box_list.extend([x, y, w, h])
                                    rect_font_name_list.append(fn)
                                    rect_usr_text_list.append(ut)

                            # Output order here (checkboxes, fonts, texts, boxes) differs
                            # from the input order used for `process` below.
                            # NOTE(review): presumably matches what update_box_num
                            # returns — verify against functions.
                            choice.change(
                                fn=update_box_num,
                                inputs=[choice],
                                outputs=[
                                    *rect_cb_list,
                                    *rect_font_name_list,
                                    *rect_usr_text_list,
                                    *rect_box_list
                                ]
                            )

                    # Empty Markdown components sit either side of the Run button.
                    with gr.Row():
                        gr.Markdown('')
                        run_edit = gr.Button(value='Run(运行)', elem_classes='run', elem_id='run_edit')
                        gr.Markdown('')

            with gr.Row():
                with gr.Accordion(label='Examples(示例)', open=True):
                    # Hidden targets that receive the example's image and id.
                    img_container = gr.Image(visible=False, label='Text Layout(文字布局)')
                    example_id = gr.Textbox(value=-1, visible=False, label='ID(编号)')
                    # Columns: id, layout image, prompt, base model, seed,
                    # number of text boxes, number of samples.
                    gen_examples = gr.Examples(
                        [
                            [1, 'templates/1.png', 'landscape, Chinese style, ink peaks, poster',
                             model_list[0], 1648703813, 3, 1],
                            [2, 'templates/2.png', 'a clock and medicine bottle has texts and "time"',
                             model_list[0], 1654615998, 2, 1],
                            [3, 'templates/3.png', '漂亮的风景照,很多山峰,清澈的湖水',
                             model_list[3], 2078698098, 3, 1],
                            [4, 'templates/4.png', 'a vodka, on the bar, dim background',
                             model_list[2], 443791646, 3, 1],
                            [5, 'templates/5.png', '画有玫瑰的卡片,明亮的背景',
                             model_list[4], 516210890, 2, 1],
                            [6, 'templates/6.png', 'posters on the table, with pens, clear background, starry sky, moon',
                             model_list[1], 228167646, 4, 1],
                            [7, 'templates/7.png', 'snowy landscape, domed cabin, winter scene, cozy atmosphere, soft lighting',
                             model_list[5], 695897181, 3, 1],
                            [8, 'templates/8.png', '一张关于健康教育的卡片,上面有一些文字,有一些食物图标,背景里有一些水果喝饮料的图标,且背景是模糊的',
                             model_list[1], 936188591, 6, 1],
                        ],
                        [example_id, img_container, usr_prompt, base_model, seed, choice, num_samples],
                        examples_per_page=5,
                        label=''
                    )
                    # Selecting an example changes example_id, which triggers
                    # load_box_list; example_id is also among its outputs.
                    example_id.change(
                        fn=load_box_list,
                        inputs=[example_id, choice],
                        outputs=[
                            *rect_cb_list,
                            *rect_font_name_list,
                            *rect_usr_text_list,
                            *rect_box_list,
                            example_id
                        ]
                    )

            # Canvas maintenance: clearing the image and resizing with the sliders.
            rect_img.clear(re_edit, None, [*rect_box_list, rect_img, image_width, image_height])
            image_width.release(resize_w, [image_width, rect_img], rect_img)
            image_height.release(resize_h, [image_height, rect_img], rect_img)

        # ------------------------------------------------------------------
        # Right column: results and usage notes.
        # ------------------------------------------------------------------
        with gr.Column(scale=2):
            with gr.Row():
                result_gallery = gr.Gallery(
                    label='Result(结果)',
                    show_label=True,
                    preview=True,
                    columns=8,
                    allow_preview=True,
                    elem_id='gallery'
                )
            with gr.Row():
                with gr.Tab("Introduction"):
                    gr.Markdown('<span style="color:#3B5998;font-size:20px">What we can do</span>')
                    gr.Markdown(
                        '<span style="color:black;font-size:15px">Generating images with accurately represented text in multi-language.</span>')
                    gr.Markdown('<span style="color:#3B5998;font-size:20px">How to use</span>')
                    gr.Markdown(
                        '<span style="color:black;font-size:15px">Enter a description of the image you want to generate in the "Prompt" text box.</span>')
                    gr.Markdown('<span style="color:#3B5998;font-size:18px">Text Editing</span>')
                    gr.Markdown(
                        '<span style="color:black;font-size:15px">You can drag the "Text Boxes" slider to set the number of text to be laid out, '
                        'and set the corresponding font and text content respectively, Note that there must be no overlap between the text boxes, '
                        'or the model will not generate an image.</span>')
                    gr.Markdown(
                        '<span style="color:black;font-size:15px">Finally, click the Run button to generate a picture!</span>')
                with gr.Tab("说明"):
                    gr.Markdown('<span style="color:#3B5998;font-size:20px">我们能做什么</span>')
                    gr.Markdown('<span style="color:black;font-size:15px">在多种语言上生成具有准确文本的图像</span>')
                    gr.Markdown('<span style="color:#3B5998;font-size:20px">如何使用</span>')
                    gr.Markdown(
                        '<span style="color:black;font-size:15px">在“提示词”文本框中输入你想要生成的图片所对应的文字描述。</span>')
                    gr.Markdown('<span style="color:#3B5998;font-size:18px">文本编辑</span>')
                    gr.Markdown(
                        '<span style="color:black;font-size:15px">你可以拖动“可编辑文字框”滑块来设置需要布局的文字数量,并分别设置对应的字体和文字内容;'
                        '请注意,文本框之间不能有重叠,否则模型将不会生成图片。</span>')
                    gr.Markdown('<span style="color:black;font-size:15px">最后点击运行按钮,即可生成图片!</span>')

    with gr.Row():
        # Hidden Markdown used as the second output of `process`.
        result_info = gr.Markdown('debug', visible=False)

    # Positional inputs for `process`: fixed settings first, then the per-box
    # widgets in the order checkboxes, boxes, fonts, texts.
    args = [
        num_samples,
        a_prompt,
        n_prompt,
        conditioning_scale,
        cfg_scale,
        inference_steps,
        seed,
        usr_prompt,
        rect_img,
        base_model,
        scheduler_name,
        gr.State(BBOX_MAX_NUM),
        *rect_cb_list,
        *rect_box_list,
        *rect_font_name_list,
        *rect_usr_text_list
    ]
    run_edit.click(
        fn=process,
        inputs=args,
        outputs=[result_gallery, result_info]
    )

if __name__ == "__main__":
    # NOTE(review): binds on all interfaces and requests a public share link —
    # confirm this exposure is intended for the deployment target.
    block.launch(
        server_name='0.0.0.0',
        share=True,
    )