|
|
import gradio as gr |
|
|
import numpy as np |
|
|
import random |
|
|
import torch |
|
|
from diffusers import DiffusionPipeline |
|
|
|
|
|
|
|
|
# Hugging Face Hub repository the diffusion pipeline is loaded from.
MODEL_REPO_ID = "stabilityai/sdxl-turbo"

# Largest signed 32-bit integer; upper bound for both the seed slider and
# randomly drawn seeds.
MAX_SEED = np.iinfo(np.int32).max

# Upper bound (in pixels) offered by the width and height sliders.
MAX_IMAGE_SIZE = 1024
|
|
|
|
|
def get_torch_dtype():
    """Return the dtype used to load the pipeline.

    Returns:
        ``torch.float16`` when CUDA is available, otherwise ``torch.float32``.
    """
    if torch.cuda.is_available():
        return torch.float16
    return torch.float32
|
|
|
|
|
def get_device():
    """Return the device string the pipeline is moved to.

    Returns:
        ``"cuda"`` when CUDA is available, otherwise ``"cpu"``.
    """
    if not torch.cuda.is_available():
        return "cpu"
    return "cuda"
|
|
|
|
|
|
|
|
def get_pipe():
    """Return the diffusion pipeline, loading it on first use.

    The loaded pipeline is stored as the ``pipe`` attribute of this function,
    so later calls return the same object without loading it again.

    Returns:
        The pipeline loaded from ``MODEL_REPO_ID`` with the dtype from
        ``get_torch_dtype()``, moved to the device from ``get_device()``.
    """
    try:
        # Fast path: a previous call already stored the pipeline.
        return get_pipe.pipe
    except AttributeError:
        pass

    loaded = DiffusionPipeline.from_pretrained(
        MODEL_REPO_ID,
        torch_dtype=get_torch_dtype(),
    )
    get_pipe.pipe = loaded.to(get_device())
    return get_pipe.pipe
|
|
|
|
|
|
|
|
def build_prompt(word):
    """Build the text-to-image prompt for a vocabulary word.

    Args:
        word: The word to illustrate; it is interpolated (single-quoted)
            into the prompt in two places.

    Returns:
        The full prompt as a single string.
    """
    fragments = (
        f"Create a powerful, emotionally resonant image that vividly illustrates the meaning of the word '{word}', ",
        "so that even someone who doesn’t speak English can understand it instantly. ",
        "The visual should be sharp, symbolic, and universally relatable. ",
        f"Seamlessly weave the word '{word}' into the scene—clearly spelled but not overpowering—",
        "so it supports the concept without drawing attention away. ",
        "Format: 1080x1080 pixels (square) for Instagram in a (.png) format.",
    )
    return "".join(fragments)
|
|
|
|
|
|
|
|
def generate_image(prompt, negative_prompt, guidance_scale, num_inference_steps, width, height, seed):
    """Run the pipeline once and return the first generated image.

    Args:
        prompt: Text prompt passed to the pipeline.
        negative_prompt: Negative prompt passed to the pipeline.
        guidance_scale: Guidance scale passed to the pipeline.
        num_inference_steps: Number of inference steps passed to the pipeline.
        width: Requested image width passed to the pipeline.
        height: Requested image height passed to the pipeline.
        seed: Seed for the ``torch.Generator`` handed to the pipeline.

    Returns:
        The first element of the pipeline output's ``images``.
    """
    rng = torch.Generator().manual_seed(seed)
    call_kwargs = {
        "prompt": prompt,
        "negative_prompt": negative_prompt,
        "guidance_scale": guidance_scale,
        "num_inference_steps": num_inference_steps,
        "width": width,
        "height": height,
        "generator": rng,
    }

    # The pipeline lookup (and its first-use load) stays inside inference mode,
    # as does the pipeline call itself.
    with torch.inference_mode():
        output = get_pipe()(**call_kwargs)
        return output.images[0]
|
|
|
|
|
|
|
|
def infer(
    word,
    negative_prompt,
    seed,
    randomize_seed,
    width,
    height,
    guidance_scale,
    num_inference_steps,
    # Gradio injects the progress tracker through this default argument.
    progress=gr.Progress(track_tqdm=True),
):
    """Gradio click handler: turn a vocabulary word into an image.

    Args:
        word: The vocabulary word typed by the user.
        negative_prompt: Negative prompt forwarded to the pipeline.
        seed: Seed from the slider; ignored when ``randomize_seed`` is true.
        randomize_seed: When true, draw a fresh seed in ``[0, MAX_SEED]``.
        width: Requested image width.
        height: Requested image height.
        guidance_scale: Guidance scale forwarded to the pipeline.
        num_inference_steps: Number of inference steps.
        progress: Gradio progress tracker (unused directly).

    Returns:
        A ``(image, seed)`` tuple, where ``seed`` is the integer seed actually
        used, so the UI can display it.

    Raises:
        gr.Error: If ``word`` is empty or contains only whitespace.
    """
    # Reject blank input up front instead of spending a full inference on a
    # prompt built around an empty word.
    word = (word or "").strip()
    if not word:
        raise gr.Error("Please enter a vocabulary word.")

    if randomize_seed:
        seed = random.randint(0, MAX_SEED)

    # Slider values may arrive as floats; the generator seed, the image size
    # and the step count must be integers.
    seed = int(seed)
    width = int(width)
    height = int(height)
    num_inference_steps = int(num_inference_steps)

    prompt = build_prompt(word)
    image = generate_image(prompt, negative_prompt, guidance_scale, num_inference_steps, width, height, seed)
    return image, seed
|
|
|
|
|
# Centre the app in a fixed-width column.
css = """
#col-container {
    margin: 0 auto;
    max-width: 640px;
}
"""

with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown(" # Word-to-Image Generator for Instagram 🎨")

        # Word input and the button that triggers generation.
        with gr.Row():
            word = gr.Text(
                label="Vocabulary Word",
                show_label=False,
                max_lines=1,
                placeholder="Enter a vocabulary word",
                container=False,
            )
            run_button = gr.Button("Generate Image", scale=0, variant="primary")

        result = gr.Image(label="Generated Image", show_label=False)

        with gr.Accordion("Advanced Settings", open=False):
            # Hidden from the UI but still passed to infer() as an input.
            negative_prompt = gr.Text(
                label="Negative prompt",
                max_lines=1,
                placeholder="Enter a negative prompt",
                visible=False,
            )
            seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)

            with gr.Row():
                # The default must lie within [minimum, maximum] and on the
                # step grid; the previous default of 1080 exceeded
                # MAX_IMAGE_SIZE and was not reachable with step=32.
                width = gr.Slider(
                    label="Width",
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
                    value=MAX_IMAGE_SIZE,
                )
                height = gr.Slider(
                    label="Height",
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
                    value=MAX_IMAGE_SIZE,
                )

            with gr.Row():
                # NOTE(review): the SDXL-Turbo model card recommends
                # guidance_scale=0.0 -- confirm the 3.5 default is intentional.
                guidance_scale = gr.Slider(label="Guidance scale", minimum=0.0, maximum=10.0, step=0.1, value=3.5)
                num_inference_steps = gr.Slider(label="Inference steps", minimum=1, maximum=50, step=1, value=4)

    # The seed slider is also an output, so the seed actually used is shown.
    run_button.click(
        fn=infer,
        inputs=[word, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps],
        outputs=[result, seed],
    )

if __name__ == "__main__":
    demo.launch()
|
|
|