|
import gradio as gr |
|
import torch |
|
from PIL import Image |
|
|
|
from lambda_diffusers import StableDiffusionImageEmbedPipeline |
|
|
|
def ask(input_im, scale, steps, seed, images=None):
    """Generate a batch of two variations and append them to ``images``.

    Args:
        input_im: Source image. It is passed to the pipeline twice, so one
            call produces a batch of two samples.
        scale: Value forwarded to the pipeline as ``guidance_scale``.
        steps: Value forwarded as ``num_inference_steps`` after ``int``
            coercion (UI sliders may deliver floats).
        seed: Seed for the torch generator; coerced with ``int``.
        images: List that receives the results. It is extended in place;
            a new list is created when omitted.

    Returns:
        The ``images`` list, with one entry appended per generated sample.
        Samples flagged by the pipeline's NSFW output are replaced with the
        placeholder image loaded from ``unsafe.png``.
    """
    if images is None:
        images = []

    generator = torch.Generator(device=device).manual_seed(int(seed))

    result = pipe(
        2 * [input_im],
        guidance_scale=scale,
        num_inference_steps=int(steps),
        generator=generator,
    )

    # Open the placeholder lazily and at most once per call, rather than
    # re-opening the file for every flagged sample.
    placeholder = None
    for sample, flagged in zip(result["sample"], result["nsfw_content_detected"]):
        if flagged:
            if placeholder is None:
                placeholder = Image.open(r"unsafe.png")
            images.append(placeholder)
        else:
            images.append(sample)

    return images
|
|
|
def main(input_im, scale, steps, seed):
    """Gradio callback: produce four variations of ``input_im``.

    Runs ``ask`` twice (two samples per call) and returns the combined list.

    Args:
        input_im: Image supplied by the UI; ``None`` when nothing was uploaded.
        scale: Guidance scale forwarded to ``ask``.
        steps: Number of inference steps forwarded to ``ask``.
        seed: Base seed. The first batch uses it as-is; the second batch
            uses ``seed + 1``.

    Returns:
        List of generated images (or placeholders) for the gallery.

    Raises:
        ValueError: If no input image was provided.
    """
    if input_im is None:
        raise ValueError("An input image is required to generate variations.")

    base_seed = int(seed)
    images = []
    # ``ask`` re-seeds its generator on every call, so reusing the same seed
    # for both batches would regenerate the first pair. Offsetting the seed
    # keeps results reproducible while yielding distinct batches.
    for batch_index in range(2):
        images = ask(input_im, scale, steps, base_seed + batch_index, images)

    return images
|
|
|
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the image-variations pipeline at a pinned revision and move it to the
# selected device.
pipe = StableDiffusionImageEmbedPipeline.from_pretrained(
    "lambdalabs/sd-image-variations-diffusers",
    revision="273115e88df42350019ef4d628265b8c29ef4af5",
).to(device)
|
|
|
# Input widgets, listed in the positional order of ``main``'s parameters:
# image, guidance scale, inference steps, seed.
inputs = [
    gr.Image(),
    gr.Slider(minimum=0, maximum=25, value=3, step=1, label="Guidance scale"),
    gr.Slider(minimum=5, maximum=50, value=25, step=5, label="Steps"),
    gr.Slider(minimum=0, maximum=2**31 - 1, step=1, randomize=True, label="Seed"),
]

# Gallery that displays the generated images in a two-column grid.
# NOTE(review): ``.style()`` is reportedly deprecated in newer Gradio
# releases — confirm against the pinned Gradio version before upgrading.
output = gr.Gallery(label="Generated variations")
output.style(grid=2, height="")
|
|
|
# Markdown/HTML blurb passed to ``gr.Interface`` as ``description``.
description = """
<p style='text-align: center;'>

__This demo is running on CPU. Working version fixed by @fffiloni. You'll get 4 image variations. NSFW filters enabled.__

<img id='visitor-badge' alt='visitor badge' src='https://visitor-badge.glitch.me/badge?page_id=gradio-blocks.sd-img-variations' style='display: inline-block' /><br />
Generate variations on an input image using a fine-tuned version of Stable Diffusion.
Trained by [Justin Pinkney](https://www.justinpinkney.com) ([@Buntworthy](https://twitter.com/Buntworthy)) at [Lambda](https://lambdalabs.com/)
This version has been ported to 🤗 Diffusers library, see more details on how to use this version in the [Lambda Diffusers repo](https://github.com/LambdaLabsML/lambda-diffusers).

__For the original training code see [this repo](https://github.com/justinpinkney/stable-diffusion).__

</p>
"""
|
|
|
# Markdown explainer passed to ``gr.Interface`` as ``article``.
article = """
## How does this work?
The normal Stable Diffusion model is trained to be conditioned on text input. This version has had the original text encoder (from CLIP) removed, and replaced with
the CLIP _image_ encoder instead. So instead of generating images based on a text input, images are generated to match CLIP's embedding of the image.
This creates images which have the same rough style and content, but different details, in particular the composition is generally quite different.
This is a totally different approach to the img2img script of the original Stable Diffusion and gives very different results.
The model was fine tuned on the [LAION aesthetics v2 6+ dataset](https://laion.ai/blog/laion-aesthetics/) to accept the new conditioning.
Training was done on 4xA6000 GPUs on [Lambda GPU Cloud](https://lambdalabs.com/service/gpu-cloud).
More details on the method and training will come in a future blog post.
"""
|
|
|
# Wire the callback, widgets and page text into a Gradio interface, then
# start serving it.
demo = gr.Interface(
    fn=main,
    inputs=inputs,
    outputs=output,
    title="Stable Diffusion Image Variations",
    description=description,
    article=article,
)
demo.launch()
|
|