import PIL
import requests
import torch
import gradio as gr
import random
import datetime
from PIL import Image
import os
import time
from diffusers import (
StableDiffusionInstructPix2PixPipeline,
EulerAncestralDiscreteScheduler,
)
# Module-level model setup: runs once at import time and requires a CUDA device.
# Load the InstructPix2Pix pipeline through the Diffusers library, requesting
# half-precision weights (torch.float16) from the "fp16" revision of the repo.
model_id = "timbrooks/instruct-pix2pix"
pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(
model_id, torch_dtype=torch.float16, revision="fp16"
) # Commented-out alternative kept from the original: ", safety_checker=None)"
# Move the whole pipeline onto the GPU.
pipe.to("cuda")
# Switch attention to the xformers memory-efficient implementation.
pipe.enable_xformers_memory_efficient_attention()
# Store the UNet in channels-last memory format.
pipe.unet.to(memory_format=torch.channels_last)
# Module-level counter initialised to zero.
# NOTE(review): not referenced by chat() (which receives `counter_out` as an
# argument); its consumer, if any, is outside this part of the file - confirm.
counter = 0
# User-facing Markdown help text with tuning tips for the editing parameters
# (guidance_scale, image_guidance_scale, number of steps, prompt phrasing).
# NOTE(review): where this string is rendered is not visible in this part of
# the file; the text is runtime content and is left exactly as authored.
help_text = """ Note: Functionality to revert your changes to previous/original image can be released in future versions. For now only forward editing is available.
Some notes from the official [instruct-pix2pix](https://huggingface.co/spaces/timbrooks/instruct-pix2pix) Space by the authors
and from the official [Diffusers docs](https://huggingface.co/docs/diffusers/main/en/api/pipelines/stable_diffusion/pix2pix) -
If you're not getting what you want, there may be a few reasons:
1. Is the image not changing enough? Your guidance_scale may be too low. It should be >1. Higher guidance scale encourages to generate images
that are closely linked to the text `prompt`, usually at the expense of lower image quality. This value dictates how similar the output should
be to the input. This pipeline requires a value of at least `1`. It's possible your edit requires larger changes from the original image.
2. Alternatively, you can toggle image_guidance_scale. Image guidance scale is to push the generated image towards the inital image. Image guidance
scale is enabled by setting `image_guidance_scale > 1`. Higher image guidance scale encourages to generate images that are closely
linked to the source image `image`, usually at the expense of lower image quality.
3. I have observed that rephrasing the instruction sometimes improves results (e.g., "turn him into a dog" vs. "make him a dog" vs. "as a dog").
4. Increasing the number of steps sometimes improves results.
5. Do faces look weird? The Stable Diffusion autoencoder has a hard time with faces that are small in the image. Try:
* Cropping the image so the face takes up a larger portion of the frame.
"""
def chat(
    image_in,
    in_steps,
    in_guidance_scale,
    in_img_guidance_scale,
    image_hid,
    img_name,
    counter_out,
    prompt,
    history,
    progress=gr.Progress(track_tqdm=True),
):
    """Run one instruction-guided edit and record it in the chat history.

    The first call of a session (``counter_out`` not greater than zero) edits
    ``image_in`` and saves the result under a fresh random file name. Every
    later call edits ``image_hid`` instead, deletes the file recorded in
    ``img_name`` and saves the new result under a timestamped name derived
    from it.

    Args:
        image_in: Image passed to the pipeline on the first edit.
        in_steps: Number of inference steps; converted with ``int``.
        in_guidance_scale: Text guidance scale; converted with ``float``.
        in_img_guidance_scale: Image guidance scale; converted with ``float``.
        image_hid: Image passed to the pipeline on follow-up edits.
        img_name: Path of the previously saved result. Only read on follow-up
            edits; on the first edit it is replaced by a generated name.
        counter_out: Number of edits performed so far.
        prompt: Edit instruction handed to the pipeline.
        history: List of ``(prompt, response)`` tuples, or a falsy value to
            start a new list.
        progress: Gradio progress tracker; called once to signal the start.

    Returns:
        A 5-tuple ``(history, history, edited_image, saved_path, counter_out)``
        where ``history`` includes the new exchange, ``saved_path`` is the file
        the edited image was written to and ``counter_out`` has been
        incremented by one.
    """
    start = datetime.datetime.now()
    progress(0, desc="Starting...")
    # if message == "revert": --to add revert functionality later

    is_followup = counter_out > 0
    if is_followup:
        source_image = image_hid
    else:
        source_image = image_in
        seed = random.randint(0, 1000000)
        img_name = f"./edited_image_{seed}.png"

    edited_image = pipe(
        prompt,
        image=source_image,
        num_inference_steps=int(in_steps),
        guidance_scale=float(in_guidance_scale),
        image_guidance_scale=float(in_img_guidance_scale),
    ).images[0]

    # Drop whatever is stored under img_name: the previous result on a
    # follow-up edit, or a stale file that happens to share the random name.
    if os.path.exists(img_name):
        os.remove(img_name)

    if is_followup:
        # The timestamp is taken after generation so every follow-up result
        # gets a new file name. splitext strips the extension without
        # assuming it is exactly four characters long.
        saved_name = os.path.splitext(img_name)[0] + str(int(time.time())) + ".png"
    else:
        saved_name = img_name
    with open(saved_name, "wb") as fp:
        edited_image.save(fp)
    counter_out += 1

    print(f"Ran in {datetime.datetime.now() - start}")
    history = history or []
    add_text_list = [
        "There you go",
        "Enjoy your image!",
        "Nice work! Wonder what you gonna do next!",
        "Way to go!",
        "Does this work for you?",
        "Something like this?",
    ]
    # NOTE(review): the original concatenated a string literal here that was
    # empty in one branch and broken across two lines (a SyntaxError) in the
    # other. The leftover "# IMG_NAME" remark suggests the saved image name
    # used to be embedded in the reply - confirm against the upstream source
    # before restoring any markup. Until then both paths reply with plain text.
    response = random.choice(add_text_list)
    history.append((prompt, response))
    return history, history, edited_image, saved_name, counter_out
with gr.Blocks() as demo:
gr.Markdown(
"""
For faster inference without waiting in the queue, you may duplicate the space and upgrade to GPU in settings.
**Note: Please be advised that a safety checker has been implemented in this public space.
Any attempts to generate inappropriate or NSFW images will result in the display of a black screen
as a precautionary measure for the protection of all users. We appreciate your cooperation in
maintaining a safe and appropriate environment for all members of our community.**