Zeyu Zhao committed
Commit 4f86f8b · 1 Parent(s): 1a89cb5

Init the repo.

README.md CHANGED
@@ -1,14 +1,13 @@
 ---
 title: Sketch2Image
-emoji: 🌖
-colorFrom: indigo
-colorTo: green
+emoji: 📚
+colorFrom: green
+colorTo: blue
 sdk: gradio
-sdk_version: 5.16.0
+sdk_version: 5.15.0
 app_file: app.py
 pinned: false
-license: apache-2.0
-short_description: A Sketch2Image Demo
+license: mit
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,444 @@
+import base64
+import os
+import pdb
+import random
+import sys
+import time
+from io import BytesIO
+
+import gradio as gr
+import numpy as np
+import spaces
+import torch
+import torchvision.transforms.functional as TF
+from PIL import Image
+from torchvision import transforms
+
+from src.img2skt import image_to_sketch_gif
+from src.model import make_1step_sched
+from src.pix2pix_turbo import Pix2Pix_Turbo
+
+model = Pix2Pix_Turbo("sketch_to_image_stochastic")
+
+
+style_list = [
+    {
+        "name": "No Style",
+        "prompt": "{prompt}",
+    },
+    {
+        "name": "Cinematic",
+        "prompt": "cinematic still {prompt} . emotional, harmonious, vignette, highly detailed, high budget, bokeh, cinemascope, moody, epic, gorgeous, film grain, grainy",
+    },
+    {
+        "name": "3D Model",
+        "prompt": "professional 3d model {prompt} . octane render, highly detailed, volumetric, dramatic lighting",
+    },
+    {
+        "name": "Anime",
+        "prompt": "anime artwork {prompt} . anime style, key visual, vibrant, studio anime, highly detailed",
+    },
+    {
+        "name": "Digital Art",
+        "prompt": "concept art {prompt} . digital artwork, illustrative, painterly, matte painting, highly detailed",
+    },
+    {
+        "name": "Photographic",
+        "prompt": "cinematic photo {prompt} . 35mm photograph, film, bokeh, professional, 4k, highly detailed",
+    },
+    {
+        "name": "Pixel art",
+        "prompt": "pixel-art {prompt} . low-res, blocky, pixel art style, 8-bit graphics",
+    },
+    {
+        "name": "Fantasy art",
+        "prompt": "ethereal fantasy concept art of {prompt} . magnificent, celestial, ethereal, painterly, epic, majestic, magical, fantasy art, cover art, dreamy",
+    },
+    {
+        "name": "Neonpunk",
+        "prompt": "neonpunk style {prompt} . cyberpunk, vaporwave, neon, vibes, vibrant, stunningly beautiful, crisp, detailed, sleek, ultramodern, magenta highlights, dark purple shadows, high contrast, cinematic, ultra detailed, intricate, professional",
+    },
+    {
+        "name": "Manga",
+        "prompt": "manga style {prompt} . vibrant, high-energy, detailed, iconic, Japanese comic style",
+    },
+]
+
+styles = {k["name"]: k["prompt"] for k in style_list}
+STYLE_NAMES = list(styles.keys())
+DEFAULT_STYLE_NAME = "Manga"
+MAX_SEED = np.iinfo(np.int32).max
+
+HEIGHT = 512
+WIDTH = 512
+ITER_DELAY = 1.0
+
+
+# Create a white background image
+def create_white_background(width, height):
+    return Image.new("RGB", (width, height), color="white")
+
+
+white_background = create_white_background(WIDTH, HEIGHT)
+
+
+@spaces.GPU(duration=45)
+def run(image, prompt, prompt_template, style_name, seed, val_r):
+
+    image = image["composite"]
+    prompt = prompt_template.replace("{prompt}", prompt)
+    image = image.convert("RGB")
+    image = Image.fromarray(255 - np.array(image))
+    image_t = TF.to_tensor(image) > 0.5
+
+    with torch.no_grad():
+        c_t = image_t.unsqueeze(0).cuda().float()
+        torch.manual_seed(seed)
+        B, C, H, W = c_t.shape
+        noise = torch.randn((1, 4, H // 8, W // 8), device=c_t.device)
+        output_image = model(c_t, prompt, deterministic=False, r=val_r, noise_map=noise)
+    output_pil = TF.to_pil_image(output_image[0].cpu() * 0.5 + 0.5)
+    return output_pil
+
+
+def clear_image_editor():
+    return (
+        {"background": white_background, "layers": None, "composite": white_background},
+        gr.Image(
+            value=None,
+            height=HEIGHT,
+            width=WIDTH,
+            elem_id="output_image",
+            type="pil",
+            show_label=False,
+            show_download_button=True,
+            interactive=False,
+        ),
+        gr.Image(
+            value=None,
+            height=HEIGHT,
+            width=WIDTH,
+            show_label=False,
+            show_download_button=True,
+            type="pil",
+            interactive=False,
+        ),
+        gr.Image(
+            value=None,
+            height=HEIGHT,
+            width=WIDTH,
+            show_label=False,
+            show_download_button=True,
+            type="pil",
+            interactive=False,
+        ),
+        gr.State([]),
+        gr.Slider(
+            minimum=0,
+            maximum=1,
+            value=0,
+            step=1,
+            visible=False,
+            scale=4,
+            label="Frame Selector",
+        ),
+        gr.Button("Stop", scale=1, visible=True),
+    )
+
+
+def iter_frames(frames):
+    for frame in frames:
+        time.sleep(ITER_DELAY)
+        yield frame
+
+
+def apply_func_click():
+    return gr.Slider(
+        visible=True,
+    )
+
+
+with gr.Blocks() as demo:
+
+    with gr.Row():
+        with gr.Column():
+            # gr.Markdown("## INPUT", elem_id="input_header")
+            with gr.Row():
+                image = gr.Sketchpad(
+                    value={
+                        "background": white_background,
+                        "layers": None,
+                        "composite": white_background,
+                    },
+                    image_mode="L",
+                    type="pil",
+                    sources=None,
+                    # container=True,
+                    label="Sketch",
+                    show_label=True,
+                    show_download_button=True,
+                    # show_share_button=True,
+                    interactive=True,
+                    layers=False,
+                    canvas_size=(WIDTH, HEIGHT),
+                    show_fullscreen_button=False,
+                    brush=gr.Brush(
+                        colors=["#000000", "#FFFFFF"],
+                        color_mode="fixed",
+                        default_size=4,
+                    ),
+                )
+
+            with gr.Row():
+                prompt = gr.Textbox(label="Prompt", value="", show_label=True)
+            with gr.Row():
+                run_button = gr.Button("Run", scale=1)
+                randomize_seed = gr.Button("Random", scale=1)
+            with gr.Row():
+                apply_button = gr.Button("Stop", scale=1, visible=True)
+            with gr.Row():
+                frame_selector = gr.Slider(
+                    minimum=0,
+                    maximum=1,
+                    value=0,
+                    step=1,
+                    visible=False,
+                    scale=4,
+                    label="Frame Selector",
+                )
+
+            with gr.Row():
+                style = gr.Dropdown(
+                    label="Style",
+                    choices=STYLE_NAMES,
+                    value=DEFAULT_STYLE_NAME,
+                    scale=1,
+                    visible=False,
+                )
+                prompt_temp = gr.Textbox(
+                    label="Prompt Style Template",
+                    value=styles[DEFAULT_STYLE_NAME],
+                    max_lines=1,
+                    scale=2,
+                    visible=False,
+                )
+
+            with gr.Row():
+                val_r = gr.Slider(
+                    label="Sketch guidance: ",
+                    show_label=True,
+                    minimum=0,
+                    maximum=1,
+                    value=0.4,
+                    step=0.01,
+                    scale=4,
+                    visible=False,
+                )
+                seed = gr.Textbox(label="Seed", value=42, scale=4, visible=False)
+
+        with gr.Column():
+            # gr.Markdown("## OUTPUT", elem_id="output_header")
+            result = gr.Image(
+                height=HEIGHT,
+                width=WIDTH,
+                elem_id="output_image",
+                type="pil",
+                show_label=False,
+                show_download_button=True,
+                interactive=False,
+                visible=False,
+            )
+
+            gr.Markdown("### Instructions")
+            gr.Markdown("1. Enter a text prompt (e.g. cat)")
+            gr.Markdown("2. Draw some sketches on the Sketchpad")
+            gr.Markdown("3. Click on **Run** to generate the sketches powered by AI")
+            gr.Markdown(
+                "4. While you see the sketches coming out, click on **Stop** to stop more frames coming out"
+            )
+            gr.Markdown("5. Then you can select a frame by the Frame Selector")
+            gr.Markdown(
+                "6. You may then continue to draw more sketches or change the prompt and repeat the process"
+            )
+            gr.Markdown(
+                "7. You may try different random seeds by clicking on **Random**"
+            )
+            gr.Markdown(
+                "**Thanks to the [paper](https://arxiv.org/abs/2403.12036) and their open-sourced models!**"
+            )
+            frames = gr.State([])
+            sketches = gr.Image(
+                height=HEIGHT,
+                width=WIDTH,
+                show_label=False,
+                show_download_button=True,
+                type="pil",
+                visible=False,
+            )
+            one_frame = gr.Image(
+                height=HEIGHT,
+                width=WIDTH,
+                show_label=False,
+                show_download_button=True,
+                type="pil",
+                interactive=False,
+                visible=False,
+            )
+
+    inputs = [image, prompt, prompt_temp, style, seed, val_r]
+    outputs = [result]
+
+    randomize_seed_click = (
+        randomize_seed.click(
+            lambda: random.randint(0, MAX_SEED),
+            inputs=[],
+            outputs=seed,
+        )
+        .then(
+            fn=run,
+            inputs=inputs,
+            outputs=outputs,
+        )
+        .then(
+            image_to_sketch_gif,
+            inputs=[result],
+            outputs=[sketches, frames, frame_selector, apply_button],
+        )
+        .then(
+            iter_frames,
+            inputs=[frames],
+            outputs=[image],
+        )
+    )
+
+    prompt_submit = (
+        prompt.submit(fn=run, inputs=inputs, outputs=outputs)
+        .then(
+            image_to_sketch_gif,
+            inputs=[result],
+            outputs=[sketches, frames, frame_selector, apply_button],
+        )
+        .then(
+            iter_frames,
+            inputs=[frames],
+            outputs=[image],
+        )
+    )
+
+    style_change = (
+        style.change(lambda x: styles[x], inputs=[style], outputs=[prompt_temp])
+        .then(
+            fn=run,
+            inputs=inputs,
+            outputs=outputs,
+        )
+        .then(
+            image_to_sketch_gif,
+            inputs=[result],
+            outputs=[sketches, frames, frame_selector, apply_button],
+        )
+        .then(
+            iter_frames,
+            inputs=[frames],
+            outputs=[image],
+        )
+    )
+
+    val_r_change = (
+        val_r.change(run, inputs=inputs, outputs=outputs)
+        .then(
+            image_to_sketch_gif,
+            inputs=[result],
+            outputs=[sketches, frames, frame_selector, apply_button],
+        )
+        .then(
+            iter_frames,
+            inputs=[frames],
+            outputs=[image],
+        )
+    )
+
+    run_button_click = (
+        run_button.click(fn=run, inputs=inputs, outputs=outputs)
+        .then(
+            image_to_sketch_gif,
+            inputs=[result],
+            outputs=[sketches, frames, frame_selector, apply_button],
+        )
+        .then(
+            iter_frames,
+            inputs=[frames],
+            outputs=[image],
+        )
+    )
+
+    image_apply = (
+        image.apply(
+            run,
+            inputs=inputs,
+            outputs=outputs,
+        )
+        .then(
+            image_to_sketch_gif,
+            inputs=[result],
+            outputs=[sketches, frames, frame_selector, apply_button],
+        )
+        .then(
+            iter_frames,
+            inputs=[frames],
+            outputs=[image],
+        )
+    )
+
+    apply_button.click(
+        fn=None,
+        inputs=None,
+        outputs=None,
+        cancels=[
+            run_button_click,
+            randomize_seed_click,
+            prompt_submit,
+            style_change,
+            val_r_change,
+            image_apply,
+        ],
+    )
+    apply_button.click(
+        fn=apply_func_click,
+        inputs=None,
+        outputs=[frame_selector],
+    )
+
+    frame_selector.change(
+        lambda x, y: y[x], inputs=[frame_selector, frames], outputs=[image]
+    )
+
+    image.clear(
+        fn=None,
+        inputs=None,
+        outputs=None,
+        cancels=[
+            run_button_click,
+            randomize_seed_click,
+            prompt_submit,
+            style_change,
+            val_r_change,
+            image_apply,
+        ],
+    )
+    image.clear(
+        fn=clear_image_editor,
+        inputs=None,
+        outputs=[
+            image,
+            result,
+            sketches,
+            one_frame,
+            frames,
+            frame_selector,
+            apply_button,
+        ],
+    )
+
+if __name__ == "__main__":
+    demo.queue().launch(debug=True)
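For quick local checks outside the UI, the run() wired above can be called directly with the same dict shape that gr.Sketchpad produces. A minimal sketch, assuming a CUDA device, the downloaded checkpoint, and app.py's module-level names; the blank canvas, prompt, and output path are placeholders:

from PIL import Image

canvas = Image.new("L", (WIDTH, HEIGHT), color="white")  # blank sketch, mode "L" like the Sketchpad
editor_value = {"background": canvas, "layers": None, "composite": canvas}
preview = run(editor_value, "cat", styles["Manga"], "Manga", seed=42, val_r=0.4)
preview.save("preview.png")  # placeholder output path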
requirements.txt ADDED
@@ -0,0 +1,30 @@
+clip @ git+https://github.com/openai/CLIP.git
+einops>=0.6.1
+numpy>=1.24.4
+open-clip-torch>=2.20.0
+opencv-python==4.6.0.66
+pillow>=9.5.0
+scipy==1.11.1
+timm>=0.9.2
+tokenizers
+torch>=2.1.0
+
+torchaudio>=2.0.2
+torchdata
+torchmetrics>=1.0.1
+torchvision>=0.15.2
+
+tqdm>=4.65.0
+transformers==4.43.2
+triton
+urllib3<1.27,>=1.25.4
+xformers>=0.0.20
+accelerate
+streamlit-keyup==0.2.0
+lpips
+clean-fid
+peft
+dominate
+diffusers>=0.25.1
+huggingface_hub>=0.26.0
+hf_transfer
src/__pycache__/image_prep.cpython-310.pyc ADDED
Binary file (544 Bytes).
src/__pycache__/img2skt.cpython-312.pyc ADDED
Binary file (3.13 kB).
src/__pycache__/model.cpython-310.pyc ADDED
Binary file (699 Bytes).
src/__pycache__/model.cpython-312.pyc ADDED
Binary file (3.06 kB).
src/__pycache__/pix2pix_turbo.cpython-310.pyc ADDED
Binary file (6.84 kB).
src/__pycache__/pix2pix_turbo.cpython-312.pyc ADDED
Binary file (12.1 kB).
src/image_prep.py ADDED
@@ -0,0 +1,12 @@
+import numpy as np
+from PIL import Image
+import cv2
+
+
+def canny_from_pil(image, low_threshold=100, high_threshold=200):
+    image = np.array(image)
+    image = cv2.Canny(image, low_threshold, high_threshold)
+    image = image[:, :, None]
+    image = np.concatenate([image, image, image], axis=2)
+    control_image = Image.fromarray(image)
+    return control_image
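A hypothetical standalone use of canny_from_pil (app.py itself does not import this helper; the file names are placeholders):

from PIL import Image
from src.image_prep import canny_from_pil

photo = Image.open("photo.png").convert("RGB")  # placeholder input image
edges = canny_from_pil(photo, low_threshold=100, high_threshold=200)
edges.save("edges.png")  # white Canny edges on black, returned as a 3-channel PIL image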
src/img2skt.py ADDED
@@ -0,0 +1,83 @@
+import io
+import os
+import tempfile
+
+import cv2
+import gradio as gr
+import numpy as np
+from PIL import Image, ImageSequence
+
+
+def image_to_sketch_gif(input_image: Image.Image):
+    # Convert PIL image to OpenCV format
+    open_cv_image = np.array(input_image.convert("RGB"))
+    open_cv_image = cv2.cvtColor(open_cv_image, cv2.COLOR_RGB2BGR)
+
+    # Convert to grayscale
+    grayscale_image = cv2.cvtColor(open_cv_image, cv2.COLOR_BGR2GRAY)
+
+    # Apply Gaussian blur
+    blurred_image = cv2.GaussianBlur(grayscale_image, (5, 5), 0)
+
+    # Use Canny Edge Detection
+    edges = cv2.Canny(blurred_image, threshold1=50, threshold2=150)
+
+    # Ensure binary format
+    _, binary_sketch = cv2.threshold(edges, 128, 255, cv2.THRESH_BINARY)
+
+    # Find connected components
+    num_labels, labels, stats, _ = cv2.connectedComponentsWithStats(
+        binary_sketch, connectivity=8
+    )
+
+    # Sort components by size (excluding the background, which is label 0)
+    components = sorted(
+        [(i, stats[i, cv2.CC_STAT_AREA]) for i in range(1, num_labels)],
+        key=lambda x: x[1],
+        reverse=True,
+    )
+
+    # Initialize an empty canvas for accumulation
+    accumulated_image = np.zeros_like(binary_sketch, dtype=np.uint8)
+
+    # Store frames
+    frames = []
+
+    for label, _ in components:
+        # Add the current component to the accumulation
+        accumulated_image[labels == label] = 255
+
+        # Convert OpenCV image to PIL image and append to frames
+        pil_frame = Image.fromarray(255 - accumulated_image)
+        frames.append(pil_frame.copy())
+
+    # Add the input image as the final frame
+    frames.append(input_image.copy())
+
+    # Save GIF to a temporary file
+    tmp_dir = tempfile.gettempdir()  # Get system temp directory
+    tmp_gif_path = os.path.join(tmp_dir, "sketch_animation.gif")
+    frames[0].save(
+        tmp_gif_path,
+        format="GIF",
+        save_all=True,
+        append_images=frames[1:],
+        duration=100,
+        loop=0,
+    )
+
+    return (
+        tmp_gif_path,
+        frames,
+        gr.Slider(
+            minimum=0,
+            maximum=len(frames) - 1,
+            value=0,
+            step=1,
+            visible=False,
+            scale=4,
+            label="Frame Selector",
+            interactive=True,
+        ),
+        gr.Button("Stop", scale=1, visible=True),
+    )
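The function can also be exercised outside a Gradio Blocks context; a minimal sketch in which the input path is a placeholder and the gr.Slider/gr.Button updates in the return tuple are simply ignored:

from PIL import Image
from src.img2skt import image_to_sketch_gif

generated = Image.open("generated.png")  # placeholder: an image produced by the model
gif_path, frames, _slider_update, _button_update = image_to_sketch_gif(generated)
print(f"{len(frames)} frames, animation saved to {gif_path}")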
src/model.py ADDED
@@ -0,0 +1,71 @@
+import os, sys, pdb
+
+import diffusers
+from transformers import AutoTokenizer, PretrainedConfig
+from diffusers import AutoencoderKL, UNet2DConditionModel, DDPMScheduler
+
+
+def make_1step_sched():
+    noise_scheduler = DDPMScheduler.from_pretrained(
+        "stabilityai/sd-turbo", subfolder="scheduler"
+    )
+    noise_scheduler_1step = DDPMScheduler.from_pretrained(
+        "stabilityai/sd-turbo", subfolder="scheduler"
+    )
+    noise_scheduler_1step.set_timesteps(1, device="cuda")
+    noise_scheduler_1step.alphas_cumprod = noise_scheduler_1step.alphas_cumprod.cuda()
+    return noise_scheduler_1step
+
+
+"""The forward method of the `Encoder` class."""
+
+
+def my_vae_encoder_fwd(self, sample):
+    sample = self.conv_in(sample)
+    l_blocks = []
+    # down
+    for down_block in self.down_blocks:
+        l_blocks.append(sample)
+        sample = down_block(sample)
+    # middle
+    sample = self.mid_block(sample)
+    sample = self.conv_norm_out(sample)
+    sample = self.conv_act(sample)
+    sample = self.conv_out(sample)
+    self.current_down_blocks = l_blocks
+    return sample
+
+
+"""The forward method of the `Decoder` class."""
+
+
+def my_vae_decoder_fwd(self, sample, latent_embeds=None):
+    sample = self.conv_in(sample)
+    upscale_dtype = next(iter(self.up_blocks.parameters())).dtype
+    # middle
+    sample = self.mid_block(sample, latent_embeds)
+    sample = sample.to(upscale_dtype)
+    if not self.ignore_skip:
+        skip_convs = [
+            self.skip_conv_1,
+            self.skip_conv_2,
+            self.skip_conv_3,
+            self.skip_conv_4,
+        ]
+        # up
+        for idx, up_block in enumerate(self.up_blocks):
+            skip_in = skip_convs[idx](self.incoming_skip_acts[::-1][idx] * self.gamma)
+            # add skip
+            sample = sample + skip_in
+            sample = up_block(sample, latent_embeds)
+    else:
+        for idx, up_block in enumerate(self.up_blocks):
+            sample = up_block(sample, latent_embeds)
+    # post-process
+    if latent_embeds is None:
+        sample = self.conv_norm_out(sample)
+    else:
+        sample = self.conv_norm_out(sample, latent_embeds)
+    sample = self.conv_act(sample)
+    sample = self.conv_out(sample)
+    return sample
@@ -0,0 +1,209 @@
 
+import spaces
+import os
+import requests
+import sys
+import pdb
+import copy
+from tqdm import tqdm
+import torch
+from transformers import AutoTokenizer, PretrainedConfig, CLIPTextModel
+from diffusers import AutoencoderKL, UNet2DConditionModel, DDPMScheduler
+from diffusers.utils.peft_utils import set_weights_and_activate_adapters
+from peft import LoraConfig
+
+p = "src/"
+sys.path.append(p)
+from model import make_1step_sched, my_vae_encoder_fwd, my_vae_decoder_fwd
+
+
+class TwinConv(torch.nn.Module):
+    def __init__(self, convin_pretrained, convin_curr):
+        super(TwinConv, self).__init__()
+        self.conv_in_pretrained = copy.deepcopy(convin_pretrained)
+        self.conv_in_curr = copy.deepcopy(convin_curr)
+        self.r = None
+
+    def forward(self, x):
+        x1 = self.conv_in_pretrained(x).detach()
+        x2 = self.conv_in_curr(x)
+        return x1 * (1 - self.r) + x2 * (self.r)
+
+
+class Pix2Pix_Turbo(torch.nn.Module):
+    def __init__(self, name, ckpt_folder="checkpoints"):
+        super().__init__()
+        self.tokenizer = AutoTokenizer.from_pretrained(
+            "stabilityai/sd-turbo", subfolder="tokenizer"
+        )
+        self.text_encoder = CLIPTextModel.from_pretrained(
+            "stabilityai/sd-turbo", subfolder="text_encoder"
+        ).cuda()
+        self.sched = make_1step_sched()
+
+        vae = AutoencoderKL.from_pretrained("stabilityai/sd-turbo", subfolder="vae")
+        unet = UNet2DConditionModel.from_pretrained(
+            "stabilityai/sd-turbo", subfolder="unet"
+        )
+
+        if name == "edge_to_image":
+            url = "https://www.cs.cmu.edu/~img2img-turbo/models/edge_to_image_loras.pkl"
+            os.makedirs(ckpt_folder, exist_ok=True)
+            outf = os.path.join(ckpt_folder, "edge_to_image_loras.pkl")
+            if not os.path.exists(outf):
+                print(f"Downloading checkpoint to {outf}")
+                response = requests.get(url, stream=True)
+                total_size_in_bytes = int(response.headers.get("content-length", 0))
+                block_size = 1024  # 1 Kibibyte
+                progress_bar = tqdm(
+                    total=total_size_in_bytes, unit="iB", unit_scale=True
+                )
+                with open(outf, "wb") as file:
+                    for data in response.iter_content(block_size):
+                        progress_bar.update(len(data))
+                        file.write(data)
+                progress_bar.close()
+                if total_size_in_bytes != 0 and progress_bar.n != total_size_in_bytes:
+                    print("ERROR, something went wrong")
+                print(f"Downloaded successfully to {outf}")
+            p_ckpt = outf
+            sd = torch.load(p_ckpt, map_location="cpu")
+            unet_lora_config = LoraConfig(
+                r=sd["rank_unet"],
+                init_lora_weights="gaussian",
+                target_modules=sd["unet_lora_target_modules"],
+            )
+
+        if name == "sketch_to_image_stochastic":
+            # download from url
+            url = "https://www.cs.cmu.edu/~img2img-turbo/models/sketch_to_image_stochastic_lora.pkl"
+            os.makedirs(ckpt_folder, exist_ok=True)
+            outf = os.path.join(ckpt_folder, "sketch_to_image_stochastic_lora.pkl")
+            if not os.path.exists(outf):
+                print(f"Downloading checkpoint to {outf}")
+                response = requests.get(url, stream=True)
+                total_size_in_bytes = int(response.headers.get("content-length", 0))
+                block_size = 1024  # 1 Kibibyte
+                progress_bar = tqdm(
+                    total=total_size_in_bytes, unit="iB", unit_scale=True
+                )
+                with open(outf, "wb") as file:
+                    for data in response.iter_content(block_size):
+                        progress_bar.update(len(data))
+                        file.write(data)
+                progress_bar.close()
+                if total_size_in_bytes != 0 and progress_bar.n != total_size_in_bytes:
+                    print("ERROR, something went wrong")
+                print(f"Downloaded successfully to {outf}")
+            p_ckpt = outf
+            sd = torch.load(p_ckpt, map_location="cpu")
+            unet_lora_config = LoraConfig(
+                r=sd["rank_unet"],
+                init_lora_weights="gaussian",
+                target_modules=sd["unet_lora_target_modules"],
+            )
+            convin_pretrained = copy.deepcopy(unet.conv_in)
+            unet.conv_in = TwinConv(convin_pretrained, unet.conv_in)
+
+        vae.encoder.forward = my_vae_encoder_fwd.__get__(
+            vae.encoder, vae.encoder.__class__
+        )
+        vae.decoder.forward = my_vae_decoder_fwd.__get__(
+            vae.decoder, vae.decoder.__class__
+        )
+        # add the skip connection convs
+        vae.decoder.skip_conv_1 = torch.nn.Conv2d(
+            512, 512, kernel_size=(1, 1), stride=(1, 1), bias=False
+        ).cuda()
+        vae.decoder.skip_conv_2 = torch.nn.Conv2d(
+            256, 512, kernel_size=(1, 1), stride=(1, 1), bias=False
+        ).cuda()
+        vae.decoder.skip_conv_3 = torch.nn.Conv2d(
+            128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False
+        ).cuda()
+        vae.decoder.skip_conv_4 = torch.nn.Conv2d(
+            128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
+        ).cuda()
+        vae_lora_config = LoraConfig(
+            r=sd["rank_vae"],
+            init_lora_weights="gaussian",
+            target_modules=sd["vae_lora_target_modules"],
+        )
+        vae.decoder.ignore_skip = False
+        vae.add_adapter(vae_lora_config, adapter_name="vae_skip")
+        unet.add_adapter(unet_lora_config)
+        _sd_unet = unet.state_dict()
+        for k in sd["state_dict_unet"]:
+            _sd_unet[k] = sd["state_dict_unet"][k]
+        unet.load_state_dict(_sd_unet)
+
+        @spaces.GPU()
+        def wrapper(unet):
+            unet.enable_xformers_memory_efficient_attention()
+            return unet
+
+        unet = wrapper(unet)
+        _sd_vae = vae.state_dict()
+        for k in sd["state_dict_vae"]:
+            _sd_vae[k] = sd["state_dict_vae"][k]
+        vae.load_state_dict(_sd_vae)
+        unet.to("cuda")
+        vae.to("cuda")
+        unet.eval()
+        vae.eval()
+        self.unet, self.vae = unet, vae
+        self.vae.decoder.gamma = 1
+        self.timesteps = torch.tensor([999], device="cuda").long()
+
+    def forward(self, c_t, prompt, deterministic=True, r=1.0, noise_map=None):
+        # encode the text prompt
+        caption_tokens = self.tokenizer(
+            prompt,
+            max_length=self.tokenizer.model_max_length,
+            padding="max_length",
+            truncation=True,
+            return_tensors="pt",
+        ).input_ids.cuda()
+        caption_enc = self.text_encoder(caption_tokens)[0]
+        if deterministic:
+            encoded_control = (
+                self.vae.encode(c_t).latent_dist.sample()
+                * self.vae.config.scaling_factor
+            )
+            model_pred = self.unet(
+                encoded_control,
+                self.timesteps,
+                encoder_hidden_states=caption_enc,
+            ).sample
+            x_denoised = self.sched.step(
+                model_pred, self.timesteps, encoded_control, return_dict=True
+            ).prev_sample
+            self.vae.decoder.incoming_skip_acts = self.vae.encoder.current_down_blocks
+            output_image = (
+                self.vae.decode(x_denoised / self.vae.config.scaling_factor).sample
+            ).clamp(-1, 1)
+        else:
+            # scale the lora weights based on the r value
+            self.unet.set_adapters(["default"], weights=[r])
+            set_weights_and_activate_adapters(self.vae, ["vae_skip"], [r])
+            encoded_control = (
+                self.vae.encode(c_t).latent_dist.sample()
+                * self.vae.config.scaling_factor
+            )
+            # combine the input and noise
+            unet_input = encoded_control * r + noise_map * (1 - r)
+            self.unet.conv_in.r = r
+            unet_output = self.unet(
+                unet_input,
+                self.timesteps,
+                encoder_hidden_states=caption_enc,
+            ).sample
+            self.unet.conv_in.r = None
+            x_denoised = self.sched.step(
+                unet_output, self.timesteps, unet_input, return_dict=True
+            ).prev_sample
+            self.vae.decoder.incoming_skip_acts = self.vae.encoder.current_down_blocks
+            self.vae.decoder.gamma = r
+            output_image = (
+                self.vae.decode(x_denoised / self.vae.config.scaling_factor).sample
+            ).clamp(-1, 1)
+        return output_image
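Putting the pieces together, the stochastic sketch-to-image path can be driven without the UI, mirroring run() in app.py. A minimal sketch, assuming a CUDA device and that the LoRA checkpoint downloads on first use; the sketch and output file names are placeholders:

import numpy as np
import torch
import torchvision.transforms.functional as TF
from PIL import Image

from src.pix2pix_turbo import Pix2Pix_Turbo

model = Pix2Pix_Turbo("sketch_to_image_stochastic")

# black strokes on a white 512x512 canvas, as drawn in the demo
sketch = Image.open("sketch.png").convert("RGB").resize((512, 512))
c_t = (TF.to_tensor(Image.fromarray(255 - np.array(sketch))) > 0.5).unsqueeze(0).cuda().float()

torch.manual_seed(42)
noise = torch.randn((1, 4, 512 // 8, 512 // 8), device=c_t.device)
with torch.no_grad():
    out = model(c_t, "manga style cat", deterministic=False, r=0.4, noise_map=noise)
TF.to_pil_image(out[0].cpu() * 0.5 + 0.5).save("output.png")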