Merge branch 'main' into text2igm
- app-controlnet.py +10 -4
- app-img2img.py +8 -2
- app-txt2img.py +8 -2
- latent_consistency_controlnet.py +20 -15
- requirements.txt +1 -1
app-controlnet.py
CHANGED
@@ -35,6 +35,7 @@ import psutil
 MAX_QUEUE_SIZE = int(os.environ.get("MAX_QUEUE_SIZE", 0))
 TIMEOUT = float(os.environ.get("TIMEOUT", 0))
 SAFETY_CHECKER = os.environ.get("SAFETY_CHECKER", None)
+TORCH_COMPILE = os.environ.get("TORCH_COMPILE", None)
 WIDTH = 512
 HEIGHT = 512
 # disable tiny autoencoder for better quality speed tradeoff
@@ -100,15 +101,20 @@ pipe.unet.to(memory_format=torch.channels_last)
 if psutil.virtual_memory().total < 64 * 1024**3:
     pipe.enable_attention_slicing()

-if not mps_available and not xpu_available:
-    pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
-    pipe(prompt="warmup", image=[Image.new("RGB", (768, 768))], control_image=[Image.new("RGB", (768, 768))])
-
 compel_proc = Compel(
     tokenizer=pipe.tokenizer,
     text_encoder=pipe.text_encoder,
     truncate_long_prompts=False,
 )
+if TORCH_COMPILE:
+    pipe.text_encoder = torch.compile(pipe.text_encoder, mode="max-autotune", fullgraph=False)
+    pipe.tokenizer = torch.compile(pipe.tokenizer, mode="max-autotune", fullgraph=False)
+    pipe.unet = torch.compile(pipe.unet, mode="max-autotune", fullgraph=False)
+    pipe.vae = torch.compile(pipe.vae, mode="max-autotune", fullgraph=False)
+
+    pipe(prompt="warmup", image=[Image.new("RGB", (768, 768))], control_image=[Image.new("RGB", (768, 768))])
+
+
 user_queue_map = {}
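Note: this change replaces the unconditional reduce-overhead compile of the UNet with an opt-in path gated by the TORCH_COMPILE environment variable, compiling the text encoder, tokenizer, UNet and VAE with mode="max-autotune" and then running a warmup call so compilation cost is paid before the first client request. A minimal sketch of that opt-in pattern; maybe_compile is a hypothetical helper name, not part of this PR:

    import os
    import torch

    def maybe_compile(pipe):
        # Only compile when the operator explicitly opts in via TORCH_COMPILE.
        if os.environ.get("TORCH_COMPILE", None):
            # "max-autotune" spends longer compiling up front in exchange for faster
            # steady-state inference; fullgraph=False tolerates graph breaks.
            pipe.unet = torch.compile(pipe.unet, mode="max-autotune", fullgraph=False)
            pipe.vae = torch.compile(pipe.vae, mode="max-autotune", fullgraph=False)
        return pipe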
app-img2img.py
CHANGED
@@ -29,6 +29,8 @@ import psutil
 MAX_QUEUE_SIZE = int(os.environ.get("MAX_QUEUE_SIZE", 0))
 TIMEOUT = float(os.environ.get("TIMEOUT", 0))
 SAFETY_CHECKER = os.environ.get("SAFETY_CHECKER", None)
+TORCH_COMPILE = os.environ.get("TORCH_COMPILE", None)
+
 WIDTH = 512
 HEIGHT = 512
 # disable tiny autoencoder for better quality speed tradeoff
@@ -76,8 +78,12 @@ pipe.unet.to(memory_format=torch.channels_last)
 if psutil.virtual_memory().total < 64 * 1024**3:
     pipe.enable_attention_slicing()

-if not mps_available and not xpu_available:
-    pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
+if TORCH_COMPILE:
+    pipe.text_encoder = torch.compile(pipe.text_encoder, mode="max-autotune", fullgraph=False)
+    pipe.tokenizer = torch.compile(pipe.tokenizer, mode="max-autotune", fullgraph=False)
+    pipe.unet = torch.compile(pipe.unet, mode="max-autotune", fullgraph=False)
+    pipe.vae = torch.compile(pipe.vae, mode="max-autotune", fullgraph=False)
+
 pipe(prompt="warmup", image=[Image.new("RGB", (512, 512))])

 compel_proc = Compel(
app-txt2img.py
CHANGED
@@ -30,6 +30,8 @@ import psutil
 MAX_QUEUE_SIZE = int(os.environ.get("MAX_QUEUE_SIZE", 0))
 TIMEOUT = float(os.environ.get("TIMEOUT", 0))
 SAFETY_CHECKER = os.environ.get("SAFETY_CHECKER", None)
+TORCH_COMPILE = os.environ.get("TORCH_COMPILE", None)
+
 WIDTH = 768
 HEIGHT = 768
 # disable tiny autoencoder for better quality speed tradeoff
@@ -76,8 +78,12 @@ pipe.unet.to(memory_format=torch.channels_last)
 if psutil.virtual_memory().total < 64 * 1024**3:
     pipe.enable_attention_slicing()

-if not mps_available and not xpu_available:
-    pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
+if TORCH_COMPILE:
+    pipe.text_encoder = torch.compile(pipe.text_encoder, mode="max-autotune", fullgraph=False)
+    pipe.tokenizer = torch.compile(pipe.tokenizer, mode="max-autotune", fullgraph=False)
+    pipe.unet = torch.compile(pipe.unet, mode="max-autotune", fullgraph=False)
+    pipe.vae = torch.compile(pipe.vae, mode="max-autotune", fullgraph=False)
+
 pipe(prompt="warmup", num_inference_steps=1, guidance_scale=8.0)

 compel_proc = Compel(
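One detail shared by all three apps: os.environ.get returns a string, so the new TORCH_COMPILE flag is enabled by any non-empty value, including "0" or "false"; only leaving the variable unset (or empty) keeps compilation off. A quick illustration, assuming the same os.environ.get("TORCH_COMPILE", None) read as in the diffs above:

    import os

    os.environ["TORCH_COMPILE"] = "0"           # an operator trying to disable it
    TORCH_COMPILE = os.environ.get("TORCH_COMPILE", None)
    print(bool(TORCH_COMPILE))                  # True -- non-empty strings are truthy, so compile still runs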
latent_consistency_controlnet.py
CHANGED
@@ -25,7 +25,6 @@ from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer

 from diffusers import (
     AutoencoderKL,
-    AutoencoderTiny,
     ConfigMixin,
     DiffusionPipeline,
     SchedulerMixin,
@@ -50,6 +49,17 @@ import PIL.Image

 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name

+
+# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.retrieve_latents
+def retrieve_latents(encoder_output, generator):
+    if hasattr(encoder_output, "latent_dist"):
+        return encoder_output.latent_dist.sample(generator)
+    elif hasattr(encoder_output, "latents"):
+        return encoder_output.latents
+    else:
+        raise AttributeError("Could not access latents of provided encoder_output")
+
+
 class LatentConsistencyModelPipeline_controlnet(DiffusionPipeline):
     _optional_components = ["scheduler"]

@@ -276,22 +286,17 @@ class LatentConsistencyModelPipeline_controlnet(DiffusionPipeline):
             )

         elif isinstance(generator, list):
-            if isinstance(self.vae, AutoencoderTiny):
-                init_latents = [
-                    self.vae.encode(image[i : i + 1]).latents
-                    for i in range(batch_size)
-                ]
-            else:
-                init_latents = [
-                    self.vae.encode(image[i : i + 1]).latent_dist.sample(generator[i])
-                    for i in range(batch_size)
-                ]
+            init_latents = [
+                retrieve_latents(
+                    self.vae.encode(image[i : i + 1]), generator=generator[i]
+                )
+                for i in range(batch_size)
+            ]
             init_latents = torch.cat(init_latents, dim=0)
         else:
-            if isinstance(self.vae, AutoencoderTiny):
-                init_latents = self.vae.encode(image).latents
-            else:
-                init_latents = self.vae.encode(image).latent_dist.sample(generator)
+            init_latents = retrieve_latents(
+                self.vae.encode(image), generator=generator
+            )

         init_latents = self.vae.config.scaling_factor * init_latents
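The retrieve_latents helper (copied from the diffusers img2img pipeline) is what lets the explicit AutoencoderTiny handling and its import be dropped: an AutoencoderKL encode() result exposes a latent_dist to sample from, while an AutoencoderTiny result exposes latents directly, and the helper dispatches on whichever attribute is present. A rough, self-contained illustration with stand-in output objects; the _FakeDist/_DistOutput/_TinyOutput names are invented for the example:

    import torch

    class _FakeDist:                      # mimics a latent distribution
        def __init__(self, mean):
            self.mean = mean
        def sample(self, generator=None):
            return self.mean              # deterministic stand-in for .sample()

    class _DistOutput:                    # AutoencoderKL-style encode() output
        def __init__(self, dist):
            self.latent_dist = dist

    class _TinyOutput:                    # AutoencoderTiny-style encode() output
        def __init__(self, latents):
            self.latents = latents

    def retrieve_latents(encoder_output, generator):
        if hasattr(encoder_output, "latent_dist"):
            return encoder_output.latent_dist.sample(generator)
        elif hasattr(encoder_output, "latents"):
            return encoder_output.latents
        else:
            raise AttributeError("Could not access latents of provided encoder_output")

    z = torch.zeros(1, 4, 64, 64)
    print(retrieve_latents(_DistOutput(_FakeDist(z)), generator=None).shape)  # torch.Size([1, 4, 64, 64])
    print(retrieve_latents(_TinyOutput(z), generator=None).shape)             # torch.Size([1, 4, 64, 64])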
requirements.txt
CHANGED
@@ -1,4 +1,4 @@
-diffusers==0.22.
+diffusers==0.22.2
 transformers==4.34.1
 gradio==3.50.2
 --extra-index-url https://download.pytorch.org/whl/cu121