radames committed on
Commit 4e35a59 · 2 Parent(s): 2856cae dabf711

Merge branch 'main' into text2igm

app-controlnet.py CHANGED
@@ -35,6 +35,7 @@ import psutil
 MAX_QUEUE_SIZE = int(os.environ.get("MAX_QUEUE_SIZE", 0))
 TIMEOUT = float(os.environ.get("TIMEOUT", 0))
 SAFETY_CHECKER = os.environ.get("SAFETY_CHECKER", None)
+TORCH_COMPILE = os.environ.get("TORCH_COMPILE", None)
 WIDTH = 512
 HEIGHT = 512
 # disable tiny autoencoder for better quality speed tradeoff
@@ -100,15 +101,20 @@ pipe.unet.to(memory_format=torch.channels_last)
 if psutil.virtual_memory().total < 64 * 1024**3:
     pipe.enable_attention_slicing()
 
-if not mps_available and not xpu_available:
-    pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
-    pipe(prompt="warmup", image=[Image.new("RGB", (768, 768))], control_image=[Image.new("RGB", (768, 768))])
-
 compel_proc = Compel(
     tokenizer=pipe.tokenizer,
     text_encoder=pipe.text_encoder,
     truncate_long_prompts=False,
 )
+if TORCH_COMPILE:
+    pipe.text_encoder = torch.compile(pipe.text_encoder, mode="max-autotune", fullgraph=False)
+    pipe.tokenizer = torch.compile(pipe.tokenizer, mode="max-autotune", fullgraph=False)
+    pipe.unet = torch.compile(pipe.unet, mode="max-autotune", fullgraph=False)
+    pipe.vae = torch.compile(pipe.vae, mode="max-autotune", fullgraph=False)
+
+    pipe(prompt="warmup", image=[Image.new("RGB", (768, 768))], control_image=[Image.new("RGB", (768, 768))])
+
+
 user_queue_map = {}
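Note on the hunk above: compilation is now opt-in via the TORCH_COMPILE environment variable rather than gated on the MPS/XPU availability check, and it covers the text encoder, tokenizer, UNet, and VAE with mode="max-autotune" and fullgraph=False. A minimal sketch of the gating pattern follows; the helper name is illustrative and not part of the diff:

import os
import torch

# Read once at import time, mirroring the apps in this commit; any non-empty
# string (e.g. TORCH_COMPILE=1 in the environment) enables compilation.
TORCH_COMPILE = os.environ.get("TORCH_COMPILE", None)

def maybe_compile(module):
    # Illustrative helper (not in the diff): wrap a submodule with torch.compile
    # only when the flag is set. fullgraph=False tolerates graph breaks, while
    # "max-autotune" trades longer compile time for faster steady-state kernels.
    if TORCH_COMPILE:
        return torch.compile(module, mode="max-autotune", fullgraph=False)
    return module

Leaving the variable unset keeps the previous eager behaviour on CUDA, MPS, and XPU alike.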
 
app-img2img.py CHANGED
@@ -29,6 +29,8 @@ import psutil
 MAX_QUEUE_SIZE = int(os.environ.get("MAX_QUEUE_SIZE", 0))
 TIMEOUT = float(os.environ.get("TIMEOUT", 0))
 SAFETY_CHECKER = os.environ.get("SAFETY_CHECKER", None)
+TORCH_COMPILE = os.environ.get("TORCH_COMPILE", None)
+
 WIDTH = 512
 HEIGHT = 512
 # disable tiny autoencoder for better quality speed tradeoff
@@ -76,8 +78,12 @@ pipe.unet.to(memory_format=torch.channels_last)
 if psutil.virtual_memory().total < 64 * 1024**3:
     pipe.enable_attention_slicing()
 
-if not mps_available and not xpu_available:
-    pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
+if TORCH_COMPILE:
+    pipe.text_encoder = torch.compile(pipe.text_encoder, mode="max-autotune", fullgraph=False)
+    pipe.tokenizer = torch.compile(pipe.tokenizer, mode="max-autotune", fullgraph=False)
+    pipe.unet = torch.compile(pipe.unet, mode="max-autotune", fullgraph=False)
+    pipe.vae = torch.compile(pipe.vae, mode="max-autotune", fullgraph=False)
+
 pipe(prompt="warmup", image=[Image.new("RGB", (512, 512))])
 
 compel_proc = Compel(

app-txt2img.py CHANGED
@@ -30,6 +30,8 @@ import psutil
 MAX_QUEUE_SIZE = int(os.environ.get("MAX_QUEUE_SIZE", 0))
 TIMEOUT = float(os.environ.get("TIMEOUT", 0))
 SAFETY_CHECKER = os.environ.get("SAFETY_CHECKER", None)
+TORCH_COMPILE = os.environ.get("TORCH_COMPILE", None)
+
 WIDTH = 768
 HEIGHT = 768
 # disable tiny autoencoder for better quality speed tradeoff
@@ -76,8 +78,12 @@ pipe.unet.to(memory_format=torch.channels_last)
 if psutil.virtual_memory().total < 64 * 1024**3:
     pipe.enable_attention_slicing()
 
-if not mps_available and not xpu_available:
-    pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
+if TORCH_COMPILE:
+    pipe.text_encoder = torch.compile(pipe.text_encoder, mode="max-autotune", fullgraph=False)
+    pipe.tokenizer = torch.compile(pipe.tokenizer, mode="max-autotune", fullgraph=False)
+    pipe.unet = torch.compile(pipe.unet, mode="max-autotune", fullgraph=False)
+    pipe.vae = torch.compile(pipe.vae, mode="max-autotune", fullgraph=False)
+
 pipe(prompt="warmup", num_inference_steps=1, guidance_scale=8.0)
 
 compel_proc = Compel(
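The warm-up call kept after the compile block is doing real work here: torch.compile defers compilation until the first invocation, so running the pipeline once at startup keeps that one-time cost out of the first user request. A rough, illustrative way to observe the effect, assuming a pipe object has already been constructed as in the app (the timing helper below is not part of the diff):

import time
import torch

def time_call(pipe):
    # Illustrative helper: time one pipeline call. The first call after
    # torch.compile includes compilation; later calls reuse the compiled graphs.
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    start = time.perf_counter()
    pipe(prompt="warmup", num_inference_steps=1, guidance_scale=8.0)
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    return time.perf_counter() - start

# first = time_call(pipe)   # expected to be slow: includes compilation
# second = time_call(pipe)  # expected to be much faster
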
latent_consistency_controlnet.py CHANGED
@@ -25,7 +25,6 @@ from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer
 
 from diffusers import (
     AutoencoderKL,
-    AutoencoderTiny,
     ConfigMixin,
     DiffusionPipeline,
     SchedulerMixin,
@@ -50,6 +49,17 @@ import PIL.Image
 
 logger = logging.get_logger(__name__) # pylint: disable=invalid-name
 
+
+# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.retrieve_latents
+def retrieve_latents(encoder_output, generator):
+    if hasattr(encoder_output, "latent_dist"):
+        return encoder_output.latent_dist.sample(generator)
+    elif hasattr(encoder_output, "latents"):
+        return encoder_output.latents
+    else:
+        raise AttributeError("Could not access latents of provided encoder_output")
+
+
 class LatentConsistencyModelPipeline_controlnet(DiffusionPipeline):
     _optional_components = ["scheduler"]
 
@@ -276,22 +286,17 @@ class LatentConsistencyModelPipeline_controlnet(DiffusionPipeline):
             )
 
         elif isinstance(generator, list):
-            if isinstance(self.vae, AutoencoderTiny):
-                init_latents = [
-                    self.vae.encode(image[i : i + 1]).latents
-                    for i in range(batch_size)
-                ]
-            else:
-                init_latents = [
-                    self.vae.encode(image[i : i + 1]).latent_dist.sample(generator[i])
-                    for i in range(batch_size)
-                ]
+            init_latents = [
+                retrieve_latents(
+                    self.vae.encode(image[i : i + 1]), generator=generator[i]
+                )
+                for i in range(batch_size)
+            ]
             init_latents = torch.cat(init_latents, dim=0)
         else:
-            if isinstance(self.vae, AutoencoderTiny):
-                init_latents = self.vae.encode(image).latents
-            else:
-                init_latents = self.vae.encode(image).latent_dist.sample(generator)
+            init_latents = retrieve_latents(
+                self.vae.encode(image), generator=generator
+            )
 
         init_latents = self.vae.config.scaling_factor * init_latents
requirements.txt CHANGED
@@ -1,4 +1,4 @@
-diffusers==0.22.1
+diffusers==0.22.2
 transformers==4.34.1
 gradio==3.50.2
 --extra-index-url https://download.pytorch.org/whl/cu121