YiYiXu committed on
Commit
ebcc153
·
1 Parent(s): 40cd651

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +28 -41
README.md CHANGED
@@ -23,21 +23,16 @@ pip install diffusers transformers accelerate
23
  ### Text to image
24
 
25
  ```python
26
- from diffusers import DiffusionPipeline
27
  import torch
28
 
29
- pipe_prior = DiffusionPipeline.from_pretrained("kandinsky-community/kandinsky-2-2-prior", torch_dtype=torch.float16)
30
- pipe_prior.to("cuda")
31
-
32
- t2i_pipe = DiffusionPipeline.from_pretrained("kandinsky-community/kandinsky-2-2-decoder", torch_dtype=torch.float16)
33
- t2i_pipe.to("cuda")
34
 
35
  prompt = "portrait of a young women, blue eyes, cinematic"
36
  negative_prompt = "low quality, bad quality"
37
 
38
- image_embeds, negative_image_embeds = pipe_prior(prompt, negative_prompt, guidance_scale=1.0).to_tuple()
39
-
40
- image = t2i_pipe(image_embeds=image_embeds, negative_image_embeds=negative_image_embeds, height=768, width=768).images[0]
41
  image.save("portrait.png")
42
  ```
43
 
@@ -59,32 +54,16 @@ original_image = original_image.resize((768, 512))
59
  ![img](https://raw.githubusercontent.com/CompVis/stable-diffusion/main/assets/stable-samples/img2img/sketch-mountains-input.jpg)
60
 
61
  ```python
62
- from diffusers import KandinskyV22Img2ImgPipeline, KandinskyV22PriorPipeline
63
  import torch
64
 
65
- # create prior
66
- pipe_prior = KandinskyV22PriorPipeline.from_pretrained(
67
- "kandinsky-community/kandinsky-2-2-prior", torch_dtype=torch.float16
68
- )
69
- pipe_prior.to("cuda")
70
-
71
- # create img2img pipeline
72
- pipe = KandinskyV22Img2ImgPipeline.from_pretrained("kandinsky-community/kandinsky-2-2-decoder", torch_dtype=torch.float16)
73
- pipe.to("cuda")
74
 
75
  prompt = "A fantasy landscape, Cinematic lighting"
76
  negative_prompt = "low quality, bad quality"
77
 
78
- image_embeds, negative_image_embeds = pipe_prior(prompt, negative_prompt).to_tuple()
79
-
80
- out = pipe(
81
- image=original_image,
82
- image_embeds=image_embeds,
83
- negative_image_embeds=negative_image_embeds,
84
- height=768,
85
- width=768,
86
- strength=0.3,
87
- )
88
 
89
  out.images[0].save("fantasy_land.png")
90
  ```
@@ -135,35 +114,31 @@ image.save("starry_cat.png")
135
 
136
  ### Text Guided Inpainting Generation
137
 
 
138
  ```python
139
- from diffusers import KandinskyV22InpaintPipeline, KandinskyV22PriorPipeline
140
  from diffusers.utils import load_image
141
  import torch
142
  import numpy as np
143
 
144
- pipe_prior = KandinskyV22PriorPipeline.from_pretrained(
145
- "kandinsky-community/kandinsky-2-2-prior", torch_dtype=torch.float16
146
- )
147
- pipe_prior.to("cuda")
148
 
149
  prompt = "a hat"
150
- prior_output = pipe_prior(prompt)
151
-
152
- pipe = KandinskyV22InpaintPipeline.from_pretrained("kandinsky-community/kandinsky-2-2-inpaint", torch_dtype=torch.float16)
153
- pipe.to("cuda")
154
 
155
  init_image = load_image(
156
  "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" "/kandinsky/cat.png"
157
  )
158
 
159
- mask = np.ones((768, 768), dtype=np.float32)
160
  # Let's mask out an area above the cat's head
161
- mask[:250, 250:-250] = 0
 
162
 
163
  out = pipe(
 
164
  image=init_image,
165
  mask_image=mask,
166
- **prior_output,
167
  height=768,
168
  width=768,
169
  num_inference_steps=150,
@@ -174,6 +149,18 @@ image.save("cat_with_hat.png")
174
  ```
175
  ![img](https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/kandinskyv22/cat_with_hat.png)
176
 
 
 
 
 
 
 
 
 
 
 
 
 
177
 
178
  ### Text-to-Image Generation with ControlNet Conditioning
179
 
 
23
  ### Text to image
24
 
25
  ```python
26
+ from diffusers import AutoPipelineForText2Image
27
  import torch
28
 
29
+ pipe = AutoPipelineForText2Image.from_pretrained("kandinsky-community/kandinsky-2-2-decoder", torch_dtype=torch.float16)
30
+ pipe = pipe.to("cuda")
 
 
 
31
 
32
  prompt = "portrait of a young women, blue eyes, cinematic"
33
  negative_prompt = "low quality, bad quality"
34
 
35
+ image = pipe(prompt=prompt, negative_prompt=negative_prompt, prior_guidance_scale =1.0, height=768, width=768).images[0]
 
 
36
  image.save("portrait.png")
37
  ```
38
 
 
54
  ![img](https://raw.githubusercontent.com/CompVis/stable-diffusion/main/assets/stable-samples/img2img/sketch-mountains-input.jpg)
55
 
56
  ```python
57
+ from diffusers import AutoPipelineForImage2Image
58
  import torch
59
 
60
+ pipe = AutoPipelineForImage2Image.from_pretrained("kandinsky-community/kandinsky-2-2-decoder", torch_dtype=torch.float16)
61
+ pipe.enable_model_cpu_offload()
 
 
 
 
 
 
 
62
 
63
  prompt = "A fantasy landscape, Cinematic lighting"
64
  negative_prompt = "low quality, bad quality"
65
 
66
+ image = pipe(prompt=prompt, image=original_image, strength=0.3, height=768, width=768).images[0]
 
 
 
 
 
 
 
 
 
67
 
68
  out.images[0].save("fantasy_land.png")
69
  ```
 
114
 
115
  ### Text Guided Inpainting Generation
116
 
117
+
118
  ```python
119
+ from diffusers import AutoPipelineForInpainting
120
  from diffusers.utils import load_image
121
  import torch
122
  import numpy as np
123
 
124
+ pipe = AutoPipelineForInpainting.from_pretrained("kandinsky-community/kandinsky-2-2-decoder-inpaint", torch_dtype=torch.float16)
125
+ pipe.enable_model_cpu_offload()
 
 
126
 
127
  prompt = "a hat"
 
 
 
 
128
 
129
  init_image = load_image(
130
  "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" "/kandinsky/cat.png"
131
  )
132
 
133
+ mask = np.zeros((768, 768), dtype=np.float32)
134
  # Let's mask out an area above the cat's head
135
+ mask[:250, 250:-250] = 1
136
+
137
 
138
  out = pipe(
139
+ prompt=prompt,
140
  image=init_image,
141
  mask_image=mask,
 
142
  height=768,
143
  width=768,
144
  num_inference_steps=150,
 
149
  ```
150
  ![img](https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/kandinskyv22/cat_with_hat.png)
151
 
152
+ __<font color=red>Breaking change on the mask input:</font>__
153
+ We introduced a breaking change for the Kandinsky inpainting pipeline in the following pull request: https://github.com/huggingface/diffusers/pull/4207. Previously we accepted a mask format where black pixels represent the masked-out area. We now use white pixels to represent the masked-out area instead, so that all our pipelines share a unified mask format.
154
+ Please upgrade your inpainting code to follow the above. If you are using Kandinsky Inpaint in production, you now need to change the mask to:
155
+
156
+ ```python
157
+ # For PIL input
158
+ import PIL.ImageOps
159
+ mask = PIL.ImageOps.invert(mask)
160
+
161
+ # For PyTorch and Numpy input
162
+ mask = 1 - mask
163
+ ```
164
 
165
  ### Text-to-Image Generation with ControlNet Conditioning
166