AlekseyCalvin committed
Commit 0ca490b · verified · 1 Parent(s): 77881f1

Update pipeline.py

Files changed (1):
  pipeline.py +102 -12
pipeline.py CHANGED
@@ -68,17 +68,108 @@ def prepare_timesteps(
 # FLUX pipeline function
 class FluxWithCFGPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSingleFileMixin):
 
+    r"""
+    The Flux pipeline for text-to-image generation.
+
+    Reference: https://blackforestlabs.ai/announcing-black-forest-labs/
+
+    Args:
+        transformer ([`FluxTransformer2DModel`]):
+            Conditional Transformer (MMDiT) architecture to denoise the encoded image latents.
+        scheduler ([`FlowMatchEulerDiscreteScheduler`]):
+            A scheduler to be used in combination with `transformer` to denoise the encoded image latents.
+        vae ([`AutoencoderKL`]):
+            Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.
+        text_encoder ([`CLIPTextModel`]):
+            [CLIP](https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModel), specifically
+            the [clip-vit-large-patch14](https://huggingface.co/openai/clip-vit-large-patch14) variant.
+        text_encoder_2 ([`T5EncoderModel`]):
+            [T5](https://huggingface.co/docs/transformers/en/model_doc/t5#transformers.T5EncoderModel), specifically
+            the [google/t5-v1_1-xxl](https://huggingface.co/google/t5-v1_1-xxl) variant.
+        tokenizer (`CLIPTokenizer`):
+            Tokenizer of class
+            [CLIPTokenizer](https://huggingface.co/docs/transformers/en/model_doc/clip#transformers.CLIPTokenizer).
+        tokenizer_2 (`T5TokenizerFast`):
+            Second Tokenizer of class
+            [T5TokenizerFast](https://huggingface.co/docs/transformers/en/model_doc/t5#transformers.T5TokenizerFast).
+    """
+
+    model_cpu_offload_seq = "text_encoder->text_encoder_2->transformer->vae"
+    _optional_components = []
+    _callback_tensor_inputs = ["latents", "prompt_embeds"]
+
     def __init__(
         self,
-        transformer = FluxTransformer2DModel,
-        scheduler = FlowMatchEulerDiscreteScheduler,
-        vae = AutoencoderKL,
-        text_encoder = CLIPTextModelWithProjection,
-        tokenizer = CLIPTokenizer,
-        tokenizer_2 = T5TokenizerFast,
-        tokenizer_3 = None,
-        text_encoder_2 = T5EncoderModel,
-        text_encoder_3 = None,
+        scheduler: FlowMatchEulerDiscreteScheduler,
+        vae: AutoencoderKL,
+        text_encoder: CLIPTextModel,
+        tokenizer: CLIPTokenizer,
+        text_encoder_2: T5EncoderModel,
+        tokenizer_2: T5TokenizerFast,
+        transformer: FluxTransformer2DModel,
     ):
         super().__init__()
 
@@ -86,21 +177,20 @@ class FluxWithCFGPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSingleFile
             vae=vae,
             text_encoder=text_encoder,
             text_encoder_2=text_encoder_2,
-            text_encoder_3=text_encoder_3,
             tokenizer=tokenizer,
             tokenizer_2=tokenizer_2,
-            tokenizer_3=tokenizer_3,
             transformer=transformer,
             scheduler=scheduler,
         )
         self.vae_scale_factor = (
-            2 ** (len(self.vae.config.block_out_channels) - 1) if hasattr(self, "vae") and self.vae is not None else 16
+            2 ** (len(self.vae.config.block_out_channels)) if hasattr(self, "vae") and self.vae is not None else 16
         )
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
         self.tokenizer_max_length = (
             self.tokenizer.model_max_length if hasattr(self, "tokenizer") and self.tokenizer is not None else 77
         )
         self.default_sample_size = 64
+
     def __call__(
         self,
         prompt: Union[str, List[str]] = None,
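The substantive change inside `__init__` is the dropped `- 1` in the scale-factor exponent. A minimal sketch of the arithmetic, assuming the stock Flux VAE config with four `block_out_channels` entries (the config itself is not shown in this diff):

    block_out_channels = [128, 256, 512, 512]  # assumed Flux VAE config, not part of this commit

    old_scale = 2 ** (len(block_out_channels) - 1)  # removed line: 2**3 = 8
    new_scale = 2 ** len(block_out_channels)        # added line:   2**4 = 16

    print(old_scale, new_scale)  # 8 16, so the new value matches the `else 16` fallback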
 
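With the typed signature, the registered component names line up with the standard Flux checkpoint layout, so the class can be driven like any diffusers pipeline. A usage sketch, not part of the commit; the checkpoint id, dtype, and the `.images` output attribute are assumptions:

    import torch
    from pipeline import FluxWithCFGPipeline  # the file this commit updates

    # Checkpoint id is an assumption; any Flux checkpoint whose model_index
    # matches the components registered in __init__ should load the same way.
    pipe = FluxWithCFGPipeline.from_pretrained(
        "black-forest-labs/FLUX.1-schnell",
        torch_dtype=torch.bfloat16,
    )

    # model_cpu_offload_seq (added above) gives this call its hand-off order:
    # text_encoder -> text_encoder_2 -> transformer -> vae.
    pipe.enable_model_cpu_offload()

    # __call__ takes a prompt, as shown in the diff; the .images attribute on
    # the output is assumed from the usual diffusers convention.
    image = pipe(prompt="a watercolor fox in a birch forest").images[0]
    image.save("fox.png")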
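The new `_callback_tensor_inputs` attribute advertises which tensors a step-end callback may read or overwrite. A sketch of the diffusers callback convention, continuing from `pipe` above; whether this pipeline's `__call__` actually accepts `callback_on_step_end` is not visible in the hunk, so treat that as an assumption:

    # Hypothetical step-end callback following the diffusers convention: it
    # receives the tensors named in callback_on_step_end_tensor_inputs and
    # returns the ones it modified.
    def recenter_latents(pipeline, step_index, timestep, callback_kwargs):
        latents = callback_kwargs["latents"]
        callback_kwargs["latents"] = latents - latents.mean()  # toy edit for illustration
        return callback_kwargs

    image = pipe(
        prompt="a watercolor fox in a birch forest",
        callback_on_step_end=recenter_latents,           # assumed to be supported
        callback_on_step_end_tensor_inputs=["latents"],  # must be a subset of _callback_tensor_inputs
    ).images[0]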