Spaces: Running on Zero

Commit: Working on Hugging Face.

Files changed:
- README.md +1 -1
- src/block.py +0 -333
- src/generate.py +0 -294
- src/lora_controller.py +0 -75
- src/transformer.py +0 -270
README.md CHANGED

```diff
@@ -7,7 +7,7 @@ sdk: gradio
 python_version: 3.10.13
 sdk_version: 5.16.0
 app_file: app.py
-pinned:
+pinned: true
 short_description: Transform Your Images into Mesmerizing Hexagon Grids
 license: apache-2.0
 tags:
```
src/block.py DELETED (333 lines removed)

```python
import torch
from typing import List, Union, Optional, Dict, Any, Callable
from diffusers.models.attention_processor import Attention, F
from .lora_controller import enable_lora


def attn_forward(
    attn: Attention,
    hidden_states: torch.FloatTensor,
    encoder_hidden_states: torch.FloatTensor = None,
    condition_latents: torch.FloatTensor = None,
    attention_mask: Optional[torch.FloatTensor] = None,
    image_rotary_emb: Optional[torch.Tensor] = None,
    cond_rotary_emb: Optional[torch.Tensor] = None,
    model_config: Optional[Dict[str, Any]] = {},
) -> torch.FloatTensor:
    batch_size, _, _ = (
        hidden_states.shape
        if encoder_hidden_states is None
        else encoder_hidden_states.shape
    )

    with enable_lora(
        (attn.to_q, attn.to_k, attn.to_v), model_config.get("latent_lora", False)
    ):
        # `sample` projections.
        query = attn.to_q(hidden_states)
        key = attn.to_k(hidden_states)
        value = attn.to_v(hidden_states)

    inner_dim = key.shape[-1]
    head_dim = inner_dim // attn.heads

    query = query.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)
    key = key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)
    value = value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)

    if attn.norm_q is not None:
        query = attn.norm_q(query)
    if attn.norm_k is not None:
        key = attn.norm_k(key)

    # the attention in FluxSingleTransformerBlock does not use `encoder_hidden_states`
    if encoder_hidden_states is not None:
        # `context` projections.
        encoder_hidden_states_query_proj = attn.add_q_proj(encoder_hidden_states)
        encoder_hidden_states_key_proj = attn.add_k_proj(encoder_hidden_states)
        encoder_hidden_states_value_proj = attn.add_v_proj(encoder_hidden_states)

        encoder_hidden_states_query_proj = encoder_hidden_states_query_proj.view(
            batch_size, -1, attn.heads, head_dim
        ).transpose(1, 2)
        encoder_hidden_states_key_proj = encoder_hidden_states_key_proj.view(
            batch_size, -1, attn.heads, head_dim
        ).transpose(1, 2)
        encoder_hidden_states_value_proj = encoder_hidden_states_value_proj.view(
            batch_size, -1, attn.heads, head_dim
        ).transpose(1, 2)

        if attn.norm_added_q is not None:
            encoder_hidden_states_query_proj = attn.norm_added_q(
                encoder_hidden_states_query_proj
            )
        if attn.norm_added_k is not None:
            encoder_hidden_states_key_proj = attn.norm_added_k(
                encoder_hidden_states_key_proj
            )

        # attention
        query = torch.cat([encoder_hidden_states_query_proj, query], dim=2)
        key = torch.cat([encoder_hidden_states_key_proj, key], dim=2)
        value = torch.cat([encoder_hidden_states_value_proj, value], dim=2)

    if image_rotary_emb is not None:
        from diffusers.models.embeddings import apply_rotary_emb

        query = apply_rotary_emb(query, image_rotary_emb)
        key = apply_rotary_emb(key, image_rotary_emb)

    if condition_latents is not None:
        cond_query = attn.to_q(condition_latents)
        cond_key = attn.to_k(condition_latents)
        cond_value = attn.to_v(condition_latents)

        cond_query = cond_query.view(batch_size, -1, attn.heads, head_dim).transpose(
            1, 2
        )
        cond_key = cond_key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)
        cond_value = cond_value.view(batch_size, -1, attn.heads, head_dim).transpose(
            1, 2
        )
        if attn.norm_q is not None:
            cond_query = attn.norm_q(cond_query)
        if attn.norm_k is not None:
            cond_key = attn.norm_k(cond_key)

        if cond_rotary_emb is not None:
            cond_query = apply_rotary_emb(cond_query, cond_rotary_emb)
            cond_key = apply_rotary_emb(cond_key, cond_rotary_emb)

    if condition_latents is not None:
        query = torch.cat([query, cond_query], dim=2)
        key = torch.cat([key, cond_key], dim=2)
        value = torch.cat([value, cond_value], dim=2)

    if not model_config.get("union_cond_attn", True):
        # If we don't want to use the union condition attention, we need to mask the attention
        # between the hidden states and the condition latents
        attention_mask = torch.ones(
            query.shape[2], key.shape[2], device=query.device, dtype=torch.bool
        )
        condition_n = cond_query.shape[2]
        attention_mask[-condition_n:, :-condition_n] = False
        attention_mask[:-condition_n, -condition_n:] = False
    if hasattr(attn, "c_factor"):
        attention_mask = torch.zeros(
            query.shape[2], key.shape[2], device=query.device, dtype=query.dtype
        )
        condition_n = cond_query.shape[2]
        bias = torch.log(attn.c_factor[0])
        attention_mask[-condition_n:, :-condition_n] = bias
        attention_mask[:-condition_n, -condition_n:] = bias
    hidden_states = F.scaled_dot_product_attention(
        query, key, value, dropout_p=0.0, is_causal=False, attn_mask=attention_mask
    )
    hidden_states = hidden_states.transpose(1, 2).reshape(
        batch_size, -1, attn.heads * head_dim
    )
    hidden_states = hidden_states.to(query.dtype)

    if encoder_hidden_states is not None:
        if condition_latents is not None:
            encoder_hidden_states, hidden_states, condition_latents = (
                hidden_states[:, : encoder_hidden_states.shape[1]],
                hidden_states[
                    :, encoder_hidden_states.shape[1] : -condition_latents.shape[1]
                ],
                hidden_states[:, -condition_latents.shape[1] :],
            )
        else:
            encoder_hidden_states, hidden_states = (
                hidden_states[:, : encoder_hidden_states.shape[1]],
                hidden_states[:, encoder_hidden_states.shape[1] :],
            )

        with enable_lora((attn.to_out[0],), model_config.get("latent_lora", False)):
            # linear proj
            hidden_states = attn.to_out[0](hidden_states)
            # dropout
            hidden_states = attn.to_out[1](hidden_states)
        encoder_hidden_states = attn.to_add_out(encoder_hidden_states)

        if condition_latents is not None:
            condition_latents = attn.to_out[0](condition_latents)
            condition_latents = attn.to_out[1](condition_latents)

        return (
            (hidden_states, encoder_hidden_states, condition_latents)
            if condition_latents is not None
            else (hidden_states, encoder_hidden_states)
        )
    elif condition_latents is not None:
        # if there are condition_latents, we need to separate the hidden_states and the condition_latents
        hidden_states, condition_latents = (
            hidden_states[:, : -condition_latents.shape[1]],
            hidden_states[:, -condition_latents.shape[1] :],
        )
        return hidden_states, condition_latents
    else:
        return hidden_states


def block_forward(
    self,
    hidden_states: torch.FloatTensor,
    encoder_hidden_states: torch.FloatTensor,
    condition_latents: torch.FloatTensor,
    temb: torch.FloatTensor,
    cond_temb: torch.FloatTensor,
    cond_rotary_emb=None,
    image_rotary_emb=None,
    model_config: Optional[Dict[str, Any]] = {},
):
    use_cond = condition_latents is not None
    with enable_lora((self.norm1.linear,), model_config.get("latent_lora", False)):
        norm_hidden_states, gate_msa, shift_mlp, scale_mlp, gate_mlp = self.norm1(
            hidden_states, emb=temb
        )

    norm_encoder_hidden_states, c_gate_msa, c_shift_mlp, c_scale_mlp, c_gate_mlp = (
        self.norm1_context(encoder_hidden_states, emb=temb)
    )

    if use_cond:
        (
            norm_condition_latents,
            cond_gate_msa,
            cond_shift_mlp,
            cond_scale_mlp,
            cond_gate_mlp,
        ) = self.norm1(condition_latents, emb=cond_temb)

    # Attention.
    result = attn_forward(
        self.attn,
        model_config=model_config,
        hidden_states=norm_hidden_states,
        encoder_hidden_states=norm_encoder_hidden_states,
        condition_latents=norm_condition_latents if use_cond else None,
        image_rotary_emb=image_rotary_emb,
        cond_rotary_emb=cond_rotary_emb if use_cond else None,
    )
    attn_output, context_attn_output = result[:2]
    cond_attn_output = result[2] if use_cond else None

    # Process attention outputs for the `hidden_states`.
    # 1. hidden_states
    attn_output = gate_msa.unsqueeze(1) * attn_output
    hidden_states = hidden_states + attn_output
    # 2. encoder_hidden_states
    context_attn_output = c_gate_msa.unsqueeze(1) * context_attn_output
    encoder_hidden_states = encoder_hidden_states + context_attn_output
    # 3. condition_latents
    if use_cond:
        cond_attn_output = cond_gate_msa.unsqueeze(1) * cond_attn_output
        condition_latents = condition_latents + cond_attn_output
        if model_config.get("add_cond_attn", False):
            hidden_states += cond_attn_output

    # LayerNorm + MLP.
    # 1. hidden_states
    norm_hidden_states = self.norm2(hidden_states)
    norm_hidden_states = (
        norm_hidden_states * (1 + scale_mlp[:, None]) + shift_mlp[:, None]
    )
    # 2. encoder_hidden_states
    norm_encoder_hidden_states = self.norm2_context(encoder_hidden_states)
    norm_encoder_hidden_states = (
        norm_encoder_hidden_states * (1 + c_scale_mlp[:, None]) + c_shift_mlp[:, None]
    )
    # 3. condition_latents
    if use_cond:
        norm_condition_latents = self.norm2(condition_latents)
        norm_condition_latents = (
            norm_condition_latents * (1 + cond_scale_mlp[:, None])
            + cond_shift_mlp[:, None]
        )

    # Feed-forward.
    with enable_lora((self.ff.net[2],), model_config.get("latent_lora", False)):
        # 1. hidden_states
        ff_output = self.ff(norm_hidden_states)
        ff_output = gate_mlp.unsqueeze(1) * ff_output
    # 2. encoder_hidden_states
    context_ff_output = self.ff_context(norm_encoder_hidden_states)
    context_ff_output = c_gate_mlp.unsqueeze(1) * context_ff_output
    # 3. condition_latents
    if use_cond:
        cond_ff_output = self.ff(norm_condition_latents)
        cond_ff_output = cond_gate_mlp.unsqueeze(1) * cond_ff_output

    # Process feed-forward outputs.
    hidden_states = hidden_states + ff_output
    encoder_hidden_states = encoder_hidden_states + context_ff_output
    if use_cond:
        condition_latents = condition_latents + cond_ff_output

    # Clip to avoid overflow.
    if encoder_hidden_states.dtype == torch.float16:
        encoder_hidden_states = encoder_hidden_states.clip(-65504, 65504)

    return encoder_hidden_states, hidden_states, condition_latents if use_cond else None


def single_block_forward(
    self,
    hidden_states: torch.FloatTensor,
    temb: torch.FloatTensor,
    image_rotary_emb=None,
    condition_latents: torch.FloatTensor = None,
    cond_temb: torch.FloatTensor = None,
    cond_rotary_emb=None,
    model_config: Optional[Dict[str, Any]] = {},
):

    using_cond = condition_latents is not None
    residual = hidden_states
    with enable_lora(
        (
            self.norm.linear,
            self.proj_mlp,
        ),
        model_config.get("latent_lora", False),
    ):
        norm_hidden_states, gate = self.norm(hidden_states, emb=temb)
        mlp_hidden_states = self.act_mlp(self.proj_mlp(norm_hidden_states))
    if using_cond:
        residual_cond = condition_latents
        norm_condition_latents, cond_gate = self.norm(condition_latents, emb=cond_temb)
        mlp_cond_hidden_states = self.act_mlp(self.proj_mlp(norm_condition_latents))

    attn_output = attn_forward(
        self.attn,
        model_config=model_config,
        hidden_states=norm_hidden_states,
        image_rotary_emb=image_rotary_emb,
        **(
            {
                "condition_latents": norm_condition_latents,
                "cond_rotary_emb": cond_rotary_emb if using_cond else None,
            }
            if using_cond
            else {}
        ),
    )
    if using_cond:
        attn_output, cond_attn_output = attn_output

    with enable_lora((self.proj_out,), model_config.get("latent_lora", False)):
        hidden_states = torch.cat([attn_output, mlp_hidden_states], dim=2)
        gate = gate.unsqueeze(1)
        hidden_states = gate * self.proj_out(hidden_states)
        hidden_states = residual + hidden_states
    if using_cond:
        condition_latents = torch.cat([cond_attn_output, mlp_cond_hidden_states], dim=2)
        cond_gate = cond_gate.unsqueeze(1)
        condition_latents = cond_gate * self.proj_out(condition_latents)
        condition_latents = residual_cond + condition_latents

    if hidden_states.dtype == torch.float16:
        hidden_states = hidden_states.clip(-65504, 65504)

    return hidden_states if not using_cond else (hidden_states, condition_latents)
```
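The `c_factor` branch in `attn_forward` implements condition strength as an additive attention bias: adding log(c) to the image-to-condition (and condition-to-image) logits multiplies the corresponding pre-softmax attention weights by c. A minimal, self-contained sketch of that effect on toy tensors (not part of the commit):

```python
# Toy illustration of the `c_factor` log-bias used in attn_forward above.
# Adding log(c) to the cross logits between image and condition tokens scales
# their unnormalized attention weights by c before the softmax.
import torch
import torch.nn.functional as F

torch.manual_seed(0)
n_img, n_cond, head_dim = 4, 2, 8
q = torch.randn(1, 1, n_img + n_cond, head_dim)  # (batch, heads, tokens, dim)
k = torch.randn(1, 1, n_img + n_cond, head_dim)
v = torch.randn(1, 1, n_img + n_cond, head_dim)

c = 1.5  # plays the role of condition_scale
bias = torch.zeros(n_img + n_cond, n_img + n_cond)
bias[-n_cond:, :-n_cond] = torch.log(torch.tensor(c))  # condition -> image logits
bias[:-n_cond, -n_cond:] = torch.log(torch.tensor(c))  # image -> condition logits

out_plain = F.scaled_dot_product_attention(q, k, v)
out_biased = F.scaled_dot_product_attention(q, k, v, attn_mask=bias)
# Non-zero difference: image tokens now attend more strongly to condition tokens.
print((out_plain - out_biased).abs().max())
```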
src/generate.py DELETED (294 lines removed)

```python
import torch
import yaml, os
from diffusers.pipelines import FluxPipeline
from typing import List, Union, Optional, Dict, Any, Callable
from .transformer import tranformer_forward
from .condition import Condition

from diffusers.pipelines.flux.pipeline_flux import (
    FluxPipelineOutput,
    calculate_shift,
    retrieve_timesteps,
    np,
)


def prepare_params(
    prompt: Union[str, List[str]] = None,
    prompt_2: Optional[Union[str, List[str]]] = None,
    height: Optional[int] = 512,
    width: Optional[int] = 512,
    num_inference_steps: int = 28,
    timesteps: List[int] = None,
    guidance_scale: float = 3.5,
    num_images_per_prompt: Optional[int] = 1,
    generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
    latents: Optional[torch.FloatTensor] = None,
    prompt_embeds: Optional[torch.FloatTensor] = None,
    pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
    output_type: Optional[str] = "pil",
    return_dict: bool = True,
    joint_attention_kwargs: Optional[Dict[str, Any]] = None,
    callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
    callback_on_step_end_tensor_inputs: List[str] = ["latents"],
    max_sequence_length: int = 512,
    **kwargs: dict,
):
    return (
        prompt,
        prompt_2,
        height,
        width,
        num_inference_steps,
        timesteps,
        guidance_scale,
        num_images_per_prompt,
        generator,
        latents,
        prompt_embeds,
        pooled_prompt_embeds,
        output_type,
        return_dict,
        joint_attention_kwargs,
        callback_on_step_end,
        callback_on_step_end_tensor_inputs,
        max_sequence_length,
    )


def seed_everything(seed: int = 42):
    torch.backends.cudnn.deterministic = True
    torch.manual_seed(seed)
    np.random.seed(seed)


@torch.no_grad()
def generate(
    pipeline: FluxPipeline,
    conditions: List[Condition] = None,
    model_config: Optional[Dict[str, Any]] = {},
    condition_scale: float = 1.0,
    **params: dict,
):
    # model_config = model_config or get_config(config_path).get("model", {})
    if condition_scale != 1:
        for name, module in pipeline.transformer.named_modules():
            if not name.endswith(".attn"):
                continue
            module.c_factor = torch.ones(1, 1) * condition_scale

    self = pipeline
    (
        prompt,
        prompt_2,
        height,
        width,
        num_inference_steps,
        timesteps,
        guidance_scale,
        num_images_per_prompt,
        generator,
        latents,
        prompt_embeds,
        pooled_prompt_embeds,
        output_type,
        return_dict,
        joint_attention_kwargs,
        callback_on_step_end,
        callback_on_step_end_tensor_inputs,
        max_sequence_length,
    ) = prepare_params(**params)

    height = height or self.default_sample_size * self.vae_scale_factor
    width = width or self.default_sample_size * self.vae_scale_factor

    # 1. Check inputs. Raise error if not correct
    self.check_inputs(
        prompt,
        prompt_2,
        height,
        width,
        prompt_embeds=prompt_embeds,
        pooled_prompt_embeds=pooled_prompt_embeds,
        callback_on_step_end_tensor_inputs=callback_on_step_end_tensor_inputs,
        max_sequence_length=max_sequence_length,
    )

    self._guidance_scale = guidance_scale
    self._joint_attention_kwargs = joint_attention_kwargs
    self._interrupt = False

    # 2. Define call parameters
    if prompt is not None and isinstance(prompt, str):
        batch_size = 1
    elif prompt is not None and isinstance(prompt, list):
        batch_size = len(prompt)
    else:
        batch_size = prompt_embeds.shape[0]

    device = self._execution_device

    lora_scale = (
        self.joint_attention_kwargs.get("scale", None)
        if self.joint_attention_kwargs is not None
        else None
    )
    (
        prompt_embeds,
        pooled_prompt_embeds,
        text_ids,
    ) = self.encode_prompt(
        prompt=prompt,
        prompt_2=prompt_2,
        prompt_embeds=prompt_embeds,
        pooled_prompt_embeds=pooled_prompt_embeds,
        device=device,
        num_images_per_prompt=num_images_per_prompt,
        max_sequence_length=max_sequence_length,
        lora_scale=lora_scale,
    )

    # 4. Prepare latent variables
    num_channels_latents = self.transformer.config.in_channels // 4
    latents, latent_image_ids = self.prepare_latents(
        batch_size * num_images_per_prompt,
        num_channels_latents,
        height,
        width,
        prompt_embeds.dtype,
        device,
        generator,
        latents,
    )

    # 4.1. Prepare conditions
    condition_latents, condition_ids, condition_type_ids = ([] for _ in range(3))
    use_condition = conditions is not None or []
    if use_condition:
        assert len(conditions) <= 1, "Only one condition is supported for now."
        pipeline.set_adapters(
            {
                512: "subject_512",
                1024: "subject_1024",
            }[height]
        )
        for condition in conditions:
            tokens, ids, type_id = condition.encode(self)
            condition_latents.append(tokens)  # [batch_size, token_n, token_dim]
            condition_ids.append(ids)  # [token_n, id_dim(3)]
            condition_type_ids.append(type_id)  # [token_n, 1]
        condition_latents = torch.cat(condition_latents, dim=1)
        condition_ids = torch.cat(condition_ids, dim=0)
        if condition.condition_type == "subject":
            delta = 32 if height == 512 else -32
            # print(f"Condition delta: {delta}")
            condition_ids[:, 2] += delta

        condition_type_ids = torch.cat(condition_type_ids, dim=0)

    # 5. Prepare timesteps
    sigmas = np.linspace(1.0, 1 / num_inference_steps, num_inference_steps)
    image_seq_len = latents.shape[1]
    mu = calculate_shift(
        image_seq_len,
        self.scheduler.config.base_image_seq_len,
        self.scheduler.config.max_image_seq_len,
        self.scheduler.config.base_shift,
        self.scheduler.config.max_shift,
    )
    timesteps, num_inference_steps = retrieve_timesteps(
        self.scheduler,
        num_inference_steps,
        device,
        timesteps,
        sigmas,
        mu=mu,
    )
    num_warmup_steps = max(
        len(timesteps) - num_inference_steps * self.scheduler.order, 0
    )
    self._num_timesteps = len(timesteps)

    # 6. Denoising loop
    with self.progress_bar(total=num_inference_steps) as progress_bar:
        for i, t in enumerate(timesteps):
            if self.interrupt:
                continue

            # broadcast to batch dimension in a way that's compatible with ONNX/Core ML
            timestep = t.expand(latents.shape[0]).to(latents.dtype)

            # handle guidance
            if self.transformer.config.guidance_embeds:
                guidance = torch.tensor([guidance_scale], device=device)
                guidance = guidance.expand(latents.shape[0])
            else:
                guidance = None
            noise_pred = tranformer_forward(
                self.transformer,
                model_config=model_config,
                # Inputs of the condition (new feature)
                condition_latents=condition_latents if use_condition else None,
                condition_ids=condition_ids if use_condition else None,
                condition_type_ids=condition_type_ids if use_condition else None,
                # Inputs to the original transformer
                hidden_states=latents,
                # YiYi notes: divide it by 1000 for now because we scale it by 1000 in the transformer model (we should not keep it but I want to keep the inputs same for the model for testing)
                timestep=timestep / 1000,
                guidance=guidance,
                pooled_projections=pooled_prompt_embeds,
                encoder_hidden_states=prompt_embeds,
                txt_ids=text_ids,
                img_ids=latent_image_ids,
                joint_attention_kwargs=self.joint_attention_kwargs,
                return_dict=False,
            )[0]

            # compute the previous noisy sample x_t -> x_t-1
            latents_dtype = latents.dtype
            latents = self.scheduler.step(noise_pred, t, latents, return_dict=False)[0]

            if latents.dtype != latents_dtype:
                if torch.backends.mps.is_available():
                    # some platforms (eg. apple mps) misbehave due to a pytorch bug: https://github.com/pytorch/pytorch/pull/99272
                    latents = latents.to(latents_dtype)

            if callback_on_step_end is not None:
                callback_kwargs = {}
                for k in callback_on_step_end_tensor_inputs:
                    callback_kwargs[k] = locals()[k]
                callback_outputs = callback_on_step_end(self, i, t, callback_kwargs)

                latents = callback_outputs.pop("latents", latents)
                prompt_embeds = callback_outputs.pop("prompt_embeds", prompt_embeds)

            # call the callback, if provided
            if i == len(timesteps) - 1 or (
                (i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0
            ):
                progress_bar.update()

    if output_type == "latent":
        image = latents

    else:
        latents = self._unpack_latents(latents, height, width, self.vae_scale_factor)
        latents = (
            latents / self.vae.config.scaling_factor
        ) + self.vae.config.shift_factor
        image = self.vae.decode(latents, return_dict=False)[0]
        image = self.image_processor.postprocess(image, output_type=output_type)

    # Offload all models
    self.maybe_free_model_hooks()

    if condition_scale != 1:
        for name, module in pipeline.transformer.named_modules():
            if not name.endswith(".attn"):
                continue
            del module.c_factor

    if not return_dict:
        return (image,)

    return FluxPipelineOutput(images=image)
```
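For context, a call into `generate` could look like the hypothetical sketch below. `Condition` comes from `src/condition.py`, which is not part of this commit, so its constructor signature is an assumption, as are the base checkpoint and the pre-loaded adapter names (`subject_512` / `subject_1024`, the names `pipeline.set_adapters` expects above).

```python
# Hypothetical usage sketch (not part of this commit). Assumes src/condition.py
# provides Condition(condition_type, image) and that LoRA adapters named
# "subject_512" / "subject_1024" were already loaded into the pipeline elsewhere.
import torch
from PIL import Image
from diffusers.pipelines import FluxPipeline
from src.generate import generate, seed_everything
from src.condition import Condition

pipe = FluxPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-schnell", torch_dtype=torch.bfloat16
).to("cuda")
# pipe.load_lora_weights(..., adapter_name="subject_512")  # assumed done beforehand

seed_everything(42)
subject = Condition("subject", Image.open("subject.png").resize((512, 512)))
result = generate(
    pipe,
    conditions=[subject],
    prompt="A photo of the subject in a forest",
    height=512,
    width=512,
    num_inference_steps=8,
    condition_scale=1.0,
)
result.images[0].save("output.png")
```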
src/lora_controller.py DELETED (75 lines removed)

```python
from peft.tuners.tuners_utils import BaseTunerLayer
from typing import List, Any, Optional, Type


class enable_lora:
    def __init__(self, lora_modules: List[BaseTunerLayer], activated: bool) -> None:
        self.activated: bool = activated
        if activated:
            return
        self.lora_modules: List[BaseTunerLayer] = [
            each for each in lora_modules if isinstance(each, BaseTunerLayer)
        ]
        self.scales = [
            {
                active_adapter: lora_module.scaling[active_adapter]
                for active_adapter in lora_module.active_adapters
            }
            for lora_module in self.lora_modules
        ]

    def __enter__(self) -> None:
        if self.activated:
            return

        for lora_module in self.lora_modules:
            if not isinstance(lora_module, BaseTunerLayer):
                continue
            lora_module.scale_layer(0)

    def __exit__(
        self,
        exc_type: Optional[Type[BaseException]],
        exc_val: Optional[BaseException],
        exc_tb: Optional[Any],
    ) -> None:
        if self.activated:
            return
        for i, lora_module in enumerate(self.lora_modules):
            if not isinstance(lora_module, BaseTunerLayer):
                continue
            for active_adapter in lora_module.active_adapters:
                lora_module.scaling[active_adapter] = self.scales[i][active_adapter]


class set_lora_scale:
    def __init__(self, lora_modules: List[BaseTunerLayer], scale: float) -> None:
        self.lora_modules: List[BaseTunerLayer] = [
            each for each in lora_modules if isinstance(each, BaseTunerLayer)
        ]
        self.scales = [
            {
                active_adapter: lora_module.scaling[active_adapter]
                for active_adapter in lora_module.active_adapters
            }
            for lora_module in self.lora_modules
        ]
        self.scale = scale

    def __enter__(self) -> None:
        for lora_module in self.lora_modules:
            if not isinstance(lora_module, BaseTunerLayer):
                continue
            lora_module.scale_layer(self.scale)

    def __exit__(
        self,
        exc_type: Optional[Type[BaseException]],
        exc_val: Optional[BaseException],
        exc_tb: Optional[Any],
    ) -> None:
        for i, lora_module in enumerate(self.lora_modules):
            if not isinstance(lora_module, BaseTunerLayer):
                continue
            for active_adapter in lora_module.active_adapters:
                lora_module.scaling[active_adapter] = self.scales[i][active_adapter]
```
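Both classes are context managers over PEFT `BaseTunerLayer` modules: `enable_lora(modules, activated)` zeroes the LoRA scaling on entry when `activated` is False and restores the saved per-adapter scales on exit (this is how `latent_lora=False` switches the adapters off for the latent branch in `block.py`), while `set_lora_scale` temporarily runs the adapters at a chosen strength. A small self-contained sketch, assuming a recent `peft` release where `inject_adapter_in_model` is available:

```python
# Minimal sketch of the two context managers on a toy LoRA-wrapped Linear layer.
import torch
import torch.nn as nn
from peft import LoraConfig, inject_adapter_in_model
from src.lora_controller import enable_lora, set_lora_scale

class Tiny(nn.Module):
    def __init__(self):
        super().__init__()
        self.proj = nn.Linear(8, 8)

    def forward(self, x):
        return self.proj(x)

# After injection, model.proj is a peft LoRA layer (a BaseTunerLayer).
model = inject_adapter_in_model(LoraConfig(r=4, target_modules=["proj"]), Tiny())
x = torch.randn(1, 8)

with enable_lora((model.proj,), False):
    y_base = model(x)   # LoRA scaling forced to 0: base weights only

with set_lora_scale([model.proj], scale=0.5):
    y_half = model(x)   # LoRA contribution at half strength

y_full = model(x)       # original per-adapter scaling restored on exit
```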
src/transformer.py DELETED (270 lines removed)

```python
import torch
from diffusers.pipelines import FluxPipeline
from typing import List, Union, Optional, Dict, Any, Callable
from .block import block_forward, single_block_forward
from .lora_controller import enable_lora
from diffusers.models.transformers.transformer_flux import (
    FluxTransformer2DModel,
    Transformer2DModelOutput,
    USE_PEFT_BACKEND,
    is_torch_version,
    scale_lora_layers,
    unscale_lora_layers,
    logger,
)
import numpy as np


def prepare_params(
    hidden_states: torch.Tensor,
    encoder_hidden_states: torch.Tensor = None,
    pooled_projections: torch.Tensor = None,
    timestep: torch.LongTensor = None,
    img_ids: torch.Tensor = None,
    txt_ids: torch.Tensor = None,
    guidance: torch.Tensor = None,
    joint_attention_kwargs: Optional[Dict[str, Any]] = None,
    controlnet_block_samples=None,
    controlnet_single_block_samples=None,
    return_dict: bool = True,
    **kwargs: dict,
):
    return (
        hidden_states,
        encoder_hidden_states,
        pooled_projections,
        timestep,
        img_ids,
        txt_ids,
        guidance,
        joint_attention_kwargs,
        controlnet_block_samples,
        controlnet_single_block_samples,
        return_dict,
    )


def tranformer_forward(
    transformer: FluxTransformer2DModel,
    condition_latents: torch.Tensor,
    condition_ids: torch.Tensor,
    condition_type_ids: torch.Tensor,
    model_config: Optional[Dict[str, Any]] = {},
    return_conditional_latents: bool = False,
    c_t=0,
    **params: dict,
):
    self = transformer
    use_condition = condition_latents is not None
    use_condition_in_single_blocks = model_config.get(
        "use_condition_in_single_blocks", True
    )
    # if return_conditional_latents is True, use_condition and use_condition_in_single_blocks must be True
    assert not return_conditional_latents or (
        use_condition and use_condition_in_single_blocks
    ), "`return_conditional_latents` is True, `use_condition` and `use_condition_in_single_blocks` must be True"

    (
        hidden_states,
        encoder_hidden_states,
        pooled_projections,
        timestep,
        img_ids,
        txt_ids,
        guidance,
        joint_attention_kwargs,
        controlnet_block_samples,
        controlnet_single_block_samples,
        return_dict,
    ) = prepare_params(**params)

    if joint_attention_kwargs is not None:
        joint_attention_kwargs = joint_attention_kwargs.copy()
        lora_scale = joint_attention_kwargs.pop("scale", 1.0)
    else:
        lora_scale = 1.0

    if USE_PEFT_BACKEND:
        # weight the lora layers by setting `lora_scale` for each PEFT layer
        scale_lora_layers(self, lora_scale)
    else:
        if (
            joint_attention_kwargs is not None
            and joint_attention_kwargs.get("scale", None) is not None
        ):
            logger.warning(
                "Passing `scale` via `joint_attention_kwargs` when not using the PEFT backend is ineffective."
            )
    with enable_lora((self.x_embedder,), model_config.get("latent_lora", False)):
        hidden_states = self.x_embedder(hidden_states)
    condition_latents = self.x_embedder(condition_latents) if use_condition else None

    timestep = timestep.to(hidden_states.dtype) * 1000
    if guidance is not None:
        guidance = guidance.to(hidden_states.dtype) * 1000
    else:
        guidance = None
    temb = (
        self.time_text_embed(timestep, pooled_projections)
        if guidance is None
        else self.time_text_embed(timestep, guidance, pooled_projections)
    )
    cond_temb = (
        self.time_text_embed(torch.ones_like(timestep) * c_t * 1000, pooled_projections)
        if guidance is None
        else self.time_text_embed(
            torch.ones_like(timestep) * c_t * 1000, guidance, pooled_projections
        )
    )
    if hasattr(self, "cond_type_embed") and condition_type_ids is not None:
        cond_type_proj = self.time_text_embed.time_proj(condition_type_ids[0])
        cond_type_emb = self.cond_type_embed(cond_type_proj.to(dtype=cond_temb.dtype))
        cond_temb = cond_temb + cond_type_emb
    encoder_hidden_states = self.context_embedder(encoder_hidden_states)

    if txt_ids.ndim == 3:
        logger.warning(
            "Passing `txt_ids` 3d torch.Tensor is deprecated."
            "Please remove the batch dimension and pass it as a 2d torch Tensor"
        )
        txt_ids = txt_ids[0]
    if img_ids.ndim == 3:
        logger.warning(
            "Passing `img_ids` 3d torch.Tensor is deprecated."
            "Please remove the batch dimension and pass it as a 2d torch Tensor"
        )
        img_ids = img_ids[0]

    ids = torch.cat((txt_ids, img_ids), dim=0)
    image_rotary_emb = self.pos_embed(ids)
    if use_condition:
        cond_ids = condition_ids
        cond_rotary_emb = self.pos_embed(cond_ids)

    # hidden_states = torch.cat([hidden_states, condition_latents], dim=1)

    for index_block, block in enumerate(self.transformer_blocks):
        if self.training and self.gradient_checkpointing:

            def create_custom_forward(module, return_dict=None):
                def custom_forward(*inputs):
                    if return_dict is not None:
                        return module(*inputs, return_dict=return_dict)
                    else:
                        return module(*inputs)

                return custom_forward

            ckpt_kwargs: Dict[str, Any] = (
                {"use_reentrant": False} if is_torch_version(">=", "1.11.0") else {}
            )
            encoder_hidden_states, hidden_states = torch.utils.checkpoint.checkpoint(
                create_custom_forward(block),
                hidden_states,
                encoder_hidden_states,
                temb,
                image_rotary_emb,
                **ckpt_kwargs,
            )

        else:
            encoder_hidden_states, hidden_states, condition_latents = block_forward(
                block,
                model_config=model_config,
                hidden_states=hidden_states,
                encoder_hidden_states=encoder_hidden_states,
                condition_latents=condition_latents if use_condition else None,
                temb=temb,
                cond_temb=cond_temb if use_condition else None,
                cond_rotary_emb=cond_rotary_emb if use_condition else None,
                image_rotary_emb=image_rotary_emb,
            )

        # controlnet residual
        if controlnet_block_samples is not None:
            interval_control = len(self.transformer_blocks) / len(
                controlnet_block_samples
            )
            interval_control = int(np.ceil(interval_control))
            hidden_states = (
                hidden_states
                + controlnet_block_samples[index_block // interval_control]
            )
    hidden_states = torch.cat([encoder_hidden_states, hidden_states], dim=1)

    for index_block, block in enumerate(self.single_transformer_blocks):
        if self.training and self.gradient_checkpointing:

            def create_custom_forward(module, return_dict=None):
                def custom_forward(*inputs):
                    if return_dict is not None:
                        return module(*inputs, return_dict=return_dict)
                    else:
                        return module(*inputs)

                return custom_forward

            ckpt_kwargs: Dict[str, Any] = (
                {"use_reentrant": False} if is_torch_version(">=", "1.11.0") else {}
            )
            hidden_states = torch.utils.checkpoint.checkpoint(
                create_custom_forward(block),
                hidden_states,
                temb,
                image_rotary_emb,
                **ckpt_kwargs,
            )

        else:
            result = single_block_forward(
                block,
                model_config=model_config,
                hidden_states=hidden_states,
                temb=temb,
                image_rotary_emb=image_rotary_emb,
                **(
                    {
                        "condition_latents": condition_latents,
                        "cond_temb": cond_temb,
                        "cond_rotary_emb": cond_rotary_emb,
                    }
                    if use_condition_in_single_blocks and use_condition
                    else {}
                ),
            )
            if use_condition_in_single_blocks and use_condition:
                hidden_states, condition_latents = result
            else:
                hidden_states = result

        # controlnet residual
        if controlnet_single_block_samples is not None:
            interval_control = len(self.single_transformer_blocks) / len(
                controlnet_single_block_samples
            )
            interval_control = int(np.ceil(interval_control))
            hidden_states[:, encoder_hidden_states.shape[1] :, ...] = (
                hidden_states[:, encoder_hidden_states.shape[1] :, ...]
                + controlnet_single_block_samples[index_block // interval_control]
            )

    hidden_states = hidden_states[:, encoder_hidden_states.shape[1] :, ...]

    hidden_states = self.norm_out(hidden_states, temb)
    output = self.proj_out(hidden_states)
    if return_conditional_latents:
        condition_latents = (
            self.norm_out(condition_latents, cond_temb) if use_condition else None
        )
        condition_output = self.proj_out(condition_latents) if use_condition else None

    if USE_PEFT_BACKEND:
        # remove `lora_scale` from each PEFT layer
        unscale_lora_layers(self, lora_scale)

    if not return_dict:
        return (
            (output,) if not return_conditional_latents else (output, condition_output)
        )

    return Transformer2DModelOutput(sample=output)
```
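When gradient checkpointing is on, both loops wrap each block in `create_custom_forward` and run it through `torch.utils.checkpoint.checkpoint`, trading activation memory for recomputation during the backward pass. The same pattern on a stand-alone toy module (a sketch for illustration, not part of the commit):

```python
# The create_custom_forward + checkpoint pattern from tranformer_forward, shown
# on a toy block: intermediate activations inside the block are recomputed
# during backward instead of being stored during forward.
import torch
import torch.nn as nn

block = nn.Sequential(nn.Linear(16, 64), nn.GELU(), nn.Linear(64, 16))

def create_custom_forward(module):
    def custom_forward(*inputs):
        return module(*inputs)
    return custom_forward

x = torch.randn(2, 16, requires_grad=True)
y = torch.utils.checkpoint.checkpoint(
    create_custom_forward(block), x, use_reentrant=False
)
y.sum().backward()
print(x.grad.shape)  # gradients still reach the input; intermediates were recomputed
```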