Spaces:

dragynir
/

fashion_controlnet

Runtime error

dragynir committed on Jan 26, 2024

Commit

4f8bfe3

1 Parent(s): cb96de2

add adaptive model

Files changed (5) hide show

README.md CHANGED Viewed

@@ -22,5 +22,6 @@ a handsome man relaxing in a chair, shirt widely unbuttoned, eyes closed, close
 - [x] научиться записывать демку (научился Screencastify - поставил плагин в гугл)
 - [ ] добавить caption.csv в data/
-- [ ] прокинуть параметры в демке (seed и т д)
 - [ ] настроить запуск в hugging space

 - [x] научиться записывать демку (научился Screencastify - поставил плагин в гугл)
 - [ ] добавить caption.csv в data/
+- [ ] добавить adaptive resize
+- [ ] прокинуть параметры в демке (seed и т д), + adaptive resize размеры
 - [ ] настроить запуск в hugging space

app.py CHANGED Viewed

@@ -58,7 +58,7 @@ with block:
                         <p> This repo based on Unet from <a style="text-decoration: underline;" href="https://huggingface.co/spaces/wildoctopus/cloth-segmentation">cloth-segmentation</a>
         It's uses pre-trained U2NET to extract Upper body(red), Lower body(green), Full body(blue) masks, and then
-        run StableDiffusionXLControlNetPipeline with trained controlnet to generate image conditioned on this masks.
                         </p>
                 """)

                         <p> This repo based on Unet from <a style="text-decoration: underline;" href="https://huggingface.co/spaces/wildoctopus/cloth-segmentation">cloth-segmentation</a>
         It's uses pre-trained U2NET to extract Upper body(red), Lower body(green), Full body(blue) masks, and then
+        run StableDiffusionXLControlNetPipeline with trained controlnet_baseline to generate image conditioned on this masks.
                         </p>
                 """)

config.py CHANGED Viewed

@@ -15,6 +15,6 @@ class PipelineConfig:
     vae_path: str = 'madebyollin/sdxl-vae-fp16-fix'
-    controlnet_path: str = os.path.join(weights_path, 'controlnet')
     segmentation_model_path: str = os.path.join(weights_path, 'cloth_segm.pth')

     vae_path: str = 'madebyollin/sdxl-vae-fp16-fix'
+    controlnet_path: str = os.path.join(weights_path, 'controlnet_adaptive')
     segmentation_model_path: str = os.path.join(weights_path, 'cloth_segm.pth')

src/pipeline.py CHANGED Viewed

@@ -49,7 +49,10 @@ class FashionPipeline:
         # extract segmentation mask
         segm_mask = generate_mask(control_image, self.segmentation_model, device=self.device)
-        control_mask = self.create_control_image(segm_mask).resize((resolution, resolution))
         segm_mask = self.color_segmentation_mask(segm_mask)
@@ -81,6 +84,28 @@ class FashionPipeline:
         ch3 = (segm_mask == 3) * 255  # Full body(blue).
         return Image.fromarray(np.stack([ch1, ch2, ch3], axis=-1).astype('uint8'), 'RGB')
     def __init_pipeline(self):
         """Init models and SDXL pipeline."""
         self.segmentation_model = load_seg_model(

         # extract segmentation mask
         segm_mask = generate_mask(control_image, self.segmentation_model, device=self.device)
+        control_mask = self.adaptive_resize(
+            self.create_control_image(segm_mask),
+            target_image_size=resolution,
+        )
         segm_mask = self.color_segmentation_mask(segm_mask)
         ch3 = (segm_mask == 3) * 255  # Full body(blue).
         return Image.fromarray(np.stack([ch1, ch2, ch3], axis=-1).astype('uint8'), 'RGB')
+    def adaptive_resize(self, image, target_image_size=512, max_image_size=768, divisible=64):
+        """Resize ``image`` so its short side equals ``target_image_size``.
+
+        The long side is scaled by the same factor (keeping the aspect ratio),
+        rounded down to a multiple of ``divisible`` and capped at
+        ``max_image_size``.
+
+        Args:
+            image: Object exposing ``size`` as ``(width, height)`` and a
+                ``resize((width, height))`` method (presumably a PIL image).
+            target_image_size: Length of the short side after resizing.
+            max_image_size: Upper bound for the long side after resizing.
+            divisible: Both output sides are multiples of this value.
+
+        Returns:
+            Whatever ``image.resize`` returns for the computed size.
+
+        Raises:
+            AssertionError: If the size arguments are not multiples of
+                ``divisible`` or ``max_image_size < target_image_size``.
+        """
+        assert target_image_size % divisible == 0
+        assert max_image_size % divisible == 0
+        assert max_image_size >= target_image_size
+        width, height = image.size
+
+        def scale_long_side(long_side, short_side):
+            # Multiply before dividing and stay in floor-division arithmetic:
+            # dividing by a pre-computed float ratio can land just below an
+            # exact multiple and lose a whole ``divisible`` step.
+            scaled = target_image_size * long_side // short_side
+            scaled = (scaled // divisible) * divisible
+            return int(min(scaled, max_image_size))
+
+        if height > width:
+            # Portrait: width is the short side.
+            new_width = int(target_image_size)
+            new_height = scale_long_side(height, width)
+        else:
+            # Landscape or square: height is the short side. The width must
+            # grow with width / height (the previous code divided by the
+            # aspect ratio here, which shrank the long side instead).
+            new_height = int(target_image_size)
+            new_width = scale_long_side(width, height)
+        return image.resize((new_width, new_height))
     def __init_pipeline(self):
         """Init models and SDXL pipeline."""
         self.segmentation_model = load_seg_model(

weights/controlnet_adaptive/config.json ADDED Viewed

+{
+  "_class_name": "ControlNetModel",
+  "_diffusers_version": "0.25.0.dev0",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "class_embed_type": null,
+  "conditioning_channels": 3,
+  "conditioning_embedding_out_channels": [
+    16,
+    32,
+    96,
+    256
+  ],
+  "controlnet_conditioning_channel_order": "rgb",
+  "cross_attention_dim": 2048,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "global_pool_conditions": false,
+  "in_channels": 4,
+  "layers_per_block": 2,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_time_scale_shift": "default",
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "upcast_attention": null,
+  "use_linear_projection": true
+}