Spaces:

nvn04
/

StyleSnap

Build error

App Files Files Community

nvn04 committed on Jan 8

Commit

c3e97f1

verified ·

1 Parent(s): 6219686

Update app.py

Browse files

Files changed (1) hide show

app.py +205 -356

app.py CHANGED Viewed

@@ -17,17 +17,22 @@ from model.pipeline import CatVTONPipeline, CatVTONPix2PixPipeline
 from model.flux.pipeline_flux_tryon import FluxTryOnPipeline
 from utils import init_weight_dtype, resize_and_crop, resize_and_padding
 def parse_args():
     parser = argparse.ArgumentParser(description="Simple example of a training script.")
     parser.add_argument(
         "--base_model_path",
         type=str,
-        default="booksforcharlie/stable-diffusion-inpainting",  # Change to a copy repo as runawayml delete original repo
         help=(
             "The path to the base model to use for evaluation. This can be a local path or a model identifier from the Model Hub."
         ),
     )
     parser.add_argument(
         "--resume_path",
         type=str,
@@ -88,70 +93,97 @@ def parse_args():
     )
     args = parser.parse_args()
     env_local_rank = int(os.environ.get("LOCAL_RANK", -1))
     if env_local_rank != -1 and env_local_rank != args.local_rank:
         args.local_rank = env_local_rank
     return args
 def image_grid(imgs, rows, cols):
-    assert len(imgs) == rows * cols
     w, h = imgs[0].size
-    grid = Image.new("RGB", size=(cols * w, rows * h))
     for i, img in enumerate(imgs):
         grid.paste(img, box=(i % cols * w, i // cols * h))
     return grid
 args = parse_args()
-repo_path = snapshot_download(repo_id=args.resume_path)
-# Pipeline
 pipeline = CatVTONPipeline(
-    base_ckpt=args.base_model_path,
-    attn_ckpt=repo_path,
     attn_ckpt_version="mix",
-    weight_dtype=init_weight_dtype(args.mixed_precision),
-    use_tf32=args.allow_tf32,
-    device='cuda'
 )
-# AutoMasker
-mask_processor = VaeImageProcessor(vae_scale_factor=8, do_normalize=False, do_binarize=True, do_convert_grayscale=True)
 automasker = AutoMasker(
-    densepose_ckpt=os.path.join(repo_path, "DensePose"),
-    schp_ckpt=os.path.join(repo_path, "SCHP"),
     device='cuda',
 )
 def submit_function(
     person_image,
     cloth_image,
-    cloth_type,
     num_inference_steps,
     guidance_scale,
     seed,
-    show_type
 ):
-    person_image, mask = person_image["background"], person_image["layers"][0]
-    mask = Image.open(mask).convert("L")
-    if len(np.unique(np.array(mask))) == 1:
         mask = None
     else:
-        mask = np.array(mask)
-        mask[mask > 0] = 255
-        mask = Image.fromarray(mask)
-    tmp_folder = args.output_dir
-    date_str = datetime.now().strftime("%Y%m%d%H%M%S")
-    result_save_path = os.path.join(tmp_folder, date_str[:8], date_str[8:] + ".png")
     if not os.path.exists(os.path.join(tmp_folder, date_str[:8])):
-        os.makedirs(os.path.join(tmp_folder, date_str[:8]))
     generator = None
-    if seed != -1:
         generator = torch.Generator(device='cuda').manual_seed(seed)
     person_image = Image.open(person_image).convert("RGB")
     cloth_image = Image.open(cloth_image).convert("RGB")
     person_image = resize_and_crop(person_image, (args.width, args.height))
@@ -159,14 +191,15 @@ def submit_function(
     # Process mask
     if mask is not None:
-        mask = resize_and_crop(mask, (args.width, args.height))
     else:
         mask = automasker(
             person_image,
             cloth_type
-        )['mask']
-    mask = mask_processor.blur(mask, blur_factor=9)
     # Inference
     # try:
     result_image = pipeline(
@@ -182,90 +215,13 @@ def submit_function(
     #         "An error occurred. Please try again later: {}".format(e)
     #     )
-    # Post-process
-    masked_person = vis_mask(person_image, mask)
-    save_result_image = image_grid([person_image, masked_person, cloth_image, result_image], 1, 4)
     save_result_image.save(result_save_path)
-    if show_type == "result only":
-        return result_image
-    else:
-        width, height = person_image.size
-        if show_type == "input & result":
-            condition_width = width // 2
-            conditions = image_grid([person_image, cloth_image], 2, 1)
-        else:
-            condition_width = width // 3
-            conditions = image_grid([person_image, masked_person , cloth_image], 3, 1)
-        conditions = conditions.resize((condition_width, height), Image.NEAREST)
-        new_result_image = Image.new("RGB", (width + condition_width + 5, height))
-        new_result_image.paste(conditions, (0, 0))
-        new_result_image.paste(result_image, (condition_width + 5, 0))
-    return new_result_image
-@spaces.GPU(duration=120)
-def submit_function(
-    person_image,
-    cloth_image,
-    cloth_type,
-    num_inference_steps,
-    guidance_scale,
-    seed,
-    show_type
-):
-    person_image, mask = person_image["background"], person_image["layers"][0]
-    mask = Image.open(mask).convert("L")
-    if len(np.unique(np.array(mask))) == 1:
-        mask = None
-    else:
-        mask = np.array(mask)
-        mask[mask > 0] = 255
-        mask = Image.fromarray(mask)
-    tmp_folder = args.output_dir
-    date_str = datetime.now().strftime("%Y%m%d%H%M%S")
-    result_save_path = os.path.join(tmp_folder, date_str[:8], date_str[8:] + ".png")
-    if not os.path.exists(os.path.join(tmp_folder, date_str[:8])):
-        os.makedirs(os.path.join(tmp_folder, date_str[:8]))
-    generator = None
-    if seed != -1:
-        generator = torch.Generator(device='cuda').manual_seed(seed)
-    person_image = Image.open(person_image).convert("RGB")
-    cloth_image = Image.open(cloth_image).convert("RGB")
-    person_image = resize_and_crop(person_image, (args.width, args.height))
-    cloth_image = resize_and_padding(cloth_image, (args.width, args.height))
-    # Process mask
-    if mask is not None:
-        mask = resize_and_crop(mask, (args.width, args.height))
-    else:
-        mask = automasker(
-            person_image,
-            cloth_type
-        )['mask']
-    mask = mask_processor.blur(mask, blur_factor=9)
-    # Inference
-    # try:
-    result_image = pipeline(
-        image=person_image,
-        condition_image=cloth_image,
-        mask=mask,
-        num_inference_steps=num_inference_steps,
-        guidance_scale=guidance_scale,
-        generator=generator
-    )[0]
-    # except Exception as e:
-    #     raise gr.Error(
-    #         "An error occurred. Please try again later: {}".format(e)
-    #     )
-    # Post-process
-    masked_person = vis_mask(person_image, mask)
-    save_result_image = image_grid([person_image, masked_person, cloth_image, result_image], 1, 4)
-    save_result_image.save(result_save_path)
     if show_type == "result only":
         return result_image
     else:
@@ -276,272 +232,165 @@ def submit_function(
         else:
             condition_width = width // 3
             conditions = image_grid([person_image, masked_person , cloth_image], 3, 1)
-        conditions = conditions.resize((condition_width, height), Image.NEAREST)
-        new_result_image = Image.new("RGB", (width + condition_width + 5, height))
-        new_result_image.paste(conditions, (0, 0))
-        new_result_image.paste(result_image, (condition_width + 5, 0))
-    return new_result_image
-@spaces.GPU(duration=120)
-def submit_function_p2p(
-    person_image,
-    cloth_image,
-    num_inference_steps,
-    guidance_scale,
-    seed):
-    person_image= person_image["background"]
-    tmp_folder = args.output_dir
-    date_str = datetime.now().strftime("%Y%m%d%H%M%S")
-    result_save_path = os.path.join(tmp_folder, date_str[:8], date_str[8:] + ".png")
-    if not os.path.exists(os.path.join(tmp_folder, date_str[:8])):
-        os.makedirs(os.path.join(tmp_folder, date_str[:8]))
-    generator = None
-    if seed != -1:
-        generator = torch.Generator(device='cuda').manual_seed(seed)
-    person_image = Image.open(person_image).convert("RGB")
-    cloth_image = Image.open(cloth_image).convert("RGB")
-    person_image = resize_and_crop(person_image, (args.width, args.height))
-    cloth_image = resize_and_padding(cloth_image, (args.width, args.height))
-    # Inference
-    try:
-        result_image = pipeline_p2p(
-            image=person_image,
-            condition_image=cloth_image,
-            num_inference_steps=num_inference_steps,
-            guidance_scale=guidance_scale,
-            generator=generator
-        )[0]
-    except Exception as e:
-        raise gr.Error(
-            "An error occurred. Please try again later: {}".format(e)
-        )
-    # Post-process
-    save_result_image = image_grid([person_image, cloth_image, result_image], 1, 3)
-    save_result_image.save(result_save_path)
-    return result_image
-@spaces.GPU(duration=120)
-def submit_function_flux(
-    person_image,
-    cloth_image,
-    cloth_type,
-    num_inference_steps,
-    guidance_scale,
-    seed,
-    show_type
-):
-    # Process image editor input
-    person_image, mask = person_image["background"], person_image["layers"][0]
-    mask = Image.open(mask).convert("L")
-    if len(np.unique(np.array(mask))) == 1:
-        mask = None
-    else:
-        mask = np.array(mask)
-        mask[mask > 0] = 255
-        mask = Image.fromarray(mask)
-    # Set random seed
-    generator = None
-    if seed != -1:
-        generator = torch.Generator(device='cuda').manual_seed(seed)
-    # Process input images
-    person_image = Image.open(person_image).convert("RGB")
-    cloth_image = Image.open(cloth_image).convert("RGB")
-    # Adjust image sizes
-    person_image = resize_and_crop(person_image, (args.width, args.height))
-    cloth_image = resize_and_padding(cloth_image, (args.width, args.height))
-    # Process mask
-    if mask is not None:
-        mask = resize_and_crop(mask, (args.width, args.height))
-    else:
-        mask = automasker(
-            person_image,
-            cloth_type
-        )['mask']
-    mask = mask_processor.blur(mask, blur_factor=9)
-    # Inference
-    result_image = pipeline_flux(
-        image=person_image,
-        condition_image=cloth_image,
-        mask_image=mask,
-        width=args.width,
-        height=args.height,
-        num_inference_steps=num_inference_steps,
-        guidance_scale=guidance_scale,
-        generator=generator
-    ).images[0]
-    # Post-processing
-    masked_person = vis_mask(person_image, mask)
-    # Return result based on show type
-    if show_type == "result only":
-        return result_image
-    else:
-        width, height = person_image.size
-        if show_type == "input & result":
-            condition_width = width // 2
-            conditions = image_grid([person_image, cloth_image], 2, 1)
-        else:
-            condition_width = width // 3
-            conditions = image_grid([person_image, masked_person, cloth_image], 3, 1)
-        conditions = conditions.resize((condition_width, height), Image.NEAREST)
-        new_result_image = Image.new("RGB", (width + condition_width + 5, height))
         new_result_image.paste(conditions, (0, 0))
         new_result_image.paste(result_image, (condition_width + 5, 0))
-        return new_result_image
 def person_example_fn(image_path):
     return image_path
 HEADER = ""
 def app_gradio():
     with gr.Blocks(title="CatVTON") as demo:
         gr.Markdown(HEADER)
-        with gr.Row():
-            with gr.Column(scale=1, min_width=350):
-                with gr.Row():
-                    image_path = gr.Image(
-                        type="filepath",
-                        interactive=True,
-                        visible=False,
-                    )
-                    person_image = gr.ImageEditor(
-                        interactive=True, label="Person Image", type="filepath"
-                    )
-                with gr.Row():
-                    with gr.Column(scale=1, min_width=230):
-                        cloth_image = gr.Image(
-                            interactive=True, label="Condition Image", type="filepath"
-                        )
-                    with gr.Column(scale=1, min_width=120):
-                        gr.Markdown(
-                            '<span style="color: #808080; font-size: small;">Two ways to provide Mask:<br>1. Upload the person image and use the `🖌️` above to draw the Mask (higher priority)<br>2. Select the `Try-On Cloth Type` to generate automatically </span>'
                         )
-                        cloth_type = gr.Radio(
-                            label="Try-On Cloth Type",
-                            choices=["upper", "lower", "overall"],
-                            value="upper",
                         )
-                submit = gr.Button("Submit")
-                gr.Markdown(
-                    '<center><span style="color: #FF0000">!!! Click only Once, Wait for Delay !!!</span></center>'
-                )
-                gr.Markdown(
-                    '<span style="color: #808080; font-size: small;">Advanced options can adjust details:<br>1. `Inference Step` may enhance details;<br>2. `CFG` is highly correlated with saturation;<br>3. `Random seed` may improve pseudo-shadow.</span>'
-                )
-                with gr.Accordion("Advanced Options", open=False):
-                    num_inference_steps = gr.Slider(
-                        label="Inference Step", minimum=10, maximum=100, step=5, value=50
-                    )
-                    # Guidence Scale
-                    guidance_scale = gr.Slider(
-                        label="CFG Strenth", minimum=0.0, maximum=7.5, step=0.5, value=2.5
                     )
-                    # Random Seed
-                    seed = gr.Slider(
-                        label="Seed", minimum=-1, maximum=10000, step=1, value=42
                     )
-                    show_type = gr.Radio(
-                        label="Show Type",
-                        choices=["result only", "input & result", "input & mask & result"],
-                        value="input & mask & result",
-                    )
-            with gr.Column(scale=2, min_width=500):
-                result_image = gr.Image(interactive=False, label="Result")
-                with gr.Row():
-                    # Photo Examples
-                    root_path = "resource/demo/example"
-                    with gr.Column():
-                        men_exm = gr.Examples(
-                            examples=[
-                                os.path.join(root_path, "person", "men", _)
-                                for _ in os.listdir(os.path.join(root_path, "person", "men"))
-                            ],
-                            examples_per_page=4,
-                            inputs=image_path,
-                            label="Person Examples ①",
-                        )
-                        women_exm = gr.Examples(
-                            examples=[
-                                os.path.join(root_path, "person", "women", _)
-                                for _ in os.listdir(os.path.join(root_path, "person", "women"))
-                            ],
-                            examples_per_page=4,
-                            inputs=image_path,
-                            label="Person Examples ②",
-                        )
-                        gr.Markdown(
-                            '<span style="color: #808080; font-size: small;">*Person examples come from the demos of <a href="https://huggingface.co/spaces/levihsu/OOTDiffusion">OOTDiffusion</a> and <a href="https://www.outfitanyone.org">OutfitAnyone</a>. </span>'
-                        )
-                    with gr.Column():
-                        condition_upper_exm = gr.Examples(
-                            examples=[
-                                os.path.join(root_path, "condition", "upper", _)
-                                for _ in os.listdir(os.path.join(root_path, "condition", "upper"))
-                            ],
-                            examples_per_page=4,
-                            inputs=cloth_image,
-                            label="Condition Upper Examples",
                         )
-                        condition_overall_exm = gr.Examples(
-                            examples=[
-                                os.path.join(root_path, "condition", "overall", _)
-                                for _ in os.listdir(os.path.join(root_path, "condition", "overall"))
-                            ],
-                            examples_per_page=4,
-                            inputs=cloth_image,
-                            label="Condition Overall Examples",
                         )
-                        condition_person_exm = gr.Examples(
-                            examples=[
-                                os.path.join(root_path, "condition", "person", _)
-                                for _ in os.listdir(os.path.join(root_path, "condition", "person"))
-                            ],
-                            examples_per_page=4,
-                            inputs=cloth_image,
-                            label="Condition Reference Person Examples",
                         )
-                        gr.Markdown(
-                            '<span style="color: #808080; font-size: small;">*Condition examples come from the Internet. </span>'
                         )
-            image_path.change(
-                person_example_fn, inputs=image_path, outputs=person_image
-            )
-            submit.click(
-                submit_function,
-                [
-                    person_image,
-                    cloth_image,
-                    cloth_type,
-                    num_inference_steps,
-                    guidance_scale,
-                    seed,
-                    show_type,
-                ],
-                result_image,
-            )
-    demo.queue().launch(share=True, show_error=True)
 if __name__ == "__main__":

 from model.flux.pipeline_flux_tryon import FluxTryOnPipeline
 from utils import init_weight_dtype, resize_and_crop, resize_and_padding
+access_token = os.getenv('HF_ACCESS_TOKEN')
+# dùng để phân tích các tham số từ dòng lệnh và trả về cấu hình cài đặt cho chương trình
 def parse_args():
+    #  Khởi tạo đối tượng để quản lý các tham số dòng lệnh.
     parser = argparse.ArgumentParser(description="Simple example of a training script.")
     parser.add_argument(
         "--base_model_path",
         type=str,
+        default="booksforcharlie/stable-diffusion-inpainting",
         help=(
             "The path to the base model to use for evaluation. This can be a local path or a model identifier from the Model Hub."
         ),
     )
     parser.add_argument(
         "--resume_path",
         type=str,
     )
     args = parser.parse_args()
+    # Xử lý tham số:
+    # Đảm bảo rằng local_rank (chỉ số GPU cục bộ khi chạy phân tán) được đồng bộ từ biến môi trường
+    # Khi chạy các tác vụ huấn luyện phân tán, hệ thống cần biết chỉ số GPU cục bộ để phân bổ tài nguyên.
     env_local_rank = int(os.environ.get("LOCAL_RANK", -1))
     if env_local_rank != -1 and env_local_rank != args.local_rank:
         args.local_rank = env_local_rank
     return args
+# Build one grid image from a list of input images, laid out in `rows` rows and `cols` columns.
 def image_grid(imgs, rows, cols):
+    assert len(imgs) == rows * cols # The number of images must fill the grid exactly
     w, h = imgs[0].size  # Cell size is taken from the first image only
+    grid = Image.new("RGB", size=(cols * w, rows * h)) # Blank RGB canvas sized for the whole grid
+    # Paste each image into its cell, filling left-to-right, then top-to-bottom
     for i, img in enumerate(imgs):
         grid.paste(img, box=(i % cols * w, i // cols * h))
     return grid
 args = parse_args()
+# Mask-based CatVTON
+catvton_repo = "zhengchong/CatVTON"
+repo_path = snapshot_download(repo_id=catvton_repo) # Download the model repository from the Hugging Face Hub; the result is used below as a local directory path.
+# Pipeline that performs the virtual try-on (mask-based variant)
 pipeline = CatVTONPipeline(
+    base_ckpt=args.base_model_path, # Base model checkpoint, taken from the --base_model_path argument.
+    attn_ckpt=repo_path,            # Attention checkpoint location: the downloaded CatVTON snapshot.
     attn_ckpt_version="mix",
+    weight_dtype=init_weight_dtype(args.mixed_precision), # Weight dtype chosen by init_weight_dtype from args.mixed_precision (presumably fp16/bf16/fp32 - confirm in utils).
+    use_tf32=args.allow_tf32,       # Forwarded from args.allow_tf32; presumably toggles TensorFloat32 math on GPUs that support it - confirm in CatVTONPipeline.
+    device='cuda'                   # Run the pipeline on the CUDA device.
 )
+# AutoMasker Part
+# VaeImageProcessor instance used for mask post-processing (its blur() is called in submit_function).
+mask_processor = VaeImageProcessor(
+    vae_scale_factor=8,     # VAE scale factor handed to the processor. NOTE(review): exact effect on resizing should be confirmed against the diffusers docs.
+    do_normalize=False,     # Disable the processor's pixel-value normalization.
+    do_binarize=True,       # Binarize processed images so the mask holds only two levels (exact values per diffusers docs - confirm).
+    do_convert_grayscale=True  # Convert processed images to grayscale.
+    )
+# AutoMasker: generates a try-on mask automatically when the user has not drawn one.
 automasker = AutoMasker(
+    densepose_ckpt=os.path.join(repo_path, "DensePose"), # "DensePose" sub-directory of the downloaded snapshot (presumably the body-pose model weights).
+    schp_ckpt=os.path.join(repo_path, "SCHP"),           # "SCHP" sub-directory of the downloaded snapshot (presumably the human-parsing model weights).
     device='cuda',
 )
+# Hàm này nhận dữ liệu đầu vào (ảnh người, ảnh quần áo, các tham số) và thực hiện các bước xử lý để trả về ảnh kết quả.
+@spaces.GPU(duration=120) # Gán GPU để thực hiện hàm submit_function, với thời gian tối đa là 120 giây.
+    # Định nghĩa hàm nhận vào các tham số sau
 def submit_function(
     person_image,
     cloth_image,
+    cloth_type,     # upper, lower, hoặc overall
     num_inference_steps,
     guidance_scale,
     seed,
+    show_type       # Kiểu hiển thị kết quả (chỉ kết quả, kết hợp ảnh gốc và kết quả, hoặc hiển thị cả mặt nạ).
 ):
+    # Xử lý mặt nạ (mask)
+    person_image,
+    mask = person_image["background"],      # Lấy ảnh người từ lớp nền.
+    person_image["layers"][0]               # Lấy mặt nạ do người dùng vẽ (nếu có).
+    mask = Image.open(mask).convert("L")    # Chuyển mặt nạ thành ảnh thang độ xám
+    if len(np.unique(np.array(mask))) == 1: # Nếu mặt nạ chỉ chứa một giá trị (ví dụ: toàn đen hoặc toàn trắng), thì không sử dụng mặt nạ (mask = None).
         mask = None
     else:
+        mask = np.array(mask)               # Chuyển mặt nạ thành mảng numpy.
+        mask[mask > 0] = 255                # Các pixel có giá trị lớn hơn 0 được chuyển thành 255 (trắng).
+        mask = Image.fromarray(mask)        # Chuyển mảng trở lại thành ảnh.
+    # Xử lý đường dẫn lưu trữ kết quả
+    tmp_folder = args.output_dir                                # Thư mục tạm thời lưu kết quả.
+    date_str = datetime.now().strftime("%Y%m%d%H%M%S")          # Chuỗi ngày giờ hiện tại (ví dụ: 20250108).
+    result_save_path = os.path.join(tmp_folder, date_str[:8], date_str[8:] + ".png") # Đường dẫn đầy đủ để lưu ảnh kết quả.
     if not os.path.exists(os.path.join(tmp_folder, date_str[:8])):
+        os.makedirs(os.path.join(tmp_folder, date_str[:8]))     # Tạo thư mục lưu trữ nếu chưa tồn tại.
+    # Xử lý seed ngẫu nhiên
     generator = None
+    if seed != -1:     # Nếu seed được cung cấp, mô hình sẽ sử dụng giá trị này để sinh dữ liệu (giữ tính ngẫu nhiên nhưng tái tạo được).
         generator = torch.Generator(device='cuda').manual_seed(seed)
+    # Chuẩn hóa ảnh đầu vào
     person_image = Image.open(person_image).convert("RGB")
     cloth_image = Image.open(cloth_image).convert("RGB")
     person_image = resize_and_crop(person_image, (args.width, args.height))
     # Process mask
     if mask is not None:
+        mask = resize_and_crop(mask, (args.width, args.height)) # Nếu mặt nạ được cung cấp, thay đổi kích thước cho phù hợp.
     else:
         mask = automasker(
             person_image,
             cloth_type
+        )['mask']   # Nếu không, tạo mặt nạ tự động bằng automasker, dựa trên loại quần áo (cloth_type).
+    mask = mask_processor.blur(mask, blur_factor=9) # Làm mờ mặt nạ (blur) để giảm bớt các cạnh sắc
+    # Suy luận mô hình: gán các tham số vô hàm tính toán, trả lại result là hình ảnh
     # Inference
     # try:
     result_image = pipeline(
     #         "An error occurred. Please try again later: {}".format(e)
     #     )
+    # Post-process - Xử lý hậu kỳ
+    # Tạo ảnh kết quả lưới
+    masked_person = vis_mask(person_image, mask)    # Hiển thị ảnh người với mặt nạ được áp dụng.
+    save_result_image = image_grid([person_image, masked_person, cloth_image, result_image], 1, 4) # Tạo một ảnh lưới chứa
     save_result_image.save(result_save_path)
+    # Điều chỉnh hiển thị kết quả
     if show_type == "result only":
         return result_image
     else:
         else:
             condition_width = width // 3
             conditions = image_grid([person_image, masked_person , cloth_image], 3, 1)
+        conditions = conditions.resize((condition_width, height), Image.NEAREST)
+        # conditions: Ảnh ghép ban đầu, được tạo từ các ảnh như ảnh người gốc, ảnh quần áo, và ảnh mặt nạ (tùy chọn).
+        # Tham số Image.NEAREST: Đây là phương pháp nội suy (interpolation) gần nhất, dùng để thay đổi kích thước ảnh mà không làm mờ hay mất chi tiết.
+        new_result_image = Image.new("RGB", (width + condition_width + 5, height)) # Image.new: Tạo một ảnh trống mới
         new_result_image.paste(conditions, (0, 0))
         new_result_image.paste(result_image, (condition_width + 5, 0))
+    return new_result_image
 def person_example_fn(image_path):
+    # Identity handler: returns the given path unchanged. In app_gradio it is wired to
+    # image_path.change so a selected example path is forwarded to the person image editor.
     return image_path
 HEADER = ""
 def app_gradio():
+    # Build the Gradio Blocks UI (a single "Mask-based" tab with inputs, advanced
+    # options, example galleries and the result view), wire the event handlers,
+    # then start the app with request queuing enabled.
     with gr.Blocks(title="CatVTON") as demo:
         gr.Markdown(HEADER)
+        with gr.Tab("Mask-based"):
+            with gr.Row():
+                with gr.Column(scale=1, min_width=350):
+                    # Person (model) image
+                    with gr.Row():
+                        # Hidden component: receives the path of a clicked person example,
+                        # which the change handler below forwards to the editor.
+                        image_path = gr.Image(
+                            type="filepath",
+                            interactive=True,
+                            visible=False,
                         )
+                        person_image = gr.ImageEditor(
+                            interactive=True, label="Person Image", type="filepath"
                         )
+                    # Garment (condition) image
+                    with gr.Row():
+                        with gr.Column(scale=1, min_width=230):
+                            cloth_image = gr.Image(
+                                interactive=True, label="Condition Image", type="filepath"
+                            )
+                        with gr.Column(scale=1, min_width=120):
+                            gr.Markdown(
+                                '<span style="color: #808080; font-size: small;">Two ways to provide Mask:<br>1. Upload the person image and use the `🖌️` above to draw the Mask (higher priority)<br>2. Select the `Try-On Cloth Type` to generate automatically </span>'
+                            )
+                            cloth_type = gr.Radio(
+                                label="Try-On Cloth Type",
+                                choices=["upper", "lower", "overall"],
+                                value="upper",
+                            )
+                    # Submit button - Run
+                    submit = gr.Button("Submit")
+                    gr.Markdown(
+                        '<center><span style="color: #FF0000">!!! Click only Once, Wait for Delay !!!</span></center>'
                     )
+                    # Advanced settings
+                    gr.Markdown(
+                        '<span style="color: #808080; font-size: small;">Advanced options can adjust details:<br>1. `Inference Step` may enhance details;<br>2. `CFG` is highly correlated with saturation;<br>3. `Random seed` may improve pseudo-shadow.</span>'
                     )
+                    with gr.Accordion("Advanced Options", open=False):
+                        num_inference_steps = gr.Slider(
+                            label="Inference Step", minimum=10, maximum=100, step=5, value=50
                         )
+                        # Guidance scale
+                        guidance_scale = gr.Slider(
+                            label="CFG Strenth", minimum=0.0, maximum=7.5, step=0.5, value=2.5
                         )
+                        # Random seed (the slider allows -1; submit_function skips seeding when seed == -1)
+                        seed = gr.Slider(
+                            label="Seed", minimum=-1, maximum=10000, step=1, value=42
                         )
+                        show_type = gr.Radio(
+                            label="Show Type",
+                            choices=["result only", "input & result", "input & mask & result"],
+                            value="input & mask & result",
                         )
+                with gr.Column(scale=2, min_width=500):
+                    # Result image
+                    result_image = gr.Image(interactive=False, label="Result")
+                    with gr.Row():
+                        # Photo examples, listed from sub-directories of root_path at UI build time
+                        root_path = "resource/demo/example"
+                        with gr.Column():
+                            men_exm = gr.Examples(
+                                examples=[
+                                    os.path.join(root_path, "person", "men", _)
+                                    for _ in os.listdir(os.path.join(root_path, "person", "men"))
+                                ],
+                                examples_per_page=4,
+                                inputs=image_path,
+                                label="Person Examples ①",
+                            )
+                            women_exm = gr.Examples(
+                                examples=[
+                                    os.path.join(root_path, "person", "women", _)
+                                    for _ in os.listdir(os.path.join(root_path, "person", "women"))
+                                ],
+                                examples_per_page=4,
+                                inputs=image_path,
+                                label="Person Examples ②",
+                            )
+                            gr.Markdown(
+                                '<span style="color: #808080; font-size: small;">*Person examples come from the demos of <a href="https://huggingface.co/spaces/levihsu/OOTDiffusion">OOTDiffusion</a> and <a href="https://www.outfitanyone.org">OutfitAnyone</a>. </span>'
+                            )
+                        with gr.Column():
+                            condition_upper_exm = gr.Examples(
+                                examples=[
+                                    os.path.join(root_path, "condition", "upper", _)
+                                    for _ in os.listdir(os.path.join(root_path, "condition", "upper"))
+                                ],
+                                examples_per_page=4,
+                                inputs=cloth_image,
+                                label="Condition Upper Examples",
+                            )
+                            condition_overall_exm = gr.Examples(
+                                examples=[
+                                    os.path.join(root_path, "condition", "overall", _)
+                                    for _ in os.listdir(os.path.join(root_path, "condition", "overall"))
+                                ],
+                                examples_per_page=4,
+                                inputs=cloth_image,
+                                label="Condition Overall Examples",
+                            )
+                            condition_person_exm = gr.Examples(
+                                examples=[
+                                    os.path.join(root_path, "condition", "person", _)
+                                    for _ in os.listdir(os.path.join(root_path, "condition", "person"))
+                                ],
+                                examples_per_page=4,
+                                inputs=cloth_image,
+                                label="Condition Reference Person Examples",
+                            )
+                            gr.Markdown(
+                                '<span style="color: #808080; font-size: small;">*Condition examples come from the Internet. </span>'
+                            )
+                # When a person example is picked, copy its path into the person image editor
+                image_path.change(
+                    person_example_fn, inputs=image_path, outputs=person_image
+                )
+                # Handler invoked when the Submit button is clicked
+                submit.click(
+                    submit_function,
+                    [
+                        person_image,
+                        cloth_image,
+                        cloth_type,
+                        num_inference_steps,
+                        guidance_scale,
+                        seed,
+                        show_type,
+                    ],
+                    result_image,
+                )
+    # Previous launch call, kept for reference:
+    # demo.queue().launch(share=True, show_error=True)
+    demo.queue().launch()
 if __name__ == "__main__":