Spaces:

nvn04
/

StyleSnap

Build error

App Files Files Community

nvn04 committed on Jan 8

Commit

889dd25

verified ·

1 Parent(s): 3f087a3

Update app.py

Browse files

Files changed (1) hide show

app.py +99 -172

app.py CHANGED Viewed

@@ -17,9 +17,13 @@ from model.pipeline import CatVTONPipeline, CatVTONPix2PixPipeline
 from model.flux.pipeline_flux_tryon import FluxTryOnPipeline
 from utils import init_weight_dtype, resize_and_crop, resize_and_padding
 def parse_args():
     parser = argparse.ArgumentParser(description="Simple example of a training script.")
     parser.add_argument(
         "--base_model_path",
         type=str,
@@ -28,14 +32,7 @@ def parse_args():
             "The path to the base model to use for evaluation. This can be a local path or a model identifier from the Model Hub."
         ),
     )
-    parser.add_argument(
-        "--p2p_base_model_path",
-        type=str,
-        default="timbrooks/instruct-pix2pix",
-        help=(
-            "The path to the base model to use for evaluation. This can be a local path or a model identifier from the Model Hub."
-        ),
-    )
     parser.add_argument(
         "--resume_path",
         type=str,
@@ -96,18 +93,24 @@ def parse_args():
     )
     args = parser.parse_args()
     env_local_rank = int(os.environ.get("LOCAL_RANK", -1))
     if env_local_rank != -1 and env_local_rank != args.local_rank:
         args.local_rank = env_local_rank
     return args
 def image_grid(imgs, rows, cols):
-    assert len(imgs) == rows * cols
     w, h = imgs[0].size
-    grid = Image.new("RGB", size=(cols * w, rows * h))
     for i, img in enumerate(imgs):
         grid.paste(img, box=(i % cols * w, i // cols * h))
     return grid
@@ -117,57 +120,89 @@ args = parse_args()
 # Mask-based CatVTON
 catvton_repo = "zhengchong/CatVTON"
-repo_path = snapshot_download(repo_id=catvton_repo)
-# Pipeline
 pipeline = CatVTONPipeline(
-    base_ckpt=args.base_model_path,
-    attn_ckpt=repo_path,
     attn_ckpt_version="mix",
-    weight_dtype=init_weight_dtype(args.mixed_precision),
-    use_tf32=args.allow_tf32,
-    device='cuda'
 )
-# AutoMasker
-mask_processor = VaeImageProcessor(vae_scale_factor=8, do_normalize=False, do_binarize=True, do_convert_grayscale=True)
 automasker = AutoMasker(
-    densepose_ckpt=os.path.join(repo_path, "DensePose"),
-    schp_ckpt=os.path.join(repo_path, "SCHP"),
     device='cuda',
 )
-@spaces.GPU(duration=120)
 def submit_function(
     person_image,
     cloth_image,
-    cloth_type,
     num_inference_steps,
     guidance_scale,
     seed,
-    show_type
 ):
-    person_image, mask = person_image["background"], person_image["layers"][0]
-    mask = Image.open(mask).convert("L")
-    if len(np.unique(np.array(mask))) == 1:
-        mask = None
     else:
-        mask = np.array(mask)
-        mask[mask > 0] = 255
-        mask = Image.fromarray(mask)
-    tmp_folder = args.output_dir
-    date_str = datetime.now().strftime("%Y%m%d%H%M%S")
-    result_save_path = os.path.join(tmp_folder, date_str[:8], date_str[8:] + ".png")
     if not os.path.exists(os.path.join(tmp_folder, date_str[:8])):
-        os.makedirs(os.path.join(tmp_folder, date_str[:8]))
     generator = None
-    if seed != -1:
         generator = torch.Generator(device='cuda').manual_seed(seed)
     person_image = Image.open(person_image).convert("RGB")
     cloth_image = Image.open(cloth_image).convert("RGB")
     person_image = resize_and_crop(person_image, (args.width, args.height))
@@ -175,14 +210,15 @@ def submit_function(
     # Process mask
     if mask is not None:
-        mask = resize_and_crop(mask, (args.width, args.height))
     else:
         mask = automasker(
             person_image,
             cloth_type
-        )['mask']
-    mask = mask_processor.blur(mask, blur_factor=9)
     # Inference
     # try:
     result_image = pipeline(
@@ -198,10 +234,13 @@ def submit_function(
     #         "An error occurred. Please try again later: {}".format(e)
     #     )
-    # Post-process
-    masked_person = vis_mask(person_image, mask)
-    save_result_image = image_grid([person_image, masked_person, cloth_image, result_image], 1, 4)
     save_result_image.save(result_save_path)
     if show_type == "result only":
         return result_image
     else:
@@ -212,131 +251,14 @@ def submit_function(
         else:
             condition_width = width // 3
             conditions = image_grid([person_image, masked_person , cloth_image], 3, 1)
-        conditions = conditions.resize((condition_width, height), Image.NEAREST)
-        new_result_image = Image.new("RGB", (width + condition_width + 5, height))
-        new_result_image.paste(conditions, (0, 0))
-        new_result_image.paste(result_image, (condition_width + 5, 0))
-    return new_result_image
-@spaces.GPU(duration=120)
-def submit_function_p2p(
-    person_image,
-    cloth_image,
-    num_inference_steps,
-    guidance_scale,
-    seed):
-    person_image= person_image["background"]
-    tmp_folder = args.output_dir
-    date_str = datetime.now().strftime("%Y%m%d%H%M%S")
-    result_save_path = os.path.join(tmp_folder, date_str[:8], date_str[8:] + ".png")
-    if not os.path.exists(os.path.join(tmp_folder, date_str[:8])):
-        os.makedirs(os.path.join(tmp_folder, date_str[:8]))
-    generator = None
-    if seed != -1:
-        generator = torch.Generator(device='cuda').manual_seed(seed)
-    person_image = Image.open(person_image).convert("RGB")
-    cloth_image = Image.open(cloth_image).convert("RGB")
-    person_image = resize_and_crop(person_image, (args.width, args.height))
-    cloth_image = resize_and_padding(cloth_image, (args.width, args.height))
-    # Inference
-    try:
-        result_image = pipeline_p2p(
-            image=person_image,
-            condition_image=cloth_image,
-            num_inference_steps=num_inference_steps,
-            guidance_scale=guidance_scale,
-            generator=generator
-        )[0]
-    except Exception as e:
-        raise gr.Error(
-            "An error occurred. Please try again later: {}".format(e)
-        )
-    # Post-process
-    save_result_image = image_grid([person_image, cloth_image, result_image], 1, 3)
-    save_result_image.save(result_save_path)
-    return result_image
-@spaces.GPU(duration=120)
-def submit_function_flux(
-    person_image,
-    cloth_image,
-    cloth_type,
-    num_inference_steps,
-    guidance_scale,
-    seed,
-    show_type
-):
-    # Process image editor input
-    person_image, mask = person_image["background"], person_image["layers"][0]
-    mask = Image.open(mask).convert("L")
-    if len(np.unique(np.array(mask))) == 1:
-        mask = None
-    else:
-        mask = np.array(mask)
-        mask[mask > 0] = 255
-        mask = Image.fromarray(mask)
-    # Set random seed
-    generator = None
-    if seed != -1:
-        generator = torch.Generator(device='cuda').manual_seed(seed)
-    # Process input images
-    person_image = Image.open(person_image).convert("RGB")
-    cloth_image = Image.open(cloth_image).convert("RGB")
-    # Adjust image sizes
-    person_image = resize_and_crop(person_image, (args.width, args.height))
-    cloth_image = resize_and_padding(cloth_image, (args.width, args.height))
-    # Process mask
-    if mask is not None:
-        mask = resize_and_crop(mask, (args.width, args.height))
-    else:
-        mask = automasker(
-            person_image,
-            cloth_type
-        )['mask']
-    mask = mask_processor.blur(mask, blur_factor=9)
-    # Inference
-    result_image = pipeline_flux(
-        image=person_image,
-        condition_image=cloth_image,
-        mask_image=mask,
-        width=args.width,
-        height=args.height,
-        num_inference_steps=num_inference_steps,
-        guidance_scale=guidance_scale,
-        generator=generator
-    ).images[0]
-    # Post-processing
-    masked_person = vis_mask(person_image, mask)
-    # Return result based on show type
-    if show_type == "result only":
-        return result_image
-    else:
-        width, height = person_image.size
-        if show_type == "input & result":
-            condition_width = width // 2
-            conditions = image_grid([person_image, cloth_image], 2, 1)
-        else:
-            condition_width = width // 3
-            conditions = image_grid([person_image, masked_person, cloth_image], 3, 1)
-        conditions = conditions.resize((condition_width, height), Image.NEAREST)
-        new_result_image = Image.new("RGB", (width + condition_width + 5, height))
         new_result_image.paste(conditions, (0, 0))
         new_result_image.paste(result_image, (condition_width + 5, 0))
-        return new_result_image
 def person_example_fn(image_path):
@@ -348,9 +270,10 @@ HEADER = ""
 def app_gradio():
     with gr.Blocks(title="CatVTON") as demo:
         gr.Markdown(HEADER)
-        with gr.Tab("Mask-based & SD1.5"):
             with gr.Row():
                 with gr.Column(scale=1, min_width=350):
                     with gr.Row():
                         image_path = gr.Image(
                             type="filepath",
@@ -361,6 +284,7 @@ def app_gradio():
                             interactive=True, label="Person Image", type="filepath"
                         )
                     with gr.Row():
                         with gr.Column(scale=1, min_width=230):
                             cloth_image = gr.Image(
@@ -376,12 +300,13 @@ def app_gradio():
                                 value="upper",
                             )
                     submit = gr.Button("Submit")
                     gr.Markdown(
                         '<center><span style="color: #FF0000">!!! Click only Once, Wait for Delay !!!</span></center>'
                     )
                     gr.Markdown(
                         '<span style="color: #808080; font-size: small;">Advanced options can adjust details:<br>1. `Inference Step` may enhance details;<br>2. `CFG` is highly correlated with saturation;<br>3. `Random seed` may improve pseudo-shadow.</span>'
                     )
@@ -403,7 +328,9 @@ def app_gradio():
                             value="input & mask & result",
                         )
                 with gr.Column(scale=2, min_width=500):
                     result_image = gr.Image(interactive=False, label="Result")
                     with gr.Row():
                         # Photo Examples
@@ -466,6 +393,7 @@ def app_gradio():
                     person_example_fn, inputs=image_path, outputs=person_image
                 )
                 submit.click(
                     submit_function,
                     [
@@ -479,10 +407,9 @@ def app_gradio():
                     ],
                     result_image,
                 )
     demo.queue().launch(share=True, show_error=True)
 if __name__ == "__main__":

 from model.flux.pipeline_flux_tryon import FluxTryOnPipeline
 from utils import init_weight_dtype, resize_and_crop, resize_and_padding
+access_token = os.getenv('HF_ACCESS_TOKEN')
+# Parses the command-line arguments and returns the program's configuration settings.
 def parse_args():
+    # Create the parser object that manages the command-line arguments.
     parser = argparse.ArgumentParser(description="Simple example of a training script.")
     parser.add_argument(
         "--base_model_path",
         type=str,
             "The path to the base model to use for evaluation. This can be a local path or a model identifier from the Model Hub."
         ),
     )
     parser.add_argument(
         "--resume_path",
         type=str,
     )
     args = parser.parse_args()
+    # Post-process the arguments:
+    # keep local_rank (the local GPU index used in distributed runs) in sync with the LOCAL_RANK environment variable.
+    # In distributed training jobs the system needs the local GPU index to allocate resources.
     env_local_rank = int(os.environ.get("LOCAL_RANK", -1))
     if env_local_rank != -1 and env_local_rank != args.local_rank:
         args.local_rank = env_local_rank
     return args
+# image_grid assembles the input images into a single grid image with the given number of rows and columns (cols).
 def image_grid(imgs, rows, cols):
+    assert len(imgs) == rows * cols # Kiểm tra số lượng ảnh
     w, h = imgs[0].size
+    grid = Image.new("RGB", size=(cols * w, rows * h)) # Tạo ảnh trống làm lưới
+    # Iterate over the images and paste each one into the grid
     for i, img in enumerate(imgs):
         grid.paste(img, box=(i % cols * w, i // cols * h))
     return grid
 # Mask-based CatVTON
 catvton_repo = "zhengchong/CatVTON"
+repo_path = snapshot_download(repo_id=catvton_repo) # snapshot_download: Hàm này tải toàn bộ dữ liệu mô hình từ kho lưu trữ trên Hugging Face và lưu về máy cục bộ.
+# Pipeline that performs virtual try-on (mask-based)
 pipeline = CatVTONPipeline(
+    base_ckpt=args.base_model_path, # Checkpoint của mô hình cơ sở (dùng để tạo nền tảng cho pipeline).
+    attn_ckpt=repo_path,            # Checkpoint chứa các tham số của attention module, được tải từ repo_path.
     attn_ckpt_version="mix",
+    weight_dtype=init_weight_dtype(args.mixed_precision), # Kiểu dữ liệu của trọng số mô hình. Được thiết lập bởi hàm init_weight_dtype, có thể là fp16 hoặc bf16 tùy thuộc vào GPU và cấu hình.
+    use_tf32=args.allow_tf32,       # Cho phép sử dụng TensorFloat32 trên GPU Ampere (như A100) để tăng tốc.
+    device='cuda'                   # Thiết bị chạy mô hình (ở đây là cuda, tức GPU).
 )
+# AutoMasker Part
+# VaeImageProcessor: image processor designed to work with VAE-based (Variational Autoencoder) models.
+mask_processor = VaeImageProcessor(
+    vae_scale_factor=8,     # Tỉ lệ nén hình ảnh khi xử lý bằng VAE. Ảnh sẽ được giảm kích thước theo tỉ lệ 1/8.
+    do_normalize=False,     # Không thực hiện chuẩn hóa giá trị pixel (ví dụ: chuyển đổi giá trị về khoảng [0, 1]).
+    do_binarize=True,       # Chuyển đổi hình ảnh thành nhị phân (chỉ chứa 2 giá trị: 0 hoặc 255). Quan trọng để tạo mặt nạ rõ ràng.
+    do_convert_grayscale=True
+    )
+# AutoMasker: tool that automatically generates a mask using models that predict human body shape and segment clothing.
 automasker = AutoMasker(
+    densepose_ckpt=os.path.join(repo_path, "DensePose"), # DensePose: Mô hình dự đoán vị trí 3D của cơ thể từ ảnh 2D.
+    schp_ckpt=os.path.join(repo_path, "SCHP"),           # SCHP: Mô hình phân đoạn chi tiết cơ thể người (ví dụ: tách tóc, quần áo, da, v.v.).
     device='cuda',
 )
+# This function takes the inputs (person image, garment image, parameters) and runs the processing steps to return the result image.
+@spaces.GPU(duration=120) # Gán GPU để thực hiện hàm submit_function, với thời gian tối đa là 120 giây.
+    # Function definition; it takes the following parameters
 def submit_function(
     person_image,
     cloth_image,
+    cloth_type,     # upper, lower, hoặc overall
     num_inference_steps,
     guidance_scale,
     seed,
+    show_type       # Kiểu hiển thị kết quả (chỉ kết quả, kết hợp ảnh gốc và kết quả, hoặc hiển thị cả mặt nạ).
 ):
+    # Process the mask
+    person_image,
+    mask = person_image["background"],      # Lấy ảnh người từ lớp nền.
+    #person_image["layers"][0]               # Lấy mặt nạ do người dùng vẽ (nếu có).
+    if len(person_image["layers"]) > 0:
+        # If the list is not empty, take the first element
+        layer = person_image["layers"][0]
+    else:
+        # If the list is empty, fall back to an alternative or report an error
+        layer = None
+    print("Không có layers trong person_image.")
+    print(mask)
+    #mask = Image.open(mask).convert("L")    # Chuyển mặt nạ thành ảnh thang độ xám
+    if mask is None:
+        raise ValueError("Tham số 'mask' bị rỗng.")
+    elif isinstance(mask, (str, bytes)) or hasattr(mask, "read"):
+        mask = Image.open(mask).convert("L")
     else:
+        raise ValueError(f"Kiểu dữ liệu '{type(mask)}' của 'mask' không được hỗ trợ.")
+    print("Loại của mask:", type(mask))
+    print("Giá trị của mask:", mask)
+    if len(np.unique(np.array(mask))) == 1: # Nếu mặt nạ chỉ chứa một giá trị (ví dụ: toàn đen hoặc toàn trắng), thì không sử dụng mặt nạ (mask = None).
+        mask = None
+    else:
+        mask = np.array(mask)               # Chuyển mặt nạ thành mảng numpy.
+        mask[mask > 0] = 255                # Các pixel có giá trị lớn hơn 0 được chuyển thành 255 (trắng).
+        mask = Image.fromarray(mask)        # Chuyển mảng trở lại thành ảnh.
+    # Build the path where the result is saved
+    tmp_folder = args.output_dir                                # Thư mục tạm thời lưu kết quả.
+    date_str = datetime.now().strftime("%Y%m%d%H%M%S")          # Chuỗi ngày giờ hiện tại (ví dụ: 20250108).
+    result_save_path = os.path.join(tmp_folder, date_str[:8], date_str[8:] + ".png") # Đường dẫn đầy đủ để lưu ảnh kết quả.
     if not os.path.exists(os.path.join(tmp_folder, date_str[:8])):
+        os.makedirs(os.path.join(tmp_folder, date_str[:8]))     # Tạo thư mục lưu trữ nếu chưa tồn tại.
+    # Handle the random seed
     generator = None
+    if seed != -1:     # Nếu seed được cung cấp, mô hình sẽ sử dụng giá trị này để sinh dữ liệu (giữ tính ngẫu nhiên nhưng tái tạo được).
         generator = torch.Generator(device='cuda').manual_seed(seed)
+    # Normalize the input images
     person_image = Image.open(person_image).convert("RGB")
     cloth_image = Image.open(cloth_image).convert("RGB")
     person_image = resize_and_crop(person_image, (args.width, args.height))
     # Process mask
     if mask is not None:
+        mask = resize_and_crop(mask, (args.width, args.height)) # Nếu mặt nạ được cung cấp, thay đổi kích thước cho phù hợp.
     else:
         mask = automasker(
             person_image,
             cloth_type
+        )['mask']   # Nếu không, tạo mặt nạ tự động bằng automasker, dựa trên loại quần áo (cloth_type).
+    mask = mask_processor.blur(mask, blur_factor=9) # Làm mờ mặt nạ (blur) để giảm bớt các cạnh sắc
+    # Model inference: pass the parameters to the pipeline call, which returns the result image
     # Inference
     # try:
     result_image = pipeline(
     #         "An error occurred. Please try again later: {}".format(e)
     #     )
+    # Post-process
+    # Build the result grid image
+    masked_person = vis_mask(person_image, mask)    # Hiển thị ảnh người với mặt nạ được áp dụng.
+    save_result_image = image_grid([person_image, masked_person, cloth_image, result_image], 1, 4) # Tạo một ảnh lưới chứa
     save_result_image.save(result_save_path)
+    # Adjust how the result is displayed
     if show_type == "result only":
         return result_image
     else:
         else:
             condition_width = width // 3
             conditions = image_grid([person_image, masked_person , cloth_image], 3, 1)
+        conditions = conditions.resize((condition_width, height), Image.NEAREST)
+        # conditions: the composite image built from images such as the original person image, the garment image, and (optionally) the mask image.
+        # Image.NEAREST: nearest-neighbor interpolation; resizing with it introduces no blur, but downscaling discards pixels, so fine detail can be lost.
+        new_result_image = Image.new("RGB", (width + condition_width + 5, height)) # Image.new: Tạo một ảnh trống mới
         new_result_image.paste(conditions, (0, 0))
         new_result_image.paste(result_image, (condition_width + 5, 0))
+    return new_result_image
 def person_example_fn(image_path):
 def app_gradio():
     with gr.Blocks(title="CatVTON") as demo:
         gr.Markdown(HEADER)
+        with gr.Tab("Mask-based"):
             with gr.Row():
                 with gr.Column(scale=1, min_width=350):
+                    # Model (person) image
                     with gr.Row():
                         image_path = gr.Image(
                             type="filepath",
                             interactive=True, label="Person Image", type="filepath"
                         )
+                    # Garment image
                     with gr.Row():
                         with gr.Column(scale=1, min_width=230):
                             cloth_image = gr.Image(
                                 value="upper",
                             )
+                    # Submit button - Run
                     submit = gr.Button("Submit")
                     gr.Markdown(
                         '<center><span style="color: #FF0000">!!! Click only Once, Wait for Delay !!!</span></center>'
                     )
+                    # Advanced settings
                     gr.Markdown(
                         '<span style="color: #808080; font-size: small;">Advanced options can adjust details:<br>1. `Inference Step` may enhance details;<br>2. `CFG` is highly correlated with saturation;<br>3. `Random seed` may improve pseudo-shadow.</span>'
                     )
                             value="input & mask & result",
                         )
                 with gr.Column(scale=2, min_width=500):
+                    # Result image
                     result_image = gr.Image(interactive=False, label="Result")
                     with gr.Row():
                         # Photo Examples
                     person_example_fn, inputs=image_path, outputs=person_image
                 )
+                # Handler invoked when the submit button is clicked
                 submit.click(
                     submit_function,
                     [
                     ],
                     result_image,
                 )
     demo.queue().launch(share=True, show_error=True)
+    #demo.queue().launch()
 if __name__ == "__main__":