Spaces:

nvn04
/

StyleSnap

Build error

App Files Files Community

nvn04 committed on Jan 8

Commit

d589687

verified ·

1 Parent(s): a8fdc3c

Update app.py

Browse files

Files changed (1) hide show

app.py +170 -98

app.py CHANGED Viewed

@@ -17,13 +17,9 @@ from model.pipeline import CatVTONPipeline, CatVTONPix2PixPipeline
 from model.flux.pipeline_flux_tryon import FluxTryOnPipeline
 from utils import init_weight_dtype, resize_and_crop, resize_and_padding
-access_token = os.getenv('HF_ACCESS_TOKEN')
-# dùng để phân tích các tham số từ dòng lệnh và trả về cấu hình cài đặt cho chương trình
 def parse_args():
-    #  Khởi tạo đối tượng để quản lý các tham số dòng lệnh.
     parser = argparse.ArgumentParser(description="Simple example of a training script.")
     parser.add_argument(
         "--base_model_path",
         type=str,
@@ -32,7 +28,14 @@ def parse_args():
             "The path to the base model to use for evaluation. This can be a local path or a model identifier from the Model Hub."
         ),
     )
     parser.add_argument(
         "--resume_path",
         type=str,
@@ -93,24 +96,18 @@ def parse_args():
     )
     args = parser.parse_args()
-    # Xử lý tham số:
-    # Đảm bảo rằng local_rank (chỉ số GPU cục bộ khi chạy phân tán) được đồng bộ từ biến môi trường
-    # Khi chạy các tác vụ huấn luyện phân tán, hệ thống cần biết chỉ số GPU cục bộ để phân bổ tài nguyên.
     env_local_rank = int(os.environ.get("LOCAL_RANK", -1))
     if env_local_rank != -1 and env_local_rank != args.local_rank:
         args.local_rank = env_local_rank
     return args
-# image_grid builds an image grid from a list of input images, with the specified number of rows and columns (cols).
 def image_grid(imgs, rows, cols):
-    assert len(imgs) == rows * cols # Check the number of images
     w, h = imgs[0].size
-    grid = Image.new("RGB", size=(cols * w, rows * h)) # Create a blank image to serve as the grid
-    # Iterate over the images and paste each one into the grid
     for i, img in enumerate(imgs):
         grid.paste(img, box=(i % cols * w, i // cols * h))
     return grid
@@ -120,88 +117,56 @@ args = parse_args()
 # Mask-based CatVTON
 catvton_repo = "zhengchong/CatVTON"
-repo_path = snapshot_download(repo_id=catvton_repo) # snapshot_download: downloads the full model repository from the Hugging Face Hub and stores it locally.
-# Pipeline that performs virtual try-on (mask-based)
 pipeline = CatVTONPipeline(
-    base_ckpt=args.base_model_path, # Checkpoint of the base model the pipeline is built on.
-    attn_ckpt=repo_path,            # Checkpoint holding the attention-module parameters, loaded from repo_path.
     attn_ckpt_version="mix",
-    weight_dtype=init_weight_dtype(args.mixed_precision), # Weight dtype, chosen by init_weight_dtype from the mixed-precision setting (presumably fp16 or bf16 - confirm in utils).
-    use_tf32=args.allow_tf32,       # Allow TensorFloat32 on GPUs that support it (e.g. Ampere / A100) for speed.
-    device='cuda'                   # Device the model runs on (here cuda, i.e. the GPU).
 )
-# AutoMasker Part
-# VaeImageProcessor: image processor designed to work with VAE (Variational Autoencoder)-based models.
-mask_processor = VaeImageProcessor(
-    vae_scale_factor=8,     # VAE scale factor (original note: images are downscaled by 1/8 - TODO confirm).
-    do_normalize=False,     # Do not normalize pixel values.
-    do_binarize=True,       # Binarize the image so the mask is clear-cut (original note: only 0 or 255 remain - TODO confirm).
-    do_convert_grayscale=True
-    )
-# AutoMasker: tool that automatically creates masks using human body-shape prediction and clothing segmentation models.
 automasker = AutoMasker(
-    densepose_ckpt=os.path.join(repo_path, "DensePose"), # DensePose: model that estimates body position from a 2D image.
-    schp_ckpt=os.path.join(repo_path, "SCHP"),           # SCHP: fine-grained human parsing model (e.g. separating hair, clothes, skin, etc.).
     device='cuda',
 )
-# Hàm này nhận dữ liệu đầu vào (ảnh người, ảnh quần áo, các tham số) và thực hiện các bước xử lý để trả về ảnh kết quả.
-@spaces.GPU(duration=120) # Gán GPU để thực hiện hàm submit_function, với thời gian tối đa là 120 giây.
-    # Định nghĩa hàm nhận vào các tham số sau
 def submit_function(
     person_image,
     cloth_image,
-    cloth_type,     # upper, lower, hoặc overall
     num_inference_steps,
     guidance_scale,
     seed,
-    show_type       # Kiểu hiển thị kết quả (chỉ kết quả, kết hợp ảnh gốc và kết quả, hoặc hiển thị cả mặt nạ).
 ):
-    # Xử lý mặt nạ (mask)
-    person_image,
-    mask = person_image["background"],      # Lấy ảnh người từ lớp nền.
-    #person_image["layers"][0]               # Lấy mặt nạ do người dùng vẽ (nếu có).
-    if len(person_image["layers"]) > 0:
-        # Nếu danh sách không rỗng, lấy phần tử đầu tiên
-        layer = person_image["layers"][0]
-    else:
-        # Nếu danh sách rỗng, thực hiện hành động thay thế hoặc thông báo lỗi
-        layer = None
-    print("Không có layers trong person_image.")
-    print("Loại của mask:", type(mask))
-    print("Giá trị của mask:", mask)
-    #mask = Image.open(mask).convert("L")    # Chuyển mặt nạ thành ảnh thang độ xám
-    if mask is None:
-        raise ValueError("Tham số 'mask' bị rỗng.")
-    elif isinstance(mask, (str, bytes)) or hasattr(mask, "read"):
-        mask = Image.open(mask).convert("L")
-    else:
-        raise ValueError(f"Kiểu dữ liệu '{type(mask)}' của 'mask' không được hỗ trợ.")
-    if len(np.unique(np.array(mask))) == 1: # Nếu mặt nạ chỉ chứa một giá trị (ví dụ: toàn đen hoặc toàn trắng), thì không sử dụng mặt nạ (mask = None).
         mask = None
     else:
-        mask = np.array(mask)               # Chuyển mặt nạ thành mảng numpy.
-        mask[mask > 0] = 255                # Các pixel có giá trị lớn hơn 0 được chuyển thành 255 (trắng).
-        mask = Image.fromarray(mask)        # Chuyển mảng trở lại thành ảnh.
-    # Xử lý đường dẫn lưu trữ kết quả
-    tmp_folder = args.output_dir                                # Thư mục tạm thời lưu kết quả.
-    date_str = datetime.now().strftime("%Y%m%d%H%M%S")          # Chuỗi ngày giờ hiện tại (ví dụ: 20250108).
-    result_save_path = os.path.join(tmp_folder, date_str[:8], date_str[8:] + ".png") # Đường dẫn đầy đủ để lưu ảnh kết quả.
     if not os.path.exists(os.path.join(tmp_folder, date_str[:8])):
-        os.makedirs(os.path.join(tmp_folder, date_str[:8]))     # Tạo thư mục lưu trữ nếu chưa tồn tại.
-    # Xử lý seed ngẫu nhiên
     generator = None
-    if seed != -1:     # Nếu seed được cung cấp, mô hình sẽ sử dụng giá trị này để sinh dữ liệu (giữ tính ngẫu nhiên nhưng tái tạo được).
         generator = torch.Generator(device='cuda').manual_seed(seed)
-    # Chuẩn hóa ảnh đầu vào
     person_image = Image.open(person_image).convert("RGB")
     cloth_image = Image.open(cloth_image).convert("RGB")
     person_image = resize_and_crop(person_image, (args.width, args.height))
@@ -209,15 +174,14 @@ def submit_function(
     # Process mask
     if mask is not None:
-        mask = resize_and_crop(mask, (args.width, args.height)) # Nếu mặt nạ được cung cấp, thay đổi kích thước cho phù hợp.
     else:
         mask = automasker(
             person_image,
             cloth_type
-        )['mask']   # Nếu không, tạo mặt nạ tự động bằng automasker, dựa trên loại quần áo (cloth_type).
-    mask = mask_processor.blur(mask, blur_factor=9) # Làm mờ mặt nạ (blur) để giảm bớt các cạnh sắc
-    # Suy luận mô hình: gán các tham số vô hàm tính toán, trả lại result là hình ảnh
     # Inference
     # try:
     result_image = pipeline(
@@ -233,13 +197,10 @@ def submit_function(
     #         "An error occurred. Please try again later: {}".format(e)
     #     )
-    # Post-process - Xử lý hậu kỳ
-    # Tạo ảnh kết quả lưới
-    masked_person = vis_mask(person_image, mask)    # Hiển thị ảnh người với mặt nạ được áp dụng.
-    save_result_image = image_grid([person_image, masked_person, cloth_image, result_image], 1, 4) # Tạo một ảnh lưới chứa
     save_result_image.save(result_save_path)
-    # Điều chỉnh hiển thị kết quả
     if show_type == "result only":
         return result_image
     else:
@@ -250,15 +211,132 @@ def submit_function(
         else:
             condition_width = width // 3
             conditions = image_grid([person_image, masked_person , cloth_image], 3, 1)
-        conditions = conditions.resize((condition_width, height), Image.NEAREST)
-        # conditions: Ảnh ghép ban đầu, được tạo từ các ảnh như ảnh người gốc, ảnh quần áo, và ảnh mặt nạ (tùy chọn).
-        # Tham số Image.NEAREST: Đây là phương pháp nội suy (interpolation) gần nhất, dùng để thay đổi kích thước ảnh mà không làm mờ hay mất chi tiết.
-        new_result_image = Image.new("RGB", (width + condition_width + 5, height)) # Image.new: Tạo một ảnh trống mới
         new_result_image.paste(conditions, (0, 0))
         new_result_image.paste(result_image, (condition_width + 5, 0))
     return new_result_image
 def person_example_fn(image_path):
     return image_path
@@ -269,10 +347,9 @@ HEADER = ""
 def app_gradio():
     with gr.Blocks(title="CatVTON") as demo:
         gr.Markdown(HEADER)
-        with gr.Tab("Mask-based"):
             with gr.Row():
                 with gr.Column(scale=1, min_width=350):
-                    # Ảnh model (người)
                     with gr.Row():
                         image_path = gr.Image(
                             type="filepath",
@@ -283,7 +360,6 @@ def app_gradio():
                             interactive=True, label="Person Image", type="filepath"
                         )
-                    # Ảnh quần áo
                     with gr.Row():
                         with gr.Column(scale=1, min_width=230):
                             cloth_image = gr.Image(
@@ -299,13 +375,12 @@ def app_gradio():
                                 value="upper",
                             )
-                    # Submit button - Run
                     submit = gr.Button("Submit")
                     gr.Markdown(
                         '<center><span style="color: #FF0000">!!! Click only Once, Wait for Delay !!!</span></center>'
                     )
-                    # Advance setting
                     gr.Markdown(
                         '<span style="color: #808080; font-size: small;">Advanced options can adjust details:<br>1. `Inference Step` may enhance details;<br>2. `CFG` is highly correlated with saturation;<br>3. `Random seed` may improve pseudo-shadow.</span>'
                     )
@@ -327,9 +402,7 @@ def app_gradio():
                             value="input & mask & result",
                         )
                 with gr.Column(scale=2, min_width=500):
-                    # Result image
                     result_image = gr.Image(interactive=False, label="Result")
                     with gr.Row():
                         # Photo Examples
@@ -392,7 +465,6 @@ def app_gradio():
                     person_example_fn, inputs=image_path, outputs=person_image
                 )
-                # Function khi ấn nút submit
                 submit.click(
                     submit_function,
                     [
@@ -406,9 +478,9 @@ def app_gradio():
                     ],
                     result_image,
                 )
     demo.queue().launch(share=True, show_error=True)
-    #demo.queue().launch()
 if __name__ == "__main__":

 from model.flux.pipeline_flux_tryon import FluxTryOnPipeline
 from utils import init_weight_dtype, resize_and_crop, resize_and_padding
 def parse_args():
     parser = argparse.ArgumentParser(description="Simple example of a training script.")
     parser.add_argument(
         "--base_model_path",
         type=str,
             "The path to the base model to use for evaluation. This can be a local path or a model identifier from the Model Hub."
         ),
     )
+    parser.add_argument(
+        "--p2p_base_model_path",
+        type=str,
+        default="timbrooks/instruct-pix2pix",
+        help=(
+            "The path to the base model to use for evaluation. This can be a local path or a model identifier from the Model Hub."
+        ),
+    )
     parser.add_argument(
         "--resume_path",
         type=str,
     )
     args = parser.parse_args()
     env_local_rank = int(os.environ.get("LOCAL_RANK", -1))
     if env_local_rank != -1 and env_local_rank != args.local_rank:
         args.local_rank = env_local_rank
     return args
 def image_grid(imgs, rows, cols):
+    # Paste `imgs` into one RGB canvas laid out as `rows` x `cols`, filling row by row.
+    # Every cell uses the size of imgs[0]; the image count must equal rows * cols.
+    assert len(imgs) == rows * cols
     w, h = imgs[0].size
+    grid = Image.new("RGB", size=(cols * w, rows * h))
     for i, img in enumerate(imgs):
+        # i % cols selects the column, i // cols selects the row.
         grid.paste(img, box=(i % cols * w, i // cols * h))
     return grid
 # Mask-based CatVTON
 catvton_repo = "zhengchong/CatVTON"
+# repo_path is reused below as the attention checkpoint and as the root of the DensePose / SCHP checkpoints.
+repo_path = snapshot_download(repo_id=catvton_repo)
+# Pipeline: mask-based try-on pipeline invoked by submit_function.
+# NOTE(review): everything in this section runs at import time and hard-codes device='cuda'.
 pipeline = CatVTONPipeline(
+    base_ckpt=args.base_model_path,
+    attn_ckpt=repo_path,
     attn_ckpt_version="mix",
+    weight_dtype=init_weight_dtype(args.mixed_precision),
+    use_tf32=args.allow_tf32,
+    device='cuda'
 )
+# AutoMasker
+# mask_processor is used by the submit functions to blur the mask before inference.
+mask_processor = VaeImageProcessor(vae_scale_factor=8, do_normalize=False, do_binarize=True, do_convert_grayscale=True)
+# automasker produces the mask from (person_image, cloth_type) when the user did not draw one.
 automasker = AutoMasker(
+    densepose_ckpt=os.path.join(repo_path, "DensePose"),
+    schp_ckpt=os.path.join(repo_path, "SCHP"),
     device='cuda',
 )
+@spaces.GPU(duration=120)
 def submit_function(
     person_image,
     cloth_image,
+    cloth_type,
     num_inference_steps,
     guidance_scale,
     seed,
+    show_type
 ):
+    person_image, mask = person_image["background"], person_image["layers"][0]
+    mask = Image.open(mask).convert("L")
+    if len(np.unique(np.array(mask))) == 1:
         mask = None
     else:
+        mask = np.array(mask)
+        mask[mask > 0] = 255
+        mask = Image.fromarray(mask)
+    tmp_folder = args.output_dir
+    date_str = datetime.now().strftime("%Y%m%d%H%M%S")
+    result_save_path = os.path.join(tmp_folder, date_str[:8], date_str[8:] + ".png")
     if not os.path.exists(os.path.join(tmp_folder, date_str[:8])):
+        os.makedirs(os.path.join(tmp_folder, date_str[:8]))
     generator = None
+    if seed != -1:
         generator = torch.Generator(device='cuda').manual_seed(seed)
     person_image = Image.open(person_image).convert("RGB")
     cloth_image = Image.open(cloth_image).convert("RGB")
     person_image = resize_and_crop(person_image, (args.width, args.height))
     # Process mask
     if mask is not None:
+        mask = resize_and_crop(mask, (args.width, args.height))
     else:
         mask = automasker(
             person_image,
             cloth_type
+        )['mask']
+    mask = mask_processor.blur(mask, blur_factor=9)
     # Inference
     # try:
     result_image = pipeline(
     #         "An error occurred. Please try again later: {}".format(e)
     #     )
+    # Post-process
+    masked_person = vis_mask(person_image, mask)
+    save_result_image = image_grid([person_image, masked_person, cloth_image, result_image], 1, 4)
     save_result_image.save(result_save_path)
     if show_type == "result only":
         return result_image
     else:
         else:
             condition_width = width // 3
             conditions = image_grid([person_image, masked_person , cloth_image], 3, 1)
+        conditions = conditions.resize((condition_width, height), Image.NEAREST)
+        new_result_image = Image.new("RGB", (width + condition_width + 5, height))
         new_result_image.paste(conditions, (0, 0))
         new_result_image.paste(result_image, (condition_width + 5, 0))
     return new_result_image
+@spaces.GPU(duration=120)
+def submit_function_p2p(
+    person_image,
+    cloth_image,
+    num_inference_steps,
+    guidance_scale,
+    seed):
+    """Run the mask-free (pix2pix) try-on pipeline and return the result image.
+
+    Args:
+        person_image: Mapping whose "background" entry is opened with
+            Image.open (only that entry is read here).
+        cloth_image: Garment image, opened with Image.open.
+        num_inference_steps: Forwarded to the pipeline unchanged.
+        guidance_scale: Forwarded to the pipeline unchanged.
+        seed: Seed for the CUDA generator; -1 means no generator is passed.
+
+    Returns:
+        The first element of the pipeline output.
+
+    Raises:
+        gr.Error: If the pipeline call raises; the original exception is
+            kept as the cause.
+
+    Side effect: saves a 1x3 grid (person, cloth, result) to
+    <args.output_dir>/<YYYYMMDD>/<HHMMSS>.png.
+    """
+    # NOTE(review): pipeline_p2p is not defined in the visible part of this file -
+    # confirm it is constructed at module level before this handler is wired up.
+    person_image = person_image["background"]
+    tmp_folder = args.output_dir
+    date_str = datetime.now().strftime("%Y%m%d%H%M%S")
+    day_folder = os.path.join(tmp_folder, date_str[:8])
+    # exist_ok=True removes the check-then-create race between concurrent requests.
+    os.makedirs(day_folder, exist_ok=True)
+    result_save_path = os.path.join(day_folder, date_str[8:] + ".png")
+    generator = None
+    if seed != -1:
+        generator = torch.Generator(device='cuda').manual_seed(seed)
+    person_image = Image.open(person_image).convert("RGB")
+    cloth_image = Image.open(cloth_image).convert("RGB")
+    person_image = resize_and_crop(person_image, (args.width, args.height))
+    cloth_image = resize_and_padding(cloth_image, (args.width, args.height))
+    # Inference
+    try:
+        result_image = pipeline_p2p(
+            image=person_image,
+            condition_image=cloth_image,
+            num_inference_steps=num_inference_steps,
+            guidance_scale=guidance_scale,
+            generator=generator
+        )[0]
+    except Exception as e:
+        # Chain the cause so the server log keeps the real traceback.
+        raise gr.Error(
+            "An error occurred. Please try again later: {}".format(e)
+        ) from e
+    # Post-process
+    save_result_image = image_grid([person_image, cloth_image, result_image], 1, 3)
+    save_result_image.save(result_save_path)
+    return result_image
+@spaces.GPU(duration=120)
+def submit_function_flux(
+    person_image,
+    cloth_image,
+    cloth_type,
+    num_inference_steps,
+    guidance_scale,
+    seed,
+    show_type
+):
+    """Run the FLUX try-on pipeline and return the result or a side-by-side view.
+
+    Args:
+        person_image: Mapping with a "background" entry (opened with
+            Image.open) and a "layers" sequence whose first entry, if any,
+            is the user-drawn mask.
+        cloth_image: Garment image, opened with Image.open.
+        cloth_type: Passed to automasker when no usable user mask exists.
+        num_inference_steps: Forwarded to the pipeline unchanged.
+        guidance_scale: Forwarded to the pipeline unchanged.
+        seed: Seed for the CUDA generator; -1 means no generator is passed.
+        show_type: "result only" returns the bare result; "input & result"
+            prepends person + cloth; any other value also includes the
+            masked person.
+
+    Returns:
+        The result image, or a new RGB image with the stacked condition
+        images on the left and the result on the right (5 px gap).
+    """
+    # NOTE(review): pipeline_flux is not defined in the visible part of this file -
+    # confirm it is constructed at module level before this handler is wired up.
+    # Process image editor input
+    layers = person_image["layers"]
+    person_image = person_image["background"]
+    # With no drawn layer, fall back to the automatic mask below
+    # rather than failing with IndexError on layers[0].
+    mask = None
+    if layers:
+        mask = Image.open(layers[0]).convert("L")
+        if len(np.unique(np.array(mask))) == 1:
+            # A single-valued layer means nothing was actually drawn.
+            mask = None
+        else:
+            mask = np.array(mask)
+            mask[mask > 0] = 255
+            mask = Image.fromarray(mask)
+    # Set random seed
+    generator = None
+    if seed != -1:
+        generator = torch.Generator(device='cuda').manual_seed(seed)
+    # Process input images
+    person_image = Image.open(person_image).convert("RGB")
+    cloth_image = Image.open(cloth_image).convert("RGB")
+    # Adjust image sizes
+    person_image = resize_and_crop(person_image, (args.width, args.height))
+    cloth_image = resize_and_padding(cloth_image, (args.width, args.height))
+    # Process mask
+    if mask is not None:
+        mask = resize_and_crop(mask, (args.width, args.height))
+    else:
+        mask = automasker(
+            person_image,
+            cloth_type
+        )['mask']
+    mask = mask_processor.blur(mask, blur_factor=9)
+    # Inference
+    result_image = pipeline_flux(
+        image=person_image,
+        condition_image=cloth_image,
+        mask_image=mask,
+        width=args.width,
+        height=args.height,
+        num_inference_steps=num_inference_steps,
+        guidance_scale=guidance_scale,
+        generator=generator
+    ).images[0]
+    # Post-processing
+    masked_person = vis_mask(person_image, mask)
+    # Return result based on show type
+    if show_type == "result only":
+        return result_image
+    else:
+        width, height = person_image.size
+        if show_type == "input & result":
+            condition_width = width // 2
+            conditions = image_grid([person_image, cloth_image], 2, 1)
+        else:
+            condition_width = width // 3
+            conditions = image_grid([person_image, masked_person, cloth_image], 3, 1)
+        # The vertical stack is shrunk to the result's height so both halves line up.
+        conditions = conditions.resize((condition_width, height), Image.NEAREST)
+        new_result_image = Image.new("RGB", (width + condition_width + 5, height))
+        new_result_image.paste(conditions, (0, 0))
+        new_result_image.paste(result_image, (condition_width + 5, 0))
+        return new_result_image
 def person_example_fn(image_path):
+    # Identity passthrough: app_gradio wires this with inputs=image_path and outputs=person_image.
     return image_path
 def app_gradio():
     with gr.Blocks(title="CatVTON") as demo:
         gr.Markdown(HEADER)
+        with gr.Tab("Mask-based & SD1.5"):
             with gr.Row():
                 with gr.Column(scale=1, min_width=350):
                     with gr.Row():
                         image_path = gr.Image(
                             type="filepath",
                             interactive=True, label="Person Image", type="filepath"
                         )
                     with gr.Row():
                         with gr.Column(scale=1, min_width=230):
                             cloth_image = gr.Image(
                                 value="upper",
                             )
                     submit = gr.Button("Submit")
                     gr.Markdown(
                         '<center><span style="color: #FF0000">!!! Click only Once, Wait for Delay !!!</span></center>'
                     )
                     gr.Markdown(
                         '<span style="color: #808080; font-size: small;">Advanced options can adjust details:<br>1. `Inference Step` may enhance details;<br>2. `CFG` is highly correlated with saturation;<br>3. `Random seed` may improve pseudo-shadow.</span>'
                     )
                             value="input & mask & result",
                         )
                 with gr.Column(scale=2, min_width=500):
                     result_image = gr.Image(interactive=False, label="Result")
                     with gr.Row():
                         # Photo Examples
                     person_example_fn, inputs=image_path, outputs=person_image
                 )
                 submit.click(
                     submit_function,
                     [
                     ],
                     result_image,
                 )
     demo.queue().launch(share=True, show_error=True)
 if __name__ == "__main__":