MnLgt committed
Commit 95eae85 · 1 Parent(s): 0b24893

updated yolo model

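This commit swaps the old yolo.BodyMask pipeline and the yolo-human-parse-epoch-125.pt checkpoint for the new hp package and yolo-human-parse-v2.pt. A minimal sketch of the new inference path as app.py now wires it together (the sample image path and output filename are only illustrations, not part of the commit):

    from ultralytics import YOLO

    from hp.utils import load_resize_image
    from hp.yolo_results import YOLOResults

    # Load the v2 segmentation checkpoint added in this commit
    model = YOLO("yolo-human-parse-v2.pt", task="segment")

    # Resize/pad the input to 1024 px and run segmentation with retina masks
    image = load_resize_image("sample_images/image_two.jpg", 1024)
    result = model(image, imgsz=max(image.size), retina_masks=True)

    # Wrap the raw ultralytics output and render the per-detection overlay grid
    annotated = YOLOResults(image, result).visualize(return_image=True)
    annotated.save("segmentation.png")  # illustrative output path
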
.gitignore CHANGED
@@ -1,4 +1,4 @@
-
+*/**.DS_Store
 gradio_cached_examples/
 checkpoint-*
 */example.ipynb
app.py CHANGED
@@ -1,99 +1,37 @@
 import gradio as gr
 import os
-from ultralytics import YOLO
-from yolo.BodyMask import BodyMask
+from hp.yolo_results import YOLOResults
 import numpy as np
 import matplotlib.pyplot as plt
 from matplotlib import patches
-from skimage.transform import resize
 from PIL import Image
 import io
+from functools import lru_cache
+import logging
+from ultralytics import YOLO
+from hp.utils import load_resize_image
 
-model_id = os.path.abspath("yolo-human-parse-epoch-125.pt")
-
-
-def display_image_with_masks(image, results, cols=4):
-    # Convert PIL Image to numpy array
-    image_np = np.array(image)
-
-    # Check image dimensions
-    if image_np.ndim != 3 or image_np.shape[2] != 3:
-        raise ValueError("Image must be a 3-dimensional array with 3 color channels")
-
-    # Number of masks
-    n = len(results)
-    rows = (n + cols - 1) // cols  # Calculate required number of rows
-
-    # Setting up the plot
-    fig, axs = plt.subplots(rows, cols, figsize=(5 * cols, 5 * rows))
-    axs = np.array(axs).reshape(-1)  # Flatten axs array for easy indexing
-
-    for i, result in enumerate(results):
-        mask = result["mask"]
-        label = result["label"]
-        score = float(result["score"])
-
-        # Convert PIL mask to numpy array and resize if necessary
-        mask_np = np.array(mask)
-        if mask_np.shape != image_np.shape[:2]:
-            mask_np = resize(
-                mask_np, image_np.shape[:2], mode="constant", anti_aliasing=False
-            )
-            mask_np = (mask_np > 0.5).astype(
-                np.uint8
-            )  # Threshold back to binary after resize
-
-        # Create an overlay where mask is True
-        overlay = np.zeros_like(image_np)
-        overlay[mask_np > 0] = [0, 0, 255]  # Applying blue color on the mask area
-
-        # Combine the image and the overlay
-        combined = image_np.copy()
-        indices = np.where(mask_np > 0)
-        combined[indices] = combined[indices] * 0.5 + overlay[indices] * 0.5
-
-        # Show the combined image
-        ax = axs[i]
-        ax.imshow(combined)
-        ax.axis("off")
-        ax.set_title(f"Label: {label}, Score: {score:.2f}", fontsize=12)
-        rect = patches.Rectangle(
-            (0, 0),
-            image_np.shape[1],
-            image_np.shape[0],
-            linewidth=1,
-            edgecolor="r",
-            facecolor="none",
-        )
-        ax.add_patch(rect)
-
-    # Hide unused subplots if the total number of masks is not a multiple of cols
-    for idx in range(i + 1, rows * cols):
-        axs[idx].axis("off")
-
-    plt.tight_layout()
-
-    # Save the plot to a bytes buffer
-    buf = io.BytesIO()
-    plt.savefig(buf, format="png")
-    buf.seek(0)
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
 
-    # Clear the current figure
-    plt.close(fig)
+model_id = os.path.abspath("yolo-human-parse-v2.pt")
 
-    return buf
 
+@lru_cache
+def get_model(model_id=model_id):
+    return YOLO(model_id, task="segment")
 
-def perform_segmentation(input_image):
-    bm = BodyMask(input_image, model_id=model_id, resize_to=640)
-    if bm.body_mask is None:
-        return input_image  # Return the original image if no mask is found
-    results = bm.results
-    buf = display_image_with_masks(input_image, results)
 
-    # Convert BytesIO to PIL Image
-    img = Image.open(buf)
-    return img
+def perform_segmentation(image):
+    model = get_model()
+    image = load_resize_image(image, 1024)
+    imgsz = max(image.size)
+    result = model(image, imgsz=imgsz, retina_masks=True)
+    if not bool(result):
+        logger.info("No Masks or Boxes Found")
+        return image
+    result = YOLOResults(image, result)
+    return result.visualize(return_image=True)
 
 
 # Get example images
hp/utils.py ADDED
@@ -0,0 +1,81 @@
+import os
+import random
+from typing import List, Union
+
+import numpy as np
+from PIL import Image, ImageOps
+from ultralytics import YOLO
+
+from hp.visualizer import visualizer
+
+
+def resize_image_pil(image_pil, max_size=1024):
+    # Ensure image is in RGB
+    if image_pil.mode != "RGB":
+        image_pil = image_pil.convert("RGB")
+
+    # Calculate new dimensions preserving aspect ratio
+    width, height = image_pil.size
+    scale = min(max_size / width, max_size / height)
+    new_width = int(width * scale)
+    new_height = int(height * scale)
+    image_pil = image_pil.resize((new_width, new_height), Image.LANCZOS)
+
+    # Calculate padding needed to reach 1024x1024
+    pad_width = (max_size - new_width) // 2
+    pad_height = (max_size - new_height) // 2
+
+    # Apply padding symmetrically
+    image_pil = ImageOps.expand(
+        image_pil,
+        border=(
+            pad_width,
+            pad_height,
+            max_size - new_width - pad_width,
+            max_size - new_height - pad_height,
+        ),
+        fill=(0, 0, 0),
+    )
+
+    return image_pil
+
+
+def load_resize_image(image_path: str | Image.Image, size: int) -> Image.Image:
+    if isinstance(image_path, str):
+        image_pil = Image.open(image_path).convert("RGB")
+    else:
+        image_pil = image_path.convert("RGB")
+
+    image_pil = resize_image_pil(image_pil, size)
+    return image_pil
+
+
+def unload_mask(mask):
+    mask = mask.cpu().numpy().squeeze()
+    mask = mask.astype(np.uint8) * 255
+    return Image.fromarray(mask)
+
+
+def unload_masks(masks):
+    return [unload_mask(mask) for mask in masks]
+
+
+def unload_box(box):
+    return box.cpu().numpy().tolist()
+
+
+def unload_boxes(boxes):
+    return [unload_box(box) for box in boxes]
+
+
+def format_scores(scores):
+    return scores.squeeze().cpu().numpy().tolist()
+
+
+def format_results(labels, scores, boxes, masks):
+    results_dict = []
+    for row in zip(labels, scores, boxes, masks):
+        label, score, box, mask = row
+        results_row = dict(label=label, score=score, mask=mask, box=box)
+        results_dict.append(results_row)
+    return results_dict
hp/visualizer.py ADDED
@@ -0,0 +1,102 @@
+import matplotlib.patches as patches
+import matplotlib.pyplot as plt
+import numpy as np
+from PIL import Image
+import io
+
+
+def visualizer(
+    image,
+    results,
+    box_label="box",
+    mask_label="mask",
+    prompt_label="prompt",
+    score_label="score",
+    cols=4,
+    return_image=False,
+    **kwargs,
+):
+    # Convert PIL Image to numpy array
+    image_np = np.array(image)
+
+    # Check image dimensions
+    if image_np.ndim != 3:
+        raise ValueError("Image must be a 3-dimensional array")
+
+    # Number of results
+    n = len(results)
+    rows = (n + cols - 1) // cols  # Calculate required number of rows
+
+    # Setting up the plot
+    fig, axs = plt.subplots(rows, cols, figsize=(5 * cols, 5 * rows))
+    if n == 1:
+        axs = np.array([[axs]])
+    elif rows == 1:
+        axs = np.array([axs])
+    else:
+        axs = axs.reshape(rows, cols)
+
+    for i, result in enumerate(results):
+        label = result[prompt_label]
+        score = float(result[score_label])
+
+        row = i // cols
+        col = i % cols
+
+        # Create a copy of the original image
+        combined = image_np.copy()
+
+        # Draw mask if present
+        if mask_label in result:
+            mask = result[mask_label]
+            # Convert PIL mask to numpy array
+            mask_np = np.array(mask)
+
+            # Check mask dimensions
+            if mask_np.ndim != 2:
+                raise ValueError("Mask must be a 2-dimensional array")
+
+            # Create an overlay where mask is True
+            overlay = np.zeros_like(image_np)
+            overlay[mask_np > 0] = [0, 0, 255]  # Applying blue color on the mask area
+
+            # Combine the image and the overlay
+            indices = np.where(mask_np > 0)
+            combined[indices] = combined[indices] * 0.5 + overlay[indices] * 0.5
+
+        # Show the combined image
+        ax = axs[row, col]
+        ax.imshow(combined)
+        ax.axis("off")
+        ax.set_title(f"Label: {label}, Score: {score:.2f}", fontsize=12)
+
+        # Draw bounding box if present
+        if box_label in result:
+            bbox = result[box_label]
+            x1, y1, x2, y2 = bbox
+            rect = patches.Rectangle(
+                (x1, y1), x2 - x1, y2 - y1, linewidth=2, edgecolor="r", facecolor="none"
+            )
+            ax.add_patch(rect)
+
+    # Hide unused subplots if the total number of results is not a multiple of cols
+    for idx in range(i + 1, rows * cols):
+        row = idx // cols
+        col = idx % cols
+        axs[row, col].axis("off")
+
+    plt.tight_layout()
+
+    if return_image:
+        # Save the plot to a bytes buffer
+        buf = io.BytesIO()
+        plt.savefig(buf, format="png")
+        buf.seek(0)
+
+        # Clear the current figure
+        plt.close(fig)
+
+        # Return the image as a PIL Image object
+        return Image.open(buf)
+    else:
+        plt.show()
hp/yolo_results.py ADDED
@@ -0,0 +1,44 @@
+from typing import List, Union
+from PIL import Image
+from ultralytics import YOLO
+
+from hp.visualizer import visualizer
+from .utils import *
+
+
+class YOLOResults:
+    def __init__(self, image: Union[Image.Image | str], result: List):
+        self.image = image
+        self.masks = None
+        self.boxes = None
+        self.scores = None
+        self.labels = None
+        self.labels_dict = None
+        self.result = self.unload(result[0])
+        self.formatted_results = format_results(
+            self.labels,
+            self.scores,
+            self.boxes,
+            self.masks,
+        )
+
+    def unload(self, result):
+        assert (
+            bool(result) and hasattr(result, "masks") and hasattr(result, "boxes")
+        ), "No Masks or Boxes Found"
+        self.masks = unload_masks(result.masks.data)
+        self.boxes = unload_boxes(result.boxes.xyxy)
+        self.scores = format_scores(result.boxes.conf)
+        self.labels = list(result.names.values())
+        self.labels_dict = result.names
+        det_ids = result.boxes.cls
+        det_ids = [int(l.item()) for l in det_ids]
+        self.labels = [self.labels_dict[i] for i in det_ids]
+
+    def visualize(self, return_image=False):
+        return visualizer(
+            self.image,
+            self.formatted_results,
+            prompt_label="label",
+            return_image=return_image,
+        )
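
For reference, a hedged sketch of consuming the new YOLOResults wrapper directly: formatted_results is a plain list of dicts (label, score, xyxy box, PIL mask), which is what hp.visualizer.visualizer iterates over. The variable names and output path below are illustrative only:

    # `image` and `result` built as in app.py's perform_segmentation above
    res = YOLOResults(image, result)

    for det in res.formatted_results:
        # Each entry carries the class label, confidence, xyxy box, and a PIL mask
        print(det["label"], round(det["score"], 3), det["box"])
        det["mask"].save(f"{det['label']}_mask.png")  # illustrative output path
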
sample_images/image_two.jpg CHANGED
yolo-human-parse-v2.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c1a5e777a3e980c26d70067246e7eb11749294af43fa355ba90af4c3076d849f
+size 13498800
yolo/BodyMask.py DELETED
@@ -1,210 +0,0 @@
-import os
-from functools import lru_cache
-from typing import List
-
-import cv2
-import numpy as np
-from diffusers.utils import load_image
-from PIL import Image, ImageChops, ImageFilter
-from ultralytics import YOLO
-from .utils import *
-
-
-def dilate_mask(mask, dilate_factor=6, blur_radius=2, erosion_factor=2):
-    if not mask:
-        return None
-    # Convert PIL image to NumPy array if necessary
-    if isinstance(mask, Image.Image):
-        mask = np.array(mask)
-
-    # Ensure mask is in uint8 format
-    mask = mask.astype(np.uint8)
-
-    # Apply dilation
-    kernel = np.ones((dilate_factor, dilate_factor), np.uint8)
-    dilated_mask = cv2.dilate(mask, kernel, iterations=1)
-
-    # Apply erosion for refinement
-    kernel = np.ones((erosion_factor, erosion_factor), np.uint8)
-    eroded_mask = cv2.erode(dilated_mask, kernel, iterations=1)
-
-    # Apply Gaussian blur to smooth the edges
-    blurred_mask = cv2.GaussianBlur(
-        eroded_mask, (2 * blur_radius + 1, 2 * blur_radius + 1), 0
-    )
-
-    # Convert back to PIL image
-    smoothed_mask = Image.fromarray(blurred_mask).convert("L")
-
-    # Optionally, apply an additional blur for extra smoothness using PIL
-    smoothed_mask = smoothed_mask.filter(ImageFilter.GaussianBlur(radius=blur_radius))
-
-    return smoothed_mask
-
-
-@lru_cache(maxsize=1)
-def get_model(model_id):
-    model = YOLO(model=model_id)
-    return model
-
-
-def combine_masks(masks: List[dict], labels: List[str], is_label=True) -> Image.Image:
-    """
-    Combine masks with the specified labels into a single mask, optimized for speed and non-overlapping of excluded masks.
-
-    Parameters:
-    - masks (List[dict]): A list of dictionaries, each containing the mask under a 'mask' key and its label under a 'label' key.
-    - labels (List[str]): A list of labels to include in the combination.
-
-    Returns:
-    - Image.Image: The combined mask as a PIL Image object, or None if no masks are combined.
-    """
-    labels_set = set(labels)  # Convert labels list to a set for O(1) lookups
-
-    # Filter out any masks that do not have a label key
-    masks = [mask for mask in masks if "label" in mask]
-
-    # Filter and convert mask images based on the specified labels
-    mask_images = [
-        mask["mask"].convert("L")
-        for mask in masks
-        if (mask["label"] in labels_set) == is_label
-    ]
-
-    # Ensure there is at least one mask to combine
-    if not mask_images:
-        return None  # Or raise an appropriate error, e.g., ValueError("No masks found for the specified labels.")
-
-    # Initialize the combined mask with the first mask
-    combined_mask = mask_images[0]
-
-    # Combine the remaining masks with the existing combined_mask using a bitwise OR operation to ensure non-overlap
-    for mask in mask_images[1:]:
-        combined_mask = ImageChops.lighter(combined_mask, mask)
-
-    return combined_mask
-
-
-body_labels = ["hair", "face", "arm", "hand", "leg", "foot", "outfit"]
-
-
-class BodyMask:
-    def __init__(
-        self,
-        image_path,
-        model_id,
-        labels=body_labels,
-        overlay="mask",
-        widen_box=0,
-        elongate_box=0,
-        resize_to=640,
-        dilate_factor=0,
-        is_label=False,
-        resize_to_nearest_eight=False,
-        verbose=True,
-        remove_overlap=True,
-    ):
-        self.image_path = image_path
-        self.image = self.get_image(
-            resize_to=resize_to, resize_to_nearest_eight=resize_to_nearest_eight
-        )
-        self.labels = labels
-        self.is_label = is_label
-        self.model_id = model_id
-        self.model = get_model(self.model_id)
-        self.model_labels = self.model.names
-        self.verbose = verbose
-        self.results = self.get_results()
-        self.dilate_factor = dilate_factor
-        self.body_mask = self.get_body_mask()
-        self.box = self.get_bounding_box()
-        self.body_box = self.get_body_box(
-            remove_overlap=remove_overlap, widen=widen_box, elongate=elongate_box
-        )
-        self.overlay = self.create_overlay(overlay)
-
-    def get_image(self, resize_to, resize_to_nearest_eight):
-        image = load_image(self.image_path)
-        if resize_to:
-            image = resize_preserve_aspect_ratio(image, resize_to)
-        if resize_to_nearest_eight:
-            image = resize_image_to_nearest_eight(image)
-        return image
-
-    def get_results(self):
-        imgsz = max(self.image.size)
-        results = self.model(
-            self.image, retina_masks=True, imgsz=imgsz, verbose=self.verbose
-        )[0]
-        masks, boxes, scores, phrases = unload(results, self.model_labels)
-        results = format_results(
-            masks, boxes, scores, phrases, self.model_labels, person_masks_only=False
-        )
-        masks_to_filter = ["hair"]
-        results = filter_highest_score(results, ["hair", "face", "phone"])
-        return results
-
-    def get_body_mask(self):
-        body_mask = combine_masks(self.results, self.labels, self.is_label)
-        if body_mask is not None:
-            return dilate_mask(body_mask, self.dilate_factor)
-        return None
-
-    def get_bounding_box(self):
-        if self.body_mask is None:
-            return None
-        return get_bounding_box(self.body_mask)
-
-    def get_body_box(self, remove_overlap=True, widen=0, elongate=0):
-        if self.body_mask is None or self.box is None:
-            return None
-        body_box = get_bounding_box_mask(self.body_mask, widen=widen, elongate=elongate)
-        if remove_overlap and body_box is not None:
-            body_box = self.remove_overlap(body_box)
-        return body_box
-
-    def create_overlay(self, overlay_type):
-        if self.body_box is not None and overlay_type == "box":
-            return overlay_mask(self.image, self.body_box, opacity=0.9, color="red")
-        elif self.body_mask is not None:
-            return overlay_mask(self.image, self.body_mask, opacity=0.9, color="red")
-        return self.image
-
-    def remove_overlap(self, body_box):
-        if body_box is None:
-            return None
-        box_array = np.array(body_box)
-        mask = self.combine_masks(mask_labels=self.labels, is_label=True)
-        if mask is None:
-            return body_box
-        mask_array = np.array(mask)
-        box_array[mask_array == 255] = 0
-        return Image.fromarray(box_array)
-
-    def combine_masks(self, mask_labels: List, no_labels=None, is_label=True):
-        if not is_label:
-            mask_labels = [
-                phrase for phrase in self.phrases if phrase not in mask_labels
-            ]
-        masks = [
-            row.get("mask") for row in self.results if row.get("label") in mask_labels
-        ]
-        if len(masks) == 0:
-            return None
-        combined_mask = masks[0]
-        for mask in masks[1:]:
-            combined_mask = ImageChops.lighter(combined_mask, mask)
-        return combined_mask
-
-    def display_results(self):
-        if not self.results:
-            print("No results to display.")
-            return
-        cols = min(len(self.results), 4)
-        display_image_with_masks(self.image, self.results, cols=cols)
-
-    def get_mask(self, mask_label):
-        if mask_label not in self.phrases:
-            print(f"Mask label '{mask_label}' not found in results.")
-            return None
-        return [f for f in self.results if f.get("label") == mask_label]

yolo/utils.py DELETED
@@ -1,298 +0,0 @@
-import matplotlib.patches as patches
-import matplotlib.pyplot as plt
-import numpy as np
-from PIL import Image, ImageDraw
-
-
-def unload_mask(mask):
-    mask = mask.cpu().numpy().squeeze()
-    mask = mask.astype(np.uint8) * 255
-    return Image.fromarray(mask)
-
-
-def unload_box(box):
-    return box.cpu().numpy().tolist()
-
-
-def masks_overlap(mask1, mask2):
-    return np.any(np.logical_and(mask1, mask2))
-
-
-def remove_non_person_masks(person_mask, formatted_results):
-    return [
-        f
-        for f in formatted_results
-        if f.get("label") == "person" or masks_overlap(person_mask, f.get("mask"))
-    ]
-
-
-def format_masks(masks):
-    return [unload_mask(mask) for mask in masks]
-
-
-def format_boxes(boxes):
-    return [unload_box(box) for box in boxes]
-
-
-def format_scores(scores):
-    return scores.cpu().numpy().tolist()
-
-
-def unload(result, labels_dict):
-    masks = format_masks(result.masks.data)
-    boxes = format_boxes(result.boxes.xyxy)
-    scores = format_scores(result.boxes.conf)
-    labels = result.boxes.cls
-    labels = [int(label.item()) for label in labels]
-    phrases = [labels_dict[label] for label in labels]
-    return masks, boxes, scores, phrases
-
-
-def format_results(masks, boxes, scores, labels, labels_dict, person_masks_only=True):
-    if isinstance(list(labels_dict.keys())[0], int):
-        labels_dict = {v: k for k, v in labels_dict.items()}
-
-    # check that the person mask is present
-    if person_masks_only:
-        assert "person" in labels, "Person mask not present in results"
-    results_dict = []
-    for row in zip(labels, scores, boxes, masks):
-        label, score, box, mask = row
-        label_id = labels_dict[label]
-        results_row = dict(
-            label=label, score=score, mask=mask, box=box, label_id=label_id
-        )
-        results_dict.append(results_row)
-    results_dict = sorted(results_dict, key=lambda x: x["label"])
-    if person_masks_only:
-        # Get the person mask
-        person_mask = [f for f in results_dict if f.get("label") == "person"][0]["mask"]
-        assert person_mask is not None, "Person mask not found in results"
-
-        # Remove any results that do no overlap with the person
-        results_dict = remove_non_person_masks(person_mask, results_dict)
-    return results_dict
-
-
-def filter_highest_score(results, labels):
-    """
-    Filter results to remove entries with lower scores for specified labels.
-
-    Args:
-        results (list): List of dictionaries containing 'label', 'score', and other keys.
-        labels (list): List of labels to filter.
-
-    Returns:
-        list: Filtered results with only the highest score for each specified label.
-    """
-    # Dictionary to keep track of the highest score entry for each label
-    label_highest = {}
-
-    # First pass: identify the highest score for each label
-    for result in results:
-        label = result["label"]
-        if label in labels:
-            if (
-                label not in label_highest
-                or result["score"] > label_highest[label]["score"]
-            ):
-                label_highest[label] = result
-
-    # Second pass: construct the filtered list while preserving the order
-    filtered_results = []
-    seen_labels = set()
-
-    for result in results:
-        label = result["label"]
-        if label in labels:
-            if label in seen_labels:
-                continue
-            if result == label_highest[label]:
-                filtered_results.append(result)
-                seen_labels.add(label)
-        else:
-            filtered_results.append(result)
-
-    return filtered_results
-
-
-def display_image_with_masks(image, results, cols=4, return_images=False):
-    # Convert PIL Image to numpy array
-    image_np = np.array(image)
-
-    # Check image dimensions
-    if image_np.ndim != 3 or image_np.shape[2] != 3:
-        raise ValueError("Image must be a 3-dimensional array with 3 color channels")
-
-    # Number of masks
-    n = len(results)
-    rows = (n + cols - 1) // cols  # Calculate required number of rows
-
-    # Setting up the plot
-    fig, axs = plt.subplots(rows, cols, figsize=(5 * cols, 5 * rows))
-    axs = np.array(axs).reshape(-1)  # Flatten axs array for easy indexing
-    for i, result in enumerate(results):
-        mask = result["mask"]
-        label = result["label"]
-        score = float(result["score"])
-
-        # Convert PIL mask to numpy array and resize if necessary
-        mask_np = np.array(mask)
-        if mask_np.shape != image_np.shape[:2]:
-            mask_np = resize(
-                mask_np, image_np.shape[:2], mode="constant", anti_aliasing=False
-            )
-            mask_np = (mask_np > 0.5).astype(
-                np.uint8
-            )  # Threshold back to binary after resize
-
-        # Create an overlay where mask is True
-        overlay = np.zeros_like(image_np)
-        overlay[mask_np > 0] = [0, 0, 255]  # Applying blue color on the mask area
-
-        # Combine the image and the overlay
-        combined = image_np.copy()
-        indices = np.where(mask_np > 0)
-        combined[indices] = combined[indices] * 0.5 + overlay[indices] * 0.5
-
-        # Show the combined image
-        ax = axs[i]
-        ax.imshow(combined)
-        ax.axis("off")
-        ax.set_title(f"Label: {label}, Score: {score:.2f}", fontsize=12)
-        rect = patches.Rectangle(
-            (0, 0),
-            image_np.shape[1],
-            image_np.shape[0],
-            linewidth=1,
-            edgecolor="r",
-            facecolor="none",
-        )
-        ax.add_patch(rect)
-
-    # Hide unused subplots if the total number of masks is not a multiple of cols
-    for idx in range(i + 1, rows * cols):
-        axs[idx].axis("off")
-    plt.tight_layout()
-    plt.show()
-
-
-def get_bounding_box(mask):
-    if mask is None or not isinstance(mask, np.ndarray):
-        return None
-
-    # Check if the mask is empty
-    if mask.size == 0 or np.all(mask == 0):
-        return None
-
-    # Find the bounding box
-    rows = np.any(mask, axis=1)
-    cols = np.any(mask, axis=0)
-    if not np.any(rows) or not np.any(cols):
-        return None
-
-    rmin, rmax = np.where(rows)[0][[0, -1]]
-    cmin, cmax = np.where(cols)[0][[0, -1]]
-
-    return (int(cmin), int(rmin), int(cmax), int(rmax))
-
-
-def get_bounding_box_mask(segmentation_mask, widen=0, elongate=0):
-    # Convert the PIL segmentation mask to a NumPy array
-    mask_array = np.array(segmentation_mask)
-
-    # Find the coordinates of the non-zero pixels
-    non_zero_y, non_zero_x = np.nonzero(mask_array)
-
-    # Calculate the bounding box coordinates
-    min_x, max_x = np.min(non_zero_x), np.max(non_zero_x)
-    min_y, max_y = np.min(non_zero_y), np.max(non_zero_y)
-
-    if widen > 0:
-        min_x = max(0, min_x - widen)
-        max_x = min(mask_array.shape[1], max_x + widen)
-
-    if elongate > 0:
-        min_y = max(0, min_y - elongate)
-        max_y = min(mask_array.shape[0], max_y + elongate)
-
-    # Create a new blank image for the bounding box mask
-    bounding_box_mask = Image.new("1", segmentation_mask.size)
-
-    # Draw the filled bounding box on the blank image
-    draw = ImageDraw.Draw(bounding_box_mask)
-    draw.rectangle([(min_x, min_y), (max_x, max_y)], fill=1)
-
-    return bounding_box_mask
-
-
-colors = {
-    "blue": (136, 207, 249),
-    "red": (255, 0, 0),
-    "green": (0, 255, 0),
-    "yellow": (255, 255, 0),
-    "purple": (128, 0, 128),
-    "cyan": (0, 255, 255),
-    "magenta": (255, 0, 255),
-    "orange": (255, 165, 0),
-    "lime": (50, 205, 50),
-    "pink": (255, 192, 203),
-    "brown": (139, 69, 19),
-    "gray": (128, 128, 128),
-    "black": (0, 0, 0),
-    "white": (255, 255, 255),
-    "gold": (255, 215, 0),
-    "silver": (192, 192, 192),
-    "beige": (245, 245, 220),
-    "navy": (0, 0, 128),
-    "maroon": (128, 0, 0),
-    "olive": (128, 128, 0),
-}
-
-
-def overlay_mask(image, mask, opacity=0.5, color="blue"):
-    """
-    Takes in a PIL image and a PIL boolean image mask. Overlay the mask on the image
-    and color the mask with a low opacity blue with hex #88CFF9.
-    """
-    # Convert the boolean mask to an image with alpha channel
-    alpha = mask.convert("L").point(lambda x: 255 if x == 255 else 0, mode="1")
-
-    # Choose the color
-    r, g, b = colors[color]
-
-    color_mask = Image.new("RGBA", mask.size, (r, g, b, int(opacity * 255)))
-    mask_rgba = Image.composite(
-        color_mask, Image.new("RGBA", mask.size, (0, 0, 0, 0)), alpha
-    )
-
-    # Create a new RGBA image to overlay the mask on
-    overlay = Image.new("RGBA", image.size, (0, 0, 0, 0))
-
-    # Paste the mask onto the overlay
-    overlay.paste(mask_rgba, (0, 0))
-
-    # Create a new image to return by blending the original image and the overlay
-    result = Image.alpha_composite(image.convert("RGBA"), overlay)
-
-    # Convert the result back to the original mode and return it
-    return result.convert(image.mode)
-
-
-def resize_preserve_aspect_ratio(image, max_side=512):
-    width, height = image.size
-    scale = min(max_side / width, max_side / height)
-    new_width = int(width * scale)
-    new_height = int(height * scale)
-    return image.resize((new_width, new_height))
-
-
-def round_to_nearest_eigth(value):
-    return int((value // 8 * 8))
-
-
-def resize_image_to_nearest_eight(image):
-    width, height = image.size
-    width, height = round_to_nearest_eigth(width), round_to_nearest_eigth(height)
-    image = image.resize((width, height))
-    return image