Spaces:

baptistecolle
/

santa

Sleeping

App Files Files Community

baptistecolle HF staff committed on Jan 3, 2024

Commit

e4bf926

unverified ·

1 Parent(s): a7cc668

first version

Browse files

Files changed (2) hide show

app.py +337 -0
requirements.txt +6 -0

app.py ADDED Viewed

	@@ -0,0 +1,337 @@

+from enum import Enum
+import numpy as np
+import gradio as gr
+import torch
+from PIL import Image
+from transformers import DPTImageProcessor, DPTForDepthEstimation
+from typing import List, Tuple
+import random
+from PIL import ImageDraw, ImageFont
+from gradio.components import Image as grImage
+import mediapipe as mp
+processor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
+model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
+detector = mp.solutions.face_detection.FaceDetection(model_selection=1, min_detection_confidence=0.5)
+class Placement(Enum):
+    CENTER = 0
+    TOP = 1
+class FaceKeypointsLabel(Enum):
+    OTHER = 0
+    NOSE = 1
+class Keypoints:
+    def __init__(self, x: float, y: float, label: FaceKeypointsLabel):
+        """
+        :param x: x coordinate of the keypoint, normalized between 0 and 1
+        :param y: y coordinate of the keypoint, normalized between 0 and 1
+        """
+        self.x = x
+        self.y = y
+        self.label = label
+class BoundingBox:
+    def __init__(self, x_min: int, y_min: int, width: int, height: int):
+        self.x_min = x_min
+        self.y_min = y_min
+        self.width = width
+        self.height = height
+class FaceDetectionResult:
+    """
+    A class to represent the result of a face detection
+    """
+    def __init__(self, bounding_box : BoundingBox, keypoints: List[Keypoints]):
+        self.bounding_box  = bounding_box
+        self.keypoints = keypoints
+def detect_face(image: Image) -> List[any]:
+    """
+    Use mediapipe to detect faces in an image
+    """
+    result = detector.process(np.array(image))
+    if result.detections is None:
+        return []
+    return result.detections
+def predict_depth(image: Image) -> np.ndarray:
+    """
+    Predict depth for an image
+    """
+    inputs = processor(images=image, return_tensors="pt")
+    with torch.no_grad():
+        outputs = model(**inputs)
+        predicted_depth = outputs.predicted_depth
+    # Interpolate to original size
+    prediction = torch.nn.functional.interpolate(
+        predicted_depth.unsqueeze(1),
+        size=image.size[::-1],
+        mode="bicubic",
+        align_corners=False,
+    )
+    output = prediction.squeeze().cpu().numpy()
+    return (output * 255 / np.max(output)).astype("uint8")
+def estimate_depth_at_points(depth_map: np.ndarray, coordinates: List[Tuple[int, int]]) -> List[float]:
+    """
+    Get the depth at a given coordinates
+    """
+    depth_estimates = []
+    # Iterate through the given coordinates and estimate depth at each point
+    for x, y in coordinates:
+        depth_estimate = depth_map[y, x]  # Access depth at the given point
+        depth_estimates.append(depth_estimate)
+    return depth_estimates
+class Person:
+    """
+    A class to represent a person in an image
+    """
+    def __init__(self, nose_x: int, nose_y: int, head_width: int, head_height: int, middle_top_head_x: int, middle_top_head_y: int):
+        self.nose_x = nose_x
+        self.nose_y = nose_y
+        self.head_width = head_width
+        self.head_height = head_height
+        self.middle_top_head_x = middle_top_head_x
+        self.middle_top_head_y = middle_top_head_y
+        self.nose_width = int(head_width / 5)
+        self.nose_height = int(head_height / 3)
+def extract_persons(face_detection_results: List[FaceDetectionResult], image: Image) -> List[Person]:
+    """
+    Extract a list of people from a face detection result
+    """
+    persons = []
+    for face_result in face_detection_results:
+        bbox = face_result.bounding_box
+        keypoints = face_result.keypoints
+        # Assuming the nose is the first keypoint in the list.
+        # You might need to adjust this based on how keypoints are ordered.
+        for keypoint in keypoints:
+            if keypoint.label == FaceKeypointsLabel.NOSE:
+                nose_keypoint = keypoint
+                break
+        nose_x = int(nose_keypoint.x * image.width)
+        nose_y = int(nose_keypoint.y * image.height)
+        # Bounding box details
+        middle_top_head_x = int(bbox.x_min + bbox.width // 2)
+        middle_top_head_y = bbox.y_min
+        head_width = bbox.width
+        head_height = bbox.height
+        # Create and add Person object
+        person = Person(nose_x, nose_y, head_width, head_height, middle_top_head_x, middle_top_head_y)
+        persons.append(person)
+    return persons
+def add_mask(image: Image, mask: Image, coordinate: Tuple[int, int], size: Tuple[int, int], placement: Placement) -> Image:
+    """
+    Add a mask (a static image) to an image
+    """
+    # maintain aspect ratio
+    if len(size) == 1:
+        height = mask.height
+        width = mask.width
+        ratio = height / width
+        size = (size[0], int(size[0] * ratio))
+    if placement == Placement.CENTER:
+        coordinate = (coordinate[0] - size[0] // 2, coordinate[1] - size[1] // 2)
+    elif placement == Placement.TOP:
+        coordinate = (coordinate[0] - size[0] // 2, coordinate[1] - size[1])
+    mask = mask.resize(size)
+    image.paste(mask, coordinate, mask)
+    return image
+def draw_attributes(image: Image, persons: List[Person]) -> Image:
+    """
+    Debug function to the face recognition attributes on an image
+    """
+    draw = ImageDraw.Draw(image)
+    font = ImageFont.load_default()
+    for person in persons:
+        # Draw a circle at the nose position
+        draw.ellipse([(person.nose_x - 5, person.nose_y - 5), (person.nose_x + 5, person.nose_y + 5)], fill=(0, 255, 0))
+        # Draw the head rectangle
+        draw.rectangle([(person.middle_top_head_x - person.head_width // 2, person.middle_top_head_y),
+                        (person.middle_top_head_x + person.head_width // 2, person.middle_top_head_y + person.head_height)],
+                       outline=(0, 255, 0))
+        # Put text for dimensions
+        draw.text((person.middle_top_head_x, person.middle_top_head_y - 20), f"Width: {person.head_width}, Height: {person.head_height}", fill=(255, 255, 255), font=font)
+        # put location of nose
+        draw.text((person.nose_x, person.nose_y + 10), f"({person.nose_x}, {person.nose_y})", fill=(255, 255, 255), font=font)
+        # draw dot at middle top head
+        draw.ellipse([(person.middle_top_head_x - 5, person.middle_top_head_y - 5), (person.middle_top_head_x + 5, person.middle_top_head_y + 5)], fill=(255, 0, 0))
+    return image
+def apply_reindeer_mask(image: Image, person: Person) -> Image:
+    """
+    Apply a reindeer mask to a person in an image
+    """
+    reindeer_nose = Image.open("cv/mask/reindeer_nose.png")
+    reindeer_antlers = Image.open("cv/mask/reindeer_antlers.png")
+    reindeer_nose_coordinate = (person.nose_x, person.nose_y)
+    reindeer_nose_size = (person.nose_height, person.nose_height)
+    image = add_mask(image, reindeer_nose, reindeer_nose_coordinate, reindeer_nose_size, Placement.CENTER)
+    reindeer_antlers_size = (person.head_width, )
+    reindeer_antlers_coordinate = (person.middle_top_head_x, person.middle_top_head_y)
+    image = add_mask(image, reindeer_antlers, reindeer_antlers_coordinate, reindeer_antlers_size, Placement.TOP)
+    return image
+def apply_santa_hat_mask(image: Image, person: Person) -> Image:
+    """
+    Apply a santa hat mask to a person in an image
+    """
+    santa_hat = Image.open("cv/mask/santa_hat.png")
+    santa_hat_size = (person.head_width, )
+    santa_hat_coordinate = (person.middle_top_head_x, person.middle_top_head_y)
+    image = add_mask(image, santa_hat, santa_hat_coordinate, santa_hat_size, Placement.TOP)
+    return image
+def add_text(image: Image, text: str, font_size: int = 30) -> Image:
+    """
+    Add text to an image
+    """
+    draw = ImageDraw.Draw(image)
+    # Calculate text width and height for centering
+    text_width, text_height = draw.textsize(text)
+    text_x = (image.width - text_width) // 2
+    text_y = (image.height - text_height) // 2
+    draw.text((text_x, text_y), text, fill=(255, 0, 0))
+    return image
+def apply_random_mask(image: Image, person: Person) -> Image:
+    """
+    Apply a random mask to a person in an image
+    """
+    mask = random.choice([apply_santa_hat_mask, apply_reindeer_mask])
+    image = mask(image, person)
+    return image
+def process_image(image : Image):
+    """
+    The full pipeline that take an image and returns an image with more christmas spirit :)
+    """
+    # Potential improvement this could be done in parallel
+    depth_result = predict_depth(image)
+    detections = detect_face(image)
+    face_detection_results = parse_detection_result(detections, image)
+    persons = extract_persons(face_detection_results, image)
+    if len(persons) == 0:
+        return add_text(image, "No faces detected in the image")
+    if len(persons) == 1:
+        image = apply_random_mask(image,persons[0])
+    elif len(persons) > 1:
+        # Apply the rules of the assignment, closest person gets santa hat, furthest person gets reindeer mask
+        # All other people get a random mask (either santa hat or reindeer mask) (as this was not specified in the assignment)
+        depth_estimates = estimate_depth_at_points(depth_result, [(person.nose_x, person.nose_y) for person in persons])
+        closest_camera_index = np.argmin(depth_estimates)
+        furthest_camera_index = np.argmax(depth_estimates)
+        santa_person = persons[closest_camera_index]
+        reindeer_person = persons[furthest_camera_index]
+        image = apply_reindeer_mask(image, reindeer_person)
+        image = apply_santa_hat_mask(image, santa_person)
+        for i, person in enumerate(persons):
+            if i != closest_camera_index and i != furthest_camera_index:
+                image = apply_random_mask(image, person)
+    return image
+def parse_detection_to_face_detection_result(detection, image_width: int, image_height: int) -> FaceDetectionResult:
+    """
+    Parse a mediapipe detection to a FaceDetectionResult
+    """
+    # Extract bounding box
+    bbox = detection.location_data.relative_bounding_box
+    x_min = int(bbox.xmin * image_width)
+    y_min = int(bbox.ymin * image_height)
+    width = int(bbox.width * image_width)
+    height = int(bbox.height * image_height)
+    bounding_box = BoundingBox(x_min, y_min, width, height)
+    # Extract keypoints
+    keypoints = []
+    for i, keypoint in enumerate(detection.location_data.relative_keypoints):
+        x = keypoint.x
+        y = keypoint.y
+        face_type = FaceKeypointsLabel.OTHER
+        if i == 2:
+            face_type = FaceKeypointsLabel.NOSE
+        keypoints.append(Keypoints(x, y, face_type))
+    return FaceDetectionResult(bounding_box, keypoints)
+def parse_detection_result(detection_result, image: Image) -> List[FaceDetectionResult]:
+    """
+    Parse a mediapipe detection result to a list of FaceDetectionResult
+    """
+    face_detection_results = []
+    for detection in detection_result:
+        face_detection_result = parse_detection_to_face_detection_result(detection, image.width, image.height)
+        face_detection_results.append(face_detection_result)
+    return face_detection_results
+def main():
+    # Remarks: the code is in one file for simplicity, but it would be better to split it up in multiple files
+    # Create a gradio interface
+    iface = gr.Interface(
+    fn=process_image,
+    inputs=grImage(type="pil"),
+    outputs=grImage(type="pil"),
+    title="Image Processor",
+    description="Upload an image to detect faces and apply transformations."
+    )
+    # Launch the interface
+    iface.launch()
+if __name__ == "__main__":
+    main()

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+numpy
+torch
+Pillow
+transformers
+gradio
+mediapipe