Spaces:
Build error
Build error
File size: 3,799 Bytes
a9640c3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
import cv2
import typing
import numpy as np
import mediapipe as mp
class MPFaceDetection:
    """Thin wrapper around the MediaPipe face detection solution.

    An instance is callable: it runs the detector on a frame and either draws
    the detections onto that frame or returns them as pixel coordinates.
    More about the underlying solution:
    https://google.github.io/mediapipe/solutions/face_detection.html
    """

    def __init__(
        self,
        model_selection: int = 1,
        confidence: float = 0.5,
        mp_drawing_utils: bool = True,
        color: typing.Tuple[int, int, int] = (255, 255, 255),
        thickness: int = 2,
    ) -> None:
        """Create the MediaPipe detector and store the drawing options.

        Args:
            model_selection: (int) - model index forwarded to MediaPipe. Per the
                MediaPipe documentation, 0 selects the short-range model (faces
                within about 2 meters) and 1 the full-range model (faces within
                about 5 meters).
            confidence: (float) - minimum detection confidence, range (0.0-1.0),
                forwarded as ``min_detection_confidence``.
            mp_drawing_utils: (bool) - draw with MediaPipe drawing utils when True,
                otherwise draw plain rectangles with cv2. Default to True.
            color: (typing.Tuple[int, int, int]) - color of the cv2 rectangle.
                Only used when mp_drawing_utils is False. Default to white.
            thickness: (int) - thickness of the cv2 rectangle in pixels.
                Only used when mp_drawing_utils is False. Default to 2.
        """
        self.mp_drawing_utils = mp_drawing_utils
        self.color = color
        self.thickness = thickness

        self.mp_drawing = mp.solutions.drawing_utils
        self.mp_face_detection = mp.solutions.face_detection
        self.face_detection = self.mp_face_detection.FaceDetection(
            model_selection=model_selection,
            min_detection_confidence=confidence,
        )

    def tlbr(self, frame: np.ndarray, mp_detections: typing.List) -> np.ndarray:
        """Convert MediaPipe detections to pixel [Top, Left, Bottom, Right] boxes.

        Args:
            frame: (np.ndarray) - frame the detections belong to; only its first
                two dimensions (height, width) are used.
            mp_detections: (typing.List) - MediaPipe detections, each exposing
                ``location_data.relative_bounding_box``. None or an empty
                sequence is accepted.

        Returns:
            detections: (np.ndarray) - integer array of shape (N, 4) holding
                [Top, Left, Bottom, Right] rows clipped to the frame; shape
                (0, 4) when there is nothing to convert.
        """
        # Slice instead of 3-way unpacking so 2-D (single channel) frames work too.
        frame_height, frame_width = frame.shape[:2]

        boxes = []
        for detection in mp_detections or ():
            box = detection.location_data.relative_bounding_box

            # Compute all four edges from the *unclamped* origin first. Clamping
            # top/left before adding the size would shift and enlarge a box that
            # starts outside the frame instead of clipping it.
            top = int(box.ymin * frame_height)
            left = int(box.xmin * frame_width)
            bottom = top + int(box.height * frame_height)
            right = left + int(box.width * frame_width)

            boxes.append([
                min(max(top, 0), frame_height),
                min(max(left, 0), frame_width),
                min(max(bottom, 0), frame_height),
                min(max(right, 0), frame_width),
            ])

        if not boxes:
            # Keep the (N, 4) layout for the empty case as well.
            return np.empty((0, 4), dtype=int)
        return np.array(boxes, dtype=int)

    def __call__(self, frame: np.ndarray, return_tlbr: bool = False) -> np.ndarray:
        """Run face detection on a frame.

        Args:
            frame: (np.ndarray) - frame to execute face detection on.
                NOTE(review): MediaPipe documents ``process`` as taking an RGB
                image, while OpenCV captures are BGR - confirm callers convert.
            return_tlbr: (bool) - return coordinates instead of the frame with
                drawn detections.

        Returns:
            typing.Union[
                frame: (np.ndarray) - the input frame, with detections drawn onto it,
                detections: (np.ndarray) - (N, 4) array of [Top, Left, Bottom, Right],
                    shape (0, 4) when no face is found
            ]
        """
        results = self.face_detection.process(frame)
        # ``detections`` may be None when nothing was found; normalise to a sequence.
        detections = results.detections or []

        if return_tlbr:
            # Always an ndarray, also when empty, so the return type is consistent.
            return self.tlbr(frame, detections)

        if not detections:
            return frame

        if self.mp_drawing_utils:
            # Draw face detections of each face using media pipe drawing utils.
            for detection in detections:
                self.mp_drawing.draw_detection(frame, detection)
        else:
            # Draw face detections of each face using our own tlbr and cv2.rectangle.
            # tolist() yields plain Python ints; cv2 points are (x, y) == (left, top).
            for top, left, bottom, right in self.tlbr(frame, detections).tolist():
                cv2.rectangle(frame, (left, top), (right, bottom), self.color, self.thickness)

        return frame