baptistecolle committed
Commit e4bf926 · unverified · 1 parent: a7cc668

first version

Files changed (2)
  1. app.py +337 -0
  2. requirements.txt +6 -0
app.py ADDED
@@ -0,0 +1,337 @@
+ from enum import Enum
+ import random
+ from typing import Any, List, Tuple
+
+ import numpy as np
+ import torch
+ import gradio as gr
+ import mediapipe as mp
+ from PIL import Image, ImageDraw, ImageFont
+ from gradio.components import Image as grImage
+ from transformers import DPTImageProcessor, DPTForDepthEstimation
+
+
+ # Depth estimation model (DPT) and its preprocessor
+ processor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
+ model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
+
+ # MediaPipe face detector; model_selection=1 selects the full-range model
+ detector = mp.solutions.face_detection.FaceDetection(model_selection=1, min_detection_confidence=0.5)
+
+ class Placement(Enum):
+     CENTER = 0
+     TOP = 1
+
+ class FaceKeypointsLabel(Enum):
+     OTHER = 0
+     NOSE = 1
+
+ class Keypoints:
+     def __init__(self, x: float, y: float, label: FaceKeypointsLabel):
+         """
+         :param x: x coordinate of the keypoint, normalized between 0 and 1
+         :param y: y coordinate of the keypoint, normalized between 0 and 1
+         """
+         self.x = x
+         self.y = y
+         self.label = label
+
+ class BoundingBox:
+     def __init__(self, x_min: int, y_min: int, width: int, height: int):
+         self.x_min = x_min
+         self.y_min = y_min
+         self.width = width
+         self.height = height
+
+ class FaceDetectionResult:
+     """
+     A class to represent the result of a face detection
+     """
+     def __init__(self, bounding_box: BoundingBox, keypoints: List[Keypoints]):
+         self.bounding_box = bounding_box
+         self.keypoints = keypoints
+
+
+ def detect_face(image: Image.Image) -> List[Any]:
+     """
+     Use mediapipe to detect faces in an image
+     """
+     result = detector.process(np.array(image))
+     if result.detections is None:
+         return []
+     return result.detections
+
+
+ def predict_depth(image: Image.Image) -> np.ndarray:
+     """
+     Predict a depth map for an image.
+     Note: DPT (MiDaS-style) models predict relative inverse depth, so larger
+     values correspond to points that are closer to the camera.
+     """
+     inputs = processor(images=image, return_tensors="pt")
+
+     with torch.no_grad():
+         outputs = model(**inputs)
+         predicted_depth = outputs.predicted_depth
+
+     # Interpolate to original size
+     prediction = torch.nn.functional.interpolate(
+         predicted_depth.unsqueeze(1),
+         size=image.size[::-1],
+         mode="bicubic",
+         align_corners=False,
+     )
+
+     output = prediction.squeeze().cpu().numpy()
+     return (output * 255 / np.max(output)).astype("uint8")
+
+ def estimate_depth_at_points(depth_map: np.ndarray, coordinates: List[Tuple[int, int]]) -> List[float]:
+     """
+     Get the depth at the given coordinates
+     """
+     depth_estimates = []
+
+     # Iterate through the given coordinates and estimate depth at each point
+     for x, y in coordinates:
+         depth_estimate = depth_map[y, x]  # Access depth at the given point
+         depth_estimates.append(depth_estimate)
+
+     return depth_estimates
+
+
+ class Person:
+     """
+     A class to represent a person in an image
+     """
+
+     def __init__(self, nose_x: int, nose_y: int, head_width: int, head_height: int, middle_top_head_x: int, middle_top_head_y: int):
+         self.nose_x = nose_x
+         self.nose_y = nose_y
+         self.head_width = head_width
+         self.head_height = head_height
+         self.middle_top_head_x = middle_top_head_x
+         self.middle_top_head_y = middle_top_head_y
+         self.nose_width = int(head_width / 5)
+         self.nose_height = int(head_height / 3)
+
+ def extract_persons(face_detection_results: List[FaceDetectionResult], image: Image.Image) -> List[Person]:
+     """
+     Extract a list of people from a face detection result
+     """
+     persons = []
+
+     for face_result in face_detection_results:
+         bbox = face_result.bounding_box
+         keypoints = face_result.keypoints
+
+         # Find the keypoint labelled as the nose; skip the face if none was found
+         nose_keypoint = None
+         for keypoint in keypoints:
+             if keypoint.label == FaceKeypointsLabel.NOSE:
+                 nose_keypoint = keypoint
+                 break
+         if nose_keypoint is None:
+             continue
+
+         nose_x = int(nose_keypoint.x * image.width)
+         nose_y = int(nose_keypoint.y * image.height)
+
+         # Bounding box details
+         middle_top_head_x = int(bbox.x_min + bbox.width // 2)
+         middle_top_head_y = bbox.y_min
+         head_width = bbox.width
+         head_height = bbox.height
+
+         # Create and add Person object
+         person = Person(nose_x, nose_y, head_width, head_height, middle_top_head_x, middle_top_head_y)
+         persons.append(person)
+
+     return persons
+
+ def add_mask(image: Image.Image, mask: Image.Image, coordinate: Tuple[int, int], size: Tuple[int, int], placement: Placement) -> Image.Image:
+     """
+     Add a mask (a static image) to an image
+     """
+
+     # If only a width is given, compute the height so the aspect ratio is maintained
+     if len(size) == 1:
+         height = mask.height
+         width = mask.width
+         ratio = height / width
+         size = (size[0], int(size[0] * ratio))
+
+     if placement == Placement.CENTER:
+         coordinate = (coordinate[0] - size[0] // 2, coordinate[1] - size[1] // 2)
+     elif placement == Placement.TOP:
+         coordinate = (coordinate[0] - size[0] // 2, coordinate[1] - size[1])
+
+     mask = mask.resize(size)
+     # Use the mask's own alpha channel so transparent regions are not pasted
+     image.paste(mask, coordinate, mask)
+     return image
+
+ def draw_attributes(image: Image.Image, persons: List[Person]) -> Image.Image:
+     """
+     Debug function to draw the face detection attributes on an image
+     """
+     draw = ImageDraw.Draw(image)
+     font = ImageFont.load_default()
+
+     for person in persons:
+         # Draw a circle at the nose position
+         draw.ellipse([(person.nose_x - 5, person.nose_y - 5), (person.nose_x + 5, person.nose_y + 5)], fill=(0, 255, 0))
+
+         # Draw the head rectangle
+         draw.rectangle([(person.middle_top_head_x - person.head_width // 2, person.middle_top_head_y),
+                         (person.middle_top_head_x + person.head_width // 2, person.middle_top_head_y + person.head_height)],
+                        outline=(0, 255, 0))
+
+         # Put text for the head dimensions
+         draw.text((person.middle_top_head_x, person.middle_top_head_y - 20), f"Width: {person.head_width}, Height: {person.head_height}", fill=(255, 255, 255), font=font)
+         # Put the location of the nose
+         draw.text((person.nose_x, person.nose_y + 10), f"({person.nose_x}, {person.nose_y})", fill=(255, 255, 255), font=font)
+
+         # Draw a dot at the middle of the top of the head
+         draw.ellipse([(person.middle_top_head_x - 5, person.middle_top_head_y - 5), (person.middle_top_head_x + 5, person.middle_top_head_y + 5)], fill=(255, 0, 0))
+
+     return image
+
+ def apply_reindeer_mask(image: Image.Image, person: Person) -> Image.Image:
+     """
+     Apply a reindeer mask to a person in an image
+     """
+     reindeer_nose = Image.open("cv/mask/reindeer_nose.png")
+     reindeer_antlers = Image.open("cv/mask/reindeer_antlers.png")
+
+     reindeer_nose_coordinate = (person.nose_x, person.nose_y)
+     reindeer_nose_size = (person.nose_height, person.nose_height)
+     image = add_mask(image, reindeer_nose, reindeer_nose_coordinate, reindeer_nose_size, Placement.CENTER)
+
+     # A one-element size keeps the antlers' aspect ratio, scaled to the head width
+     reindeer_antlers_size = (person.head_width, )
+     reindeer_antlers_coordinate = (person.middle_top_head_x, person.middle_top_head_y)
+     image = add_mask(image, reindeer_antlers, reindeer_antlers_coordinate, reindeer_antlers_size, Placement.TOP)
+     return image
+
+ def apply_santa_hat_mask(image: Image.Image, person: Person) -> Image.Image:
+     """
+     Apply a Santa hat mask to a person in an image
+     """
+     santa_hat = Image.open("cv/mask/santa_hat.png")
+     santa_hat_size = (person.head_width, )
+     santa_hat_coordinate = (person.middle_top_head_x, person.middle_top_head_y)
+     image = add_mask(image, santa_hat, santa_hat_coordinate, santa_hat_size, Placement.TOP)
+     return image
+
+ def add_text(image: Image.Image, text: str, font_size: int = 30) -> Image.Image:
+     """
+     Add text to an image
+     """
+     draw = ImageDraw.Draw(image)
+
+     # Calculate text width and height for centering
+     # (ImageDraw.textsize was removed in Pillow 10; textbbox is its replacement)
+     left, top, right, bottom = draw.textbbox((0, 0), text)
+     text_width = right - left
+     text_height = bottom - top
+     text_x = (image.width - text_width) // 2
+     text_y = (image.height - text_height) // 2
+
+     # Note: font_size is currently unused; the default bitmap font is used
+     draw.text((text_x, text_y), text, fill=(255, 0, 0))
+     return image
+
+ def apply_random_mask(image: Image.Image, person: Person) -> Image.Image:
+     """
+     Apply a random mask to a person in an image
+     """
+     mask = random.choice([apply_santa_hat_mask, apply_reindeer_mask])
+     image = mask(image, person)
+     return image
+
+
+ def process_image(image: Image.Image) -> Image.Image:
+     """
+     The full pipeline: takes an image and returns an image with more Christmas spirit :)
+     """
+
+     # Potential improvement: these two steps could be run in parallel
+     depth_result = predict_depth(image)
+     detections = detect_face(image)
+
+     face_detection_results = parse_detection_result(detections, image)
+     persons = extract_persons(face_detection_results, image)
+
+     if len(persons) == 0:
+         return add_text(image, "No faces detected in the image")
+     if len(persons) == 1:
+         image = apply_random_mask(image, persons[0])
+     elif len(persons) > 1:
+         # Rules of the assignment: the closest person gets the Santa hat and the furthest
+         # person gets the reindeer mask. Everyone else gets a random mask (either one),
+         # as this case was not specified in the assignment.
+         # DPT predicts relative inverse depth, so larger values mean closer to the camera.
+         depth_estimates = estimate_depth_at_points(depth_result, [(person.nose_x, person.nose_y) for person in persons])
+         closest_camera_index = np.argmax(depth_estimates)
+         furthest_camera_index = np.argmin(depth_estimates)
+         santa_person = persons[closest_camera_index]
+         reindeer_person = persons[furthest_camera_index]
+
+         image = apply_reindeer_mask(image, reindeer_person)
+         image = apply_santa_hat_mask(image, santa_person)
+
+         for i, person in enumerate(persons):
+             if i != closest_camera_index and i != furthest_camera_index:
+                 image = apply_random_mask(image, person)
+
+     return image
+
+ def parse_detection_to_face_detection_result(detection, image_width: int, image_height: int) -> FaceDetectionResult:
+     """
+     Parse a mediapipe detection to a FaceDetectionResult
+     """
+
+     # Extract bounding box
+     bbox = detection.location_data.relative_bounding_box
+     x_min = int(bbox.xmin * image_width)
+     y_min = int(bbox.ymin * image_height)
+     width = int(bbox.width * image_width)
+     height = int(bbox.height * image_height)
+     bounding_box = BoundingBox(x_min, y_min, width, height)
+
+     # Extract keypoints; in MediaPipe's face-detection keypoint ordering, index 2 is the nose tip
+     keypoints = []
+     for i, keypoint in enumerate(detection.location_data.relative_keypoints):
+         x = keypoint.x
+         y = keypoint.y
+         face_type = FaceKeypointsLabel.OTHER
+         if i == 2:
+             face_type = FaceKeypointsLabel.NOSE
+         keypoints.append(Keypoints(x, y, face_type))
+
+     return FaceDetectionResult(bounding_box, keypoints)
+
+
+ def parse_detection_result(detection_result, image: Image.Image) -> List[FaceDetectionResult]:
+     """
+     Parse a mediapipe detection result to a list of FaceDetectionResult
+     """
+     face_detection_results = []
+
+     for detection in detection_result:
+         face_detection_result = parse_detection_to_face_detection_result(detection, image.width, image.height)
+         face_detection_results.append(face_detection_result)
+
+     return face_detection_results
+
+ def main():
+     # Remark: the code is kept in one file for simplicity, but it would be better to split it up into multiple files
+
+     # Create a gradio interface
+     iface = gr.Interface(
+         fn=process_image,
+         inputs=grImage(type="pil"),
+         outputs=grImage(type="pil"),
+         title="Image Processor",
+         description="Upload an image to detect faces and apply transformations."
+     )
+
+     # Launch the interface
+     iface.launch()
+
+
+ if __name__ == "__main__":
+     main()
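For quick local testing of the pipeline without launching the Gradio UI, a minimal driver along the following lines could be used (a sketch, not part of this commit; it assumes a local test image named test.jpg sitting next to app.py):

    from PIL import Image
    from app import process_image  # importing app also loads the DPT and MediaPipe models

    # Hypothetical smoke test: run the full pipeline on a single local image
    input_image = Image.open("test.jpg").convert("RGB")
    output_image = process_image(input_image)
    output_image.save("test_with_masks.jpg")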
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ numpy
+ torch
+ Pillow
+ transformers
+ gradio
+ mediapipe
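Note that app.py also loads mask assets from cv/mask/ (reindeer_nose.png, reindeer_antlers.png and santa_hat.png) that are not among the two files added by this commit. A small pre-flight check along these lines (hypothetical, not part of the commit) would surface a missing-assets problem before the interface starts:

    from pathlib import Path

    # Hypothetical pre-flight check for the mask assets that app.py expects
    required_assets = [
        Path("cv/mask/reindeer_nose.png"),
        Path("cv/mask/reindeer_antlers.png"),
        Path("cv/mask/santa_hat.png"),
    ]
    missing = [str(path) for path in required_assets if not path.is_file()]
    if missing:
        raise FileNotFoundError("Missing mask assets: " + ", ".join(missing))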