Update app.py
app.py CHANGED

@@ -6,263 +6,288 @@ import time
 import tempfile
 import os
 
-# --- MediaPipe Initialization ---
 try:
     mp_face_mesh = mp.solutions.face_mesh
     face_mesh = mp_face_mesh.FaceMesh(
-        static_image_mode=False,
         max_num_faces=1,
         refine_landmarks=True,
-        min_detection_confidence=0.5
     )
-    print("MediaPipe Face Mesh initialized successfully.")
 except (ImportError, AttributeError):
     print("Error: Could not initialize MediaPipe Face Mesh. Is mediapipe installed correctly?")
     face_mesh = None
 
-# --- Helper Functions ---
 
-def get_face_mask_box(img, feather, padding=0):
-    h, w = img.shape[:2]
     mask = np.zeros((h, w), dtype=np.uint8)
-    results = face_mesh.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
-    if not results.multi_face_landmarks:
-        return None, None
-    pts = np.array([(int(p.x * w), int(p.y * h)) for p in results.multi_face_landmarks[0].landmark], np.int32)
-    hull = cv2.convexHull(pts)
     cv2.fillConvexPoly(mask, hull, 255)
     x, y, bw, bh = cv2.boundingRect(hull)
     x_pad = max(x - padding, 0)
     y_pad = max(y - padding, 0)
-    x2 = min(x + bw + padding, w)
-    y2 = min(y + bh + padding, h)
-    mask_roi = mask[y_pad:y2, x_pad:x2]
-    # inside feather
-    if feather > 0 and mask_roi.size > 0:
-        k = int(feather)
-        mask_roi = cv2.GaussianBlur(mask_roi, (k*2+1, k*2+1), 0)
-    return mask_roi, (x_pad, y_pad, x2 - x_pad, y2 - y_pad)
 
 
-    if not results.multi_face_landmarks:
-        return np.zeros_like(img), None, None
-    pts = np.array([(int(p.x * w), int(p.y * h)) for p in results.multi_face_landmarks[0].landmark], np.int32)
-    hull = cv2.convexHull(pts)
-    cv2.fillConvexPoly(mask, hull, 255)
-    # bounding box
-    x, y, bw, bh = cv2.boundingRect(hull)
-    # feather mask
-    k = int(feather)
-    if k > 0:
-        mask = cv2.GaussianBlur(mask, (k*2+1, k*2+1), 0)
-    # extract face ROI
-    face_roi = img[y:y+bh, x:x+bw]
-    mask_roi = mask[y:y+bh, x:x+bw]
-    # apply mask
-    fg = cv2.bitwise_and(face_roi, face_roi, mask=mask_roi)
-    # prepare alpha
-    alpha = mask_roi.astype(np.float32) / 255.0
-    # composite onto transparent background same size
-    out = (fg.astype(np.float32) * alpha[..., None]).astype(np.uint8)
-    return out, mask_roi, (x, y, bw, bh)
-
-def get_landmarks(img, landmark_step=1):
-    if img is None or face_mesh is None:
-        return None
-    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
-    try:
-        results = face_mesh.process(img_rgb)
-    except Exception:
-        return None
-    if not results.multi_face_landmarks:
-        return None
-    landmarks_mp = results.multi_face_landmarks[0]
-    h, w, _ = img.shape
-    pts = np.array([(pt.x * w, pt.y * h) for pt in landmarks_mp.landmark], dtype=np.float32)
-    landmarks = pts[::landmark_step] if landmark_step > 1 else pts
-    if not np.all(np.isfinite(landmarks)):
-        return None
-    corners = np.array([[0,0],[w-1,0],[0,h-1],[w-1,h-1]], dtype=np.float32)
-    return np.vstack((landmarks, corners))
-
 def calculate_delaunay_triangles(rect, points):
         return []
-    points[:,0] = np.clip(points[:,0], rect[0], rect[0]+rect[2]-1)
-    points[:,1] = np.clip(points[:,1], rect[1], rect[1]+rect[3]-1)
     subdiv = cv2.Subdiv2D(rect)
-    inserted = {}
-    for i,p in enumerate(points):
-        try:
-            subdiv.insert((int(p[0]), int(p[1])))
-            inserted[(int(p[0]), int(p[1]))] = i
-        except cv2.error:
-            continue
-    tris = subdiv.getTriangleList()
-    delaunay=[]
-    for t in tris:
-        coords=[(int(t[0]),int(t[1])),(int(t[2]),int(t[3])),(int(t[4]),int(t[5]))]
-        if all(rect[0]<=x<rect[0]+rect[2] and rect[1]<=y<rect[1]+rect[3] for x,y in coords):
-            idxs=[inserted.get(c) for c in coords]
-            if all(i is not None for i in idxs) and len(set(idxs))==3:
-                delaunay.append(idxs)
-    return delaunay
-
-
-def warp_triangle(img1,img2,t1,t2):
-    if len(t1)!=3 or len(t2)!=3:
-        return
-    r1=cv2.boundingRect(np.float32([t1]))
-    r2=cv2.boundingRect(np.float32([t2]))
 
     for ids in tris:
 
 
 def process_video(video_path, ref_img, trans, res, step, feather, padding):
     cap = cv2.VideoCapture(video_path)
     fps = cap.get(cv2.CAP_PROP_FPS) or 24
 
     ref_bgr = cv2.cvtColor(ref_img, cv2.COLOR_RGB2BGR)
     w_o = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
     h_o = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
     tmp_vid = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4').name
     out_vid = cv2.VideoWriter(tmp_vid, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w_o, h_o))
     first_crop = None
-    first_mask = None
-    first_ref = None
     first_morphed = None
 
         ret, frame = cap.read()
         if not ret: break
             continue
         x, y, w, h = box
         crop = frame[y:y+h, x:x+w]
         crop_resized = cv2.resize(crop, (res, res))
         if i == 0:
             first_crop = crop_resized.copy()
         frame[y:y+h, x:x+w] = blended.astype(np.uint8)
         out_vid.write(frame)
 
-    cap.release()
-    # Apply mask to first_morphed for preview
-    if first_morphed is not None and first_mask is not None:
-        mask_n0 = first_mask.astype(np.float32)[..., None] / 255.0
-        first_morphed = (first_morphed.astype(np.float32) * mask_n0).astype(np.uint8)
-    else:
-        first_morphed = np.zeros((res, res, 3), dtype=np.uint8)
-    first_crop = first_crop if first_crop is not None else np.zeros((res, res,3),np.uint8)
-    first_ref = first_ref if first_ref is not None else ref_morph.copy()
 
-    # Convert for Gradio
 
 # --- Gradio App ---
 css = """video, img { object-fit: contain !important; }"""
-with gr.Blocks(css=css) as iface:
-    gr.Markdown("# …
     with gr.Row():
         vid = gr.Video(label='Input Video')
         ref = gr.Image(type='numpy', label='Reference Image')
     with gr.Row():
-        res = gr.Dropdown([256,384,512…
-        step = gr.Slider(1,4,value=4,step=1,label='Landmark Sub-sampling')
-        feather = gr.Slider(0,50,value=…
-        padding = gr.Slider(0,100,value=…
-        trans = gr.Slider(-1.0,1.0,value=-0.…
 
     btn.click(
         fn=process_video,
-        inputs=[vid,ref,trans,res,step,feather,padding],
-        outputs=[out_vid,out_crop,out_ref,out_morph],
-        show_progress=…
     )
-    gr.Markdown("---\n*Added padding to the face crop for better framing.*")
 
-if __name__=='__main__':
     iface.launch(debug=True)
@@ -6,263 +6,288 @@ import time

 import tempfile
 import os
 
+# --- MediaPipe Initialization for Video ---
+# Optimized for video by setting static_image_mode to False.
+# This enables tracking and significantly improves speed and stability.
 try:
     mp_face_mesh = mp.solutions.face_mesh
     face_mesh = mp_face_mesh.FaceMesh(
+        static_image_mode=False,  # <-- The most important change for video
         max_num_faces=1,
         refine_landmarks=True,
+        min_detection_confidence=0.5,
+        min_tracking_confidence=0.5  # Confidence for tracking across frames
     )
+    print("MediaPipe Face Mesh initialized for VIDEO successfully.")
 except (ImportError, AttributeError):
     print("Error: Could not initialize MediaPipe Face Mesh. Is mediapipe installed correctly?")
     face_mesh = None
 
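A minimal sketch of how the object initialized above is driven, usable as a standalone sanity check (the `frame.jpg` path is a placeholder; the 478-landmark count assumes `refine_landmarks=True`):

    import cv2
    import mediapipe as mp

    mesh = mp.solutions.face_mesh.FaceMesh(
        static_image_mode=False, max_num_faces=1, refine_landmarks=True,
        min_detection_confidence=0.5, min_tracking_confidence=0.5)
    frame = cv2.imread('frame.jpg')  # placeholder input; OpenCV loads BGR
    results = mesh.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))  # the Solutions API expects RGB
    if results.multi_face_landmarks:
        # 468 base landmarks + 10 iris landmarks with refine_landmarks=True
        print(len(results.multi_face_landmarks[0].landmark))  # -> 478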
+# --- Helper Functions (Refactored for Efficiency) ---
 
+def get_landmarks_from_result(results, img_shape):
+    """Extracts landmarks from a MediaPipe results object."""
+    if not results or not results.multi_face_landmarks:
+        return None
+    h, w = img_shape[:2]
+    # Note: building the array in a single pass keeps this cheap per frame.
+    landmarks = np.array([(lm.x * w, lm.y * h) for lm in results.multi_face_landmarks[0].landmark], dtype=np.float32)
+
+    # Add image corners to landmarks for robust triangulation
+    corners = np.array([[0, 0], [w - 1, 0], [0, h - 1], [w - 1, h - 1]], dtype=np.float32)
+    return np.vstack((landmarks, corners))
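Why the four corners matter: cv2.Subdiv2D only triangulates the convex hull of the points it is given, so without them no triangle ever covers the frame borders. A toy illustration with made-up points:

    import cv2
    import numpy as np

    h, w = 480, 640
    face_pts = np.float32([[300, 200], [340, 210], [320, 260]])  # made-up "landmarks"
    corners = np.float32([[0, 0], [w - 1, 0], [0, h - 1], [w - 1, h - 1]])
    subdiv = cv2.Subdiv2D((0, 0, w, h))
    for p in np.vstack((face_pts, corners)):
        subdiv.insert((int(p[0]), int(p[1])))
    # With the corners included, triangles span the full frame, not just the face hull.
    print(len(subdiv.getTriangleList()))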
 
 
+def get_face_mask_box_from_landmarks(landmarks, img_shape, feather, padding=0):
+    """Generates a face mask and bounding box from pre-computed landmarks."""
+    h, w = img_shape[:2]
+    # We only need the facial landmarks, not the corners we added
+    face_landmarks = landmarks[:-4]
+
+    hull = cv2.convexHull(face_landmarks.astype(np.int32))
     mask = np.zeros((h, w), dtype=np.uint8)
     cv2.fillConvexPoly(mask, hull, 255)
+
     x, y, bw, bh = cv2.boundingRect(hull)
+
+    # Apply padding
     x_pad = max(x - padding, 0)
     y_pad = max(y - padding, 0)
+    x2_pad = min(x + bw + padding, w)
+    y2_pad = min(y + bh + padding, h)
 
+    # Feather the mask for smoother blending
+    if feather > 0:
+        k = int(feather)
+        if k % 2 == 0:
+            k += 1  # GaussianBlur kernel size must be odd
+        mask = cv2.GaussianBlur(mask, (k, k), 0)
 
+    box = (x_pad, y_pad, x2_pad - x_pad, y2_pad - y_pad)
+    mask_roi = mask[y_pad:y2_pad, x_pad:x2_pad]
+
+    return mask_roi, box
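The feathered mask returned here later doubles as a per-pixel alpha in process_video; after the blur, the hard hull edge becomes a ramp. A tiny illustration of the effect (toy mask):

    import cv2
    import numpy as np

    mask = np.zeros((9, 9), np.uint8)
    mask[2:7, 2:7] = 255                      # hard-edged "hull"
    soft = cv2.GaussianBlur(mask, (5, 5), 0)  # kernel size must be odd
    print(soft[4])  # the centre row now ramps 0 -> 255 -> 0 instead of jumping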
 
 def calculate_delaunay_triangles(rect, points):
+    """Calculates Delaunay triangles for a set of points."""
+    if points is None or len(points) < 3:
         return []
+    # Keep points inside rect: Subdiv2D.insert raises cv2.error for
+    # out-of-bounds points (e.g. corner landmarks shifted by the crop).
+    points = points.copy()
+    points[:, 0] = np.clip(points[:, 0], rect[0], rect[0] + rect[2] - 1)
+    points[:, 1] = np.clip(points[:, 1], rect[1], rect[1] + rect[3] - 1)
     subdiv = cv2.Subdiv2D(rect)
+    # Using a dictionary is faster for checking existing points
+    point_map = {(int(p[0]), int(p[1])): i for i, p in enumerate(points)}
+    for p in point_map.keys():
+        subdiv.insert(p)
+
+    triangle_list = subdiv.getTriangleList()
+    delaunay_triangles = []
 
+    for t in triangle_list:
+        pt1 = (int(t[0]), int(t[1]))
+        pt2 = (int(t[2]), int(t[3]))
+        pt3 = (int(t[4]), int(t[5]))
+
+        # Check if all points are within the image boundaries
+        if rect[0] <= pt1[0] < rect[0] + rect[2] and rect[1] <= pt1[1] < rect[1] + rect[3] and \
+           rect[0] <= pt2[0] < rect[0] + rect[2] and rect[1] <= pt2[1] < rect[1] + rect[3] and \
+           rect[0] <= pt3[0] < rect[0] + rect[2] and rect[1] <= pt3[1] < rect[1] + rect[3]:
 
+            # Get the indices from our original point list
+            idx1 = point_map.get(pt1)
+            idx2 = point_map.get(pt2)
+            idx3 = point_map.get(pt3)
 
+            if idx1 is not None and idx2 is not None and idx3 is not None:
+                delaunay_triangles.append([idx1, idx2, idx3])
+
+    return delaunay_triangles
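The bounds check above is not optional: depending on the OpenCV build, getTriangleList() can emit triangles that touch Subdiv2D's virtual outer vertices, whose coordinates lie far outside rect. A quick probe with toy points:

    import cv2

    subdiv = cv2.Subdiv2D((0, 0, 100, 100))
    for p in [(10, 10), (90, 10), (50, 90)]:
        subdiv.insert(p)
    for t in subdiv.getTriangleList():
        in_rect = all(0 <= v <= 100 for v in t)
        print(t, in_rect)  # rows with in_rect=False reference the virtual vertices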
 
 
+def warp_triangle(img1, img2, t1, t2):
+    """Warps a triangle from img1 to img2."""
+    r1 = cv2.boundingRect(t1)
+    r2 = cv2.boundingRect(t2)
+
+    # Offset triangle vertices into their bounding-box ROIs
+    t1_rect = t1 - r1[:2]
+    t2_rect = t2 - r2[:2]
 
+    img1_cropped = img1[r1[1]:r1[1]+r1[3], r1[0]:r1[0]+r1[2]]
+
+    # Get affine transform
+    warp_mat = cv2.getAffineTransform(t1_rect, t2_rect)
+    img2_cropped = cv2.warpAffine(img1_cropped, warp_mat, (r2[2], r2[3]), None,
+                                  flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REFLECT_101)
+
+    # Create mask for blending
+    mask = np.zeros((r2[3], r2[2]), dtype=np.uint8)
+    cv2.fillConvexPoly(mask, t2_rect.astype(np.int32), 255)
+
+    # Copy the warped triangle into the destination ROI
+    img2_rect = img2[r2[1]:r2[1]+r2[3], r2[0]:r2[0]+r2[2]]
+    img2_rect[mask > 0] = img2_cropped[mask > 0]
 
 
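A quick check on the affine step: cv2.getAffineTransform returns the unique 2x3 matrix that maps three source vertices exactly onto three destination vertices, which is why warping each triangle's bounding box is sufficient (toy triangles):

    import cv2
    import numpy as np

    src = np.float32([[0, 0], [10, 0], [0, 10]])
    dst = np.float32([[5, 5], [25, 5], [5, 25]])  # translated and scaled 2x
    M = cv2.getAffineTransform(src, dst)          # 2x3 affine matrix
    ones = np.hstack([src, np.ones((3, 1), np.float32)])
    print(ones @ M.T)  # recovers dst, up to float rounding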
+def morph_faces(img1, img2, l1, l2, alpha, dim):
+    """
+    Morphs two faces using pre-computed landmarks.
+    This function no longer performs landmark detection itself.
+    """
+    # Create intermediate landmarks
+    morphed_landmarks = (1 - alpha) * l1 + alpha * l2
+
+    # Triangulate the intermediate mesh
+    tris = calculate_delaunay_triangles((0, 0, dim, dim), morphed_landmarks)
+    if not tris:  # If triangulation fails, just cross-fade
+        return cv2.addWeighted(img1, 1 - alpha, img2, alpha, 0)
+
+    morphed_img = np.zeros_like(img1, dtype=np.float32)
+    img1_f = img1.astype(np.float32)
+    img2_f = img2.astype(np.float32)
+
     for ids in tris:
+        # Get triangles from each set of landmarks
+        t1 = l1[ids].astype(np.float32)
+        t2 = l2[ids].astype(np.float32)
+        tm = morphed_landmarks[ids].astype(np.float32)
+
+        # Warp both images to the intermediate mesh
+        warped1 = np.zeros_like(morphed_img)
+        warped2 = np.zeros_like(morphed_img)
+        warp_triangle(img1_f, warped1, t1, tm)
+        warp_triangle(img2_f, warped2, t2, tm)
+
+        # Blend the two warped images
+        morphed_triangle = (1 - alpha) * warped1 + alpha * warped2
+
+        # Add the blended triangle to the final image
+        mask = np.zeros((dim, dim), dtype=np.uint8)
+        cv2.fillConvexPoly(mask, tm.astype(np.int32), 255)
+        morphed_img[mask > 0] = morphed_triangle[mask > 0]
+
+    return np.uint8(morphed_img)
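In equation form, with L1 and L2 the two landmark sets: the function builds L_alpha = (1 - alpha) * L1 + alpha * L2, warps each image triangle-by-triangle onto the triangulation of L_alpha, and cross-dissolves the results, M = (1 - alpha) * warp(img1) + alpha * warp(img2), so shape and texture move together as alpha goes from 0 to 1.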
 
+# --- Main Video Processing Function (Optimized) ---
 
 def process_video(video_path, ref_img, trans, res, step, feather, padding):
     cap = cv2.VideoCapture(video_path)
     fps = cap.get(cv2.CAP_PROP_FPS) or 24
+    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
 
+    # === 1. Process Reference Image ONCE before the loop ===
     ref_bgr = cv2.cvtColor(ref_img, cv2.COLOR_RGB2BGR)
+    ref_results = face_mesh.process(cv2.cvtColor(ref_bgr, cv2.COLOR_BGR2RGB))  # the Solutions API expects RGB
+    ref_landmarks_full = get_landmarks_from_result(ref_results, ref_bgr.shape)
+    if ref_landmarks_full is None:
+        raise gr.Error("No face detected in the reference image. Please use a clear photo.")
+
+    _, ref_box = get_face_mask_box_from_landmarks(ref_landmarks_full, ref_bgr.shape, feather, padding)
+    xr, yr, wr, hr = ref_box
+    ref_cut = ref_bgr[yr:yr+hr, xr:xr+wr]
+    ref_morph_in = cv2.resize(ref_cut, (res, res))
+
+    # Scale reference landmarks to the 'res x res' morphing space
+    ref_landmarks_scaled = (ref_landmarks_full - [xr, yr]) * [res/wr, res/hr]
+    ref_landmarks_scaled = ref_landmarks_scaled[::step]  # Apply sub-sampling
+
+    # === 2. Setup Output Video ===
     w_o = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
     h_o = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
     tmp_vid = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4').name
     out_vid = cv2.VideoWriter(tmp_vid, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w_o, h_o))
+
+    # Storage for first frame previews
     first_crop = None
     first_morphed = None
 
+    # === 3. Process Video Frame by Frame ===
+    for i in range(total_frames):
         ret, frame = cap.read()
         if not ret: break
+
+        # Process the frame ONCE per iteration; the Solutions API tracks the
+        # face internally between calls, so no explicit timestamp is needed.
+        results_src = face_mesh.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
+
+        if not results_src.multi_face_landmarks:
+            out_vid.write(frame)  # Write original frame if no face is found
             continue
+
+        src_landmarks_full = get_landmarks_from_result(results_src, frame.shape)
+        mask_roi, box = get_face_mask_box_from_landmarks(src_landmarks_full, frame.shape, feather, padding)
         x, y, w, h = box
+
+        # Crop and resize source face
         crop = frame[y:y+h, x:x+w]
         crop_resized = cv2.resize(crop, (res, res))
+
+        # Scale source landmarks to the 'res x res' morphing space
+        src_landmarks_scaled = (src_landmarks_full - [x, y]) * [res/w, res/h]
+        src_landmarks_scaled = src_landmarks_scaled[::step]  # Apply sub-sampling
+
+        # Perform morph
+        alpha = float(np.clip((trans + 1) / 2, 0, 1))
+        morphed_face = morph_faces(crop_resized, ref_morph_in, src_landmarks_scaled, ref_landmarks_scaled, alpha, res)
+
+        # Store first frame for preview
         if i == 0:
             first_crop = crop_resized.copy()
+            first_morphed = morphed_face.copy()
+
+        # Resize morphed face back and blend it onto the original frame
+        morphed_face_resized = cv2.resize(morphed_face, (w, h))
+
+        # Create a feathered mask for seamless blending
+        mask_norm = mask_roi.astype(np.float32) / 255.0
+        mask_expanded = mask_norm[..., None]  # Add channel dimension for broadcasting
+
+        # Composite the morphed face
+        region = frame[y:y+h, x:x+w]
+        blended = region * (1 - mask_expanded) + morphed_face_resized * mask_expanded
         frame[y:y+h, x:x+w] = blended.astype(np.uint8)
+
         out_vid.write(frame)
 
+    cap.release()
+    out_vid.release()
 
+    # Convert preview images for Gradio output
+    first_crop_rgb = cv2.cvtColor(first_crop, cv2.COLOR_BGR2RGB) if first_crop is not None else np.zeros((res, res, 3), np.uint8)
+    ref_morph_in_rgb = cv2.cvtColor(ref_morph_in, cv2.COLOR_BGR2RGB)
+    first_morphed_rgb = cv2.cvtColor(first_morphed, cv2.COLOR_BGR2RGB) if first_morphed is not None else np.zeros((res, res, 3), np.uint8)
+
+    return tmp_vid, first_crop_rgb, ref_morph_in_rgb, first_morphed_rgb
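Worked through once: alpha = (trans + 1) / 2 maps the slider range [-1, 1] onto [0, 1], so the default trans = -0.3 gives alpha = 0.35, i.e. each blended pixel is 65% warped source frame and 35% warped reference face.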
 
 # --- Gradio App ---
 css = """video, img { object-fit: contain !important; }"""
+with gr.Blocks(css=css, theme=gr.themes.Soft()) as iface:
+    gr.Markdown("# ✨ Optimized Face Morphing ✨")
+    gr.Markdown("This version uses MediaPipe's video tracking for a **faster and smoother** result. Jitter is reduced by maintaining landmark context between frames.")
     with gr.Row():
         vid = gr.Video(label='Input Video')
         ref = gr.Image(type='numpy', label='Reference Image')
     with gr.Row():
+        res = gr.Dropdown([256, 384, 512], value=384, label='Morph Resolution')
+        step = gr.Slider(1, 4, value=4, step=1, label='Landmark Sub-sampling', info="Higher value is faster but less detailed.")
+        feather = gr.Slider(0, 50, value=15, step=1, label='Feather Radius', info="Softens the blend edge.")
+        padding = gr.Slider(0, 100, value=25, step=1, label='Crop Padding (px)', info="Expands the face area.")
+        trans = gr.Slider(-1.0, 1.0, value=-0.3, step=0.05, label='Morph Transition', info="-1.0 is original face, 1.0 is reference face.")
+
+    btn = gr.Button('Generate Morph 🚀', variant='primary')
+
+    with gr.Row():
+        out_vid = gr.Video(label='Morphed Video')
+    with gr.Row():
+        out_crop = gr.Image(label='First Frame Crop')
+        out_ref = gr.Image(label='Reference Face')
+        out_morph = gr.Image(label='Morphed First Frame')
 
     btn.click(
         fn=process_video,
+        inputs=[vid, ref, trans, res, step, feather, padding],
+        outputs=[out_vid, out_crop, out_ref, out_morph],
+        show_progress='full'
     )
 
+if __name__ == '__main__':
     iface.launch(debug=True)