# truens66's picture
# Update app.py
# 28eea81 verified
# raw
# history blame
# 8.26 kB
# import gradio as gr
# import cv2
# import torch
# import dlib
# import numpy as np
# from imutils import face_utils
# from torchvision import models, transforms
# from tempfile import NamedTemporaryFile
# import shutil
# # Load face detector and landmark predictor
# face_detector = dlib.get_frontal_face_detector()
# PREDICTOR_PATH = "./shape_predictor_81_face_landmarks.dat"
# face_predictor = dlib.shape_predictor(PREDICTOR_PATH)
# import torch
# import torchvision.models as models
# # Load pretrained ResNet-34 model
# resnet34 = models.resnet34(weights=models.ResNet34_Weights.IMAGENET1K_V1)
# resnet34.fc = torch.nn.Linear(resnet34.fc.in_features, 2)
# ckpt_path = "./resnet34.pkl"
# # Save model state dict
# torch.save(resnet34.state_dict(), ckpt_path)
# print(f"✅ Model saved at {ckpt_path}")
# # Load deepfake detection model
# model = models.resnet34()
# model.fc = torch.nn.Linear(model.fc.in_features, 2)
# model.load_state_dict(torch.load(ckpt_path, map_location="cpu"))
# model.eval()
# # Define transformation for face images
# transform = transforms.Compose([
# transforms.ToPILImage(),
# transforms.Resize((224, 224)),
# transforms.ToTensor(),
# transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
# ])
# def process_video(video_path: str):
# cap = cv2.VideoCapture(video_path)
# width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
# height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# fps = int(cap.get(cv2.CAP_PROP_FPS))
# output_path = video_path.replace(".mp4", "_processed.mp4")
# output_video = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))
# while cap.isOpened():
# ret, frame = cap.read()
# if not ret:
# break
# rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
# faces = face_detector(rgb_frame, 1)
# for face in faces:
# landmarks = face_utils.shape_to_np(face_predictor(rgb_frame, face))
# x_min, y_min = np.min(landmarks, axis=0)
# x_max, y_max = np.max(landmarks, axis=0)
# face_crop = rgb_frame[y_min:y_max, x_min:x_max]
# if face_crop.size == 0:
# continue
# face_tensor = transform(face_crop).unsqueeze(0)
# with torch.no_grad():
# output = torch.softmax(model(face_tensor), dim=1)
# fake_confidence = output[0, 1].item() * 100 # Fake confidence as a percentage
# label = "Fake" if fake_confidence > 50 else "Real"
# color = (0, 0, 255) if label == "Fake" else (0, 255, 0)
# # Annotating confidence score with label
# label_text = f"{label} ({fake_confidence:.2f}%)"
# cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), color, 2)
# cv2.putText(frame, label_text, (x_min, y_min - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)
# output_video.write(frame)
# cap.release()
# output_video.release()
# return output_path
# def gradio_interface(video_file):
# if video_file is None:
# return "Error: No video uploaded."
# # Create a temporary file and copy the uploaded video content
# with NamedTemporaryFile(delete=False, suffix=".mp4") as temp_file:
# temp_file_path = temp_file.name
# # Read the uploaded video file using its path
# with open(video_file, "rb") as uploaded_file:
# temp_file.write(uploaded_file.read())
# output_path = process_video(temp_file_path)
# return output_path
# # Gradio UI
# iface = gr.Interface(
# fn=gradio_interface,
# inputs=gr.Video(label="Upload Video"),
# outputs=gr.Video(label="Processed Video"),
# title="Deepfake Detection",
# description="Upload a video to detect deepfakes. The model will process faces and classify them as real or fake."
# )
# if __name__ == "__main__":
# iface.launch()
import gradio as gr
import cv2
import torch
import numpy as np
import mediapipe as mp
from torchvision import models, transforms
from tempfile import NamedTemporaryFile
# MediaPipe solution modules used by this app.
mp_face_detection = mp.solutions.face_detection
mp_face_mesh = mp.solutions.face_mesh

# Module-level detector instances; process_video() calls them on each frame.
face_detection = mp_face_detection.FaceDetection(
    model_selection=1,
    min_detection_confidence=0.5,
)
face_mesh = mp_face_mesh.FaceMesh(
    static_image_mode=False,
    max_num_faces=1,
    min_detection_confidence=0.5,
)
def create_model():
    """Build the ResNet-34 network used for the Real/Fake classification.

    The network is created without pretrained weights and its final fully
    connected layer is replaced by a linear layer with 2 outputs.

    Returns:
        torch.nn.Module: A randomly initialised ResNet-34 with a 2-output head.
    """
    # ``pretrained=`` is deprecated in torchvision in favour of ``weights=``;
    # ``weights=None`` is the equivalent of the former ``pretrained=False``.
    # Must match the architecture used during training.
    model = models.resnet34(weights=None)
    # Adjust the final layer to the two output classes.
    model.fc = torch.nn.Linear(model.fc.in_features, 2)
    return model
# Build the network and try to restore the trained weights from disk.
model = create_model()
try:
    model.load_state_dict(
        torch.load("resnet34.pkl", map_location='cpu'),
    )
except RuntimeError as e:
    # NOTE(review): the failure is only printed; the model then keeps the
    # weights it was created with. Handle architecture mismatch here
    # (e.g., load weights manually).
    print(f"Error loading model: {e}")
# Switch to inference mode.
model.eval()

# Preprocessing pipeline applied to each face crop before it is fed to the
# model: array -> PIL image -> 224x224 -> tensor -> per-channel normalisation.
transform = transforms.Compose(
    [
        transforms.ToPILImage(),
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)
def get_face_bbox(landmarks, frame_shape):
    """Return the pixel bounding box that encloses a set of landmarks.

    Args:
        landmarks: Object exposing a ``landmark`` iterable whose items carry
            ``x`` and ``y`` attributes; they are scaled by the frame width
            and height respectively.
        frame_shape: Shape of the frame; its first two entries are read as
            ``(height, width)``.

    Returns:
        tuple[int, int, int, int]: ``(x_min, y_min, x_max, y_max)`` in pixels,
        clamped to ``[0, width]`` horizontally and ``[0, height]`` vertically.
    """
    h, w = frame_shape[:2]
    xs = [lm.x * w for lm in landmarks.landmark]
    ys = [lm.y * h for lm in landmarks.landmark]

    def clamp(value, upper):
        # Landmarks may fall outside the frame. A negative coordinate would
        # wrap around when used as a slice index, so keep it inside the image.
        return max(0, min(int(value), upper))

    return clamp(min(xs), w), clamp(min(ys), h), clamp(max(xs), w), clamp(max(ys), h)
def _fake_confidence(face_crop):
    """Return the model's class-1 ("Fake") probability for a face crop, in percent."""
    face_tensor = transform(face_crop).unsqueeze(0)
    with torch.no_grad():
        output = torch.softmax(model(face_tensor), dim=1)
    return output[0, 1].item() * 100


def process_video(video_path: str):
    """Write a copy of a video with each detected face labelled Real or Fake.

    Every frame is run through the face detector; when at least one face is
    detected, the face mesh is computed once for the frame and each mesh
    face is cropped, classified and annotated with a coloured rectangle and
    a ``"<label> (<confidence>%)"`` caption. All frames, annotated or not,
    are written to the output video.

    Args:
        video_path: Path of the video to read.

    Returns:
        str: Path of the annotated video, ``<stem>_processed.mp4`` next to
        the input.

    Raises:
        RuntimeError: If the input video cannot be opened.
    """
    import os  # function-scope import keeps this block self-contained

    # Build the output name from the stem. A plain ``str.replace`` returns
    # the input path unchanged when it does not contain ".mp4", which would
    # make the writer target the very file being read.
    stem, _ = os.path.splitext(video_path)
    output_path = f"{stem}_processed.mp4"

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        raise RuntimeError(f"Could not open video: {video_path}")

    output_video = None
    try:
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # Keep the fractional frame rate (e.g. 29.97) instead of truncating
        # it, and fall back to a usable value when the container reports none.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            fps = 30.0
        output_video = cv2.VideoWriter(
            output_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height)
        )

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_h, frame_w = frame.shape[:2]

            # Face detection acts as a gate; the mesh is computed once per
            # frame rather than once per detection, which repeated the same
            # inference and drew duplicate annotations.
            results = face_detection.process(rgb_frame)
            if results.detections:
                mesh_results = face_mesh.process(rgb_frame)
                for face_landmarks in mesh_results.multi_face_landmarks or []:
                    x_min, y_min, x_max, y_max = get_face_bbox(face_landmarks, frame.shape)
                    # Keep the box inside the frame: negative indices would
                    # wrap around when slicing.
                    x_min, y_min = max(0, x_min), max(0, y_min)
                    x_max, y_max = min(frame_w, x_max), min(frame_h, y_max)
                    face_crop = rgb_frame[y_min:y_max, x_min:x_max]
                    if face_crop.size == 0:
                        continue

                    fake_confidence = _fake_confidence(face_crop)
                    label = "Fake" if fake_confidence > 50 else "Real"
                    # Colours are BGR because ``frame`` is the original
                    # OpenCV frame: red for Fake, green for Real.
                    color = (0, 0, 255) if label == "Fake" else (0, 255, 0)
                    label_text = f"{label} ({fake_confidence:.2f}%)"
                    cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), color, 2)
                    # Keep the caption on-screen for faces touching the top edge.
                    cv2.putText(frame, label_text, (x_min, max(y_min - 10, 15)),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)
            output_video.write(frame)
    finally:
        # Release both handles even if a frame fails to process.
        cap.release()
        if output_video is not None:
            output_video.release()
    return output_path
def gradio_interface(video_file):
    """Gradio callback: annotate an uploaded video and return the result path.

    The upload is copied to a temporary ``.mp4`` file, handed to
    ``process_video`` and the temporary copy is removed afterwards.

    Args:
        video_file: Filesystem path of the uploaded video, or ``None`` when
            nothing was uploaded.

    Returns:
        str: Path of the processed video returned by ``process_video``.

    Raises:
        gr.Error: If no video was uploaded. The output component is a video,
            so returning an error string would be treated as a file path.
    """
    import contextlib
    import os
    import shutil

    if video_file is None:
        raise gr.Error("No video uploaded.")

    with NamedTemporaryFile(delete=False, suffix=".mp4") as temp_file:
        temp_file_path = temp_file.name
        with open(video_file, "rb") as uploaded_file:
            # Stream the copy instead of reading the whole video into memory.
            shutil.copyfileobj(uploaded_file, temp_file)

    # The temporary file is closed (and therefore flushed) before it is read.
    try:
        return process_video(temp_file_path)
    finally:
        # ``delete=False`` leaves the input copy behind; the processed video
        # is a separate file, so the copy can be removed safely.
        with contextlib.suppress(OSError):
            os.remove(temp_file_path)
# Gradio UI: one video input wired to gradio_interface(), one video output.
iface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.Video(label="Upload Video"),
    outputs=gr.Video(label="Processed Video"),
    title="Deepfake Detection",
    description=(
        "Upload a video to detect deepfakes using MediaPipe face detection "
        "and ResNet-34 model."
    ),
)

# Only start the app when this file is executed as a script.
if __name__ == "__main__":
    iface.launch(share=True)