# deepfake-video-detector
#
# Space status: Running
#
# File size: 8,256 bytes

# import gradio as gr
# import cv2
# import torch
# import dlib
# import numpy as np
# from imutils import face_utils
# from torchvision import models, transforms
# from tempfile import NamedTemporaryFile
# import shutil
# # Load face detector and landmark predictor
# face_detector = dlib.get_frontal_face_detector()
# PREDICTOR_PATH = "./shape_predictor_81_face_landmarks.dat"
# face_predictor = dlib.shape_predictor(PREDICTOR_PATH)

# import torch
# import torchvision.models as models

# # Load pretrained ResNet-34 model
# resnet34 = models.resnet34(weights=models.ResNet34_Weights.IMAGENET1K_V1)
# resnet34.fc = torch.nn.Linear(resnet34.fc.in_features, 2)
# ckpt_path = "./resnet34.pkl"

# # Save model state dict
# torch.save(resnet34.state_dict(), ckpt_path)
# print(f"✅ Model saved at {ckpt_path}")

# # Load deepfake detection model
# model = models.resnet34()
# model.fc = torch.nn.Linear(model.fc.in_features, 2)
# model.load_state_dict(torch.load(ckpt_path, map_location="cpu"))
# model.eval()

# # Define transformation for face images
# transform = transforms.Compose([
#     transforms.ToPILImage(),
#     transforms.Resize((224, 224)),
#     transforms.ToTensor(),
#     transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
# ])

# def process_video(video_path: str):
#     cap = cv2.VideoCapture(video_path)
#     width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
#     height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
#     fps = int(cap.get(cv2.CAP_PROP_FPS))

#     output_path = video_path.replace(".mp4", "_processed.mp4")
#     output_video = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))

#     while cap.isOpened():
#         ret, frame = cap.read()
#         if not ret:
#             break

#         rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
#         faces = face_detector(rgb_frame, 1)

#         for face in faces:
#             landmarks = face_utils.shape_to_np(face_predictor(rgb_frame, face))
#             x_min, y_min = np.min(landmarks, axis=0)
#             x_max, y_max = np.max(landmarks, axis=0)

#             face_crop = rgb_frame[y_min:y_max, x_min:x_max]
#             if face_crop.size == 0:
#                 continue

#             face_tensor = transform(face_crop).unsqueeze(0)
#             with torch.no_grad():
#                 output = torch.softmax(model(face_tensor), dim=1)
#                 fake_confidence = output[0, 1].item() * 100  # Fake confidence as a percentage
#                 label = "Fake" if fake_confidence > 50 else "Real"
#                 color = (0, 0, 255) if label == "Fake" else (0, 255, 0)

#                 # Annotating confidence score with label
#                 label_text = f"{label} ({fake_confidence:.2f}%)"

#             cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), color, 2)
#             cv2.putText(frame, label_text, (x_min, y_min - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)

#         output_video.write(frame)

#     cap.release()
#     output_video.release()
#     return output_path

# def gradio_interface(video_file):
#     if video_file is None:
#         return "Error: No video uploaded."

#     # Create a temporary file and copy the uploaded video content
#     with NamedTemporaryFile(delete=False, suffix=".mp4") as temp_file:
#         temp_file_path = temp_file.name
#         # Read the uploaded video file using its path
#         with open(video_file, "rb") as uploaded_file:
#             temp_file.write(uploaded_file.read())

#     output_path = process_video(temp_file_path)
#     return output_path

# # Gradio UI
# iface = gr.Interface(
#     fn=gradio_interface,
#     inputs=gr.Video(label="Upload Video"),
#     outputs=gr.Video(label="Processed Video"),
#     title="Deepfake Detection",
#     description="Upload a video to detect deepfakes. The model will process faces and classify them as real or fake."
# )

# if __name__ == "__main__":
#     iface.launch()









import os
import shutil
from tempfile import NamedTemporaryFile

import cv2
import gradio as gr
import mediapipe as mp
import numpy as np
import torch
from torchvision import models, transforms

# MediaPipe solution modules used by this app.
mp_face_detection = mp.solutions.face_detection
mp_face_mesh = mp.solutions.face_mesh

# Face detector: process_video uses its result as a per-frame gate before
# running the (separate) face-mesh model.
face_detection = mp_face_detection.FaceDetection(
    min_detection_confidence=0.5,
    model_selection=1,
)

# Face mesh: supplies the landmarks from which the face bounding box is
# derived. Configured for video input (static_image_mode=False) and at most
# one face per frame (max_num_faces=1).
face_mesh = mp_face_mesh.FaceMesh(
    max_num_faces=1,
    static_image_mode=False,
    min_detection_confidence=0.5,
)

def create_model():
    """Build an uninitialised ResNet-34 with a two-class output layer.

    The architecture must match the one the checkpoint was trained with,
    otherwise ``load_state_dict`` will reject the weights.

    Returns:
        A ``torchvision`` ResNet-34 whose final fully connected layer has
        been replaced by a ``Linear`` layer with 2 outputs.
    """
    # `weights=None` is the current spelling of the deprecated
    # `pretrained=False`: no ImageNet weights are downloaded.
    model = models.resnet34(weights=None)
    # Swap the 1000-class ImageNet head for a 2-class head.
    model.fc = torch.nn.Linear(model.fc.in_features, 2)
    return model

# Load the trained model.
# NOTE(review): torch.load unpickles the checkpoint, so "resnet34.pkl" must
# come from a trusted source.
model = create_model()
try:
    model.load_state_dict(torch.load("resnet34.pkl", map_location='cpu'))
except RuntimeError as e:
    # Best-effort: keep the app running, but say clearly what that means.
    # Without the checkpoint the network keeps its random initial weights.
    print(f"Error loading model: {e}")
    print(
        "WARNING: checkpoint weights were NOT loaded; the model is running "
        "with randomly initialised weights and its Real/Fake predictions "
        "are meaningless."
    )
# Inference only: put the network in evaluation mode.
model.eval()

# Preprocessing applied to every face crop before it is fed to the model:
# ndarray -> PIL image -> 224x224 -> tensor -> per-channel normalisation.
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

def get_face_bbox(landmarks, frame_shape):
    """Return the pixel bounding box enclosing a set of face landmarks.

    Args:
        landmarks: Object with a ``landmark`` sequence whose items expose
            ``x`` and ``y`` coordinates normalised by frame width/height.
        frame_shape: Shape of the frame; the first two entries are
            ``(height, width)``.

    Returns:
        ``(x_min, y_min, x_max, y_max)`` as ints, clamped to
        ``[0, width]`` / ``[0, height]``. ``(0, 0, 0, 0)`` when there are
        no landmarks.
    """
    h, w = frame_shape[:2]
    xs = [lm.x * w for lm in landmarks.landmark]
    ys = [lm.y * h for lm in landmarks.landmark]
    if not xs:
        # Nothing to enclose: return an empty box instead of letting
        # min()/max() raise on an empty sequence.
        return 0, 0, 0, 0

    # Normalised coordinates may fall outside [0, 1] for faces that are
    # partly out of frame. Clamp so that callers slicing the frame never get
    # a negative index (which would wrap around to the opposite edge).
    x_min = min(max(int(min(xs)), 0), w)
    y_min = min(max(int(min(ys)), 0), h)
    x_max = min(max(int(max(xs)), 0), w)
    y_max = min(max(int(max(ys)), 0), h)
    return x_min, y_min, x_max, y_max

def _annotate_face(frame, rgb_frame, face_landmarks):
    """Classify one face and draw its box and label onto ``frame`` in place."""
    frame_h, frame_w = frame.shape[:2]
    x_min, y_min, x_max, y_max = get_face_bbox(face_landmarks, frame.shape)

    # Keep the box inside the frame: a negative start index would make the
    # NumPy slice wrap around and crop the wrong region.
    x_min, x_max = max(0, x_min), min(frame_w, x_max)
    y_min, y_max = max(0, y_min), min(frame_h, y_max)

    face_crop = rgb_frame[y_min:y_max, x_min:x_max]
    if face_crop.size == 0:
        return

    face_tensor = transform(face_crop).unsqueeze(0)
    with torch.no_grad():
        probabilities = torch.softmax(model(face_tensor), dim=1)

    # Class index 1 is treated as "fake"; report it as a percentage.
    fake_confidence = probabilities[0, 1].item() * 100
    label = "Fake" if fake_confidence > 50 else "Real"
    color = (0, 0, 255) if label == "Fake" else (0, 255, 0)  # BGR
    label_text = f"{label} ({fake_confidence:.2f}%)"

    cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), color, 2)
    # Keep the caption visible when the face touches the top of the frame.
    text_y = max(y_min - 10, 12)
    cv2.putText(frame, label_text, (x_min, text_y),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)


def process_video(video_path: str):
    """Write a copy of a video with each detected face labelled Real/Fake.

    Every frame is passed through the face detector; when it reports at
    least one face, the face mesh is run once on that frame and each
    resulting face is cropped, classified and annotated.

    Args:
        video_path: Path of the video to analyse.

    Returns:
        Path of the annotated video: ``video_path`` with its extension
        replaced by ``_processed.mp4``.

    Raises:
        ValueError: If ``video_path`` cannot be opened as a video.
    """
    cap = cv2.VideoCapture(video_path)
    output_video = None
    try:
        if not cap.isOpened():
            raise ValueError(f"Could not open video: {video_path}")

        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Keep the fractional frame rate (e.g. 29.97) so playback speed is
        # preserved, and fall back to a sane value when the container does
        # not report one (0 or NaN would yield an unusable writer).
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            fps = 30.0

        # Derive the output name from the extension only. A plain
        # str.replace would return the input path unchanged for non-.mp4
        # files (so the writer would overwrite the input) and would also
        # rewrite ".mp4" inside directory names.
        root, _ext = os.path.splitext(video_path)
        output_path = f"{root}_processed.mp4"
        output_video = cv2.VideoWriter(
            output_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height)
        )

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # The detector only gates the mesh. Run the mesh once per frame
            # rather than once per detection: the result is identical and
            # the repeated work (and repeated drawing) is avoided.
            if face_detection.process(rgb_frame).detections:
                mesh_results = face_mesh.process(rgb_frame)
                for face_landmarks in mesh_results.multi_face_landmarks or ():
                    _annotate_face(frame, rgb_frame, face_landmarks)

            output_video.write(frame)
    finally:
        # Always free the native handles, even if a frame fails to process.
        cap.release()
        if output_video is not None:
            output_video.release()

    return output_path

def gradio_interface(video_file):
    """Gradio callback: run deepfake annotation on an uploaded video.

    Args:
        video_file: Path of the uploaded video, or ``None`` when nothing
            was uploaded.

    Returns:
        Path of the annotated video produced by ``process_video``.

    Raises:
        gr.Error: If no video was uploaded. The output component is a
            ``gr.Video``, so returning an error string would be treated as
            a file path; raising lets Gradio show the message instead.
    """
    if video_file is None:
        raise gr.Error("Error: No video uploaded.")

    # Work on a private ".mp4" copy so the upload itself is never modified.
    # The handle is closed before copying so the path can be reopened on
    # every platform.
    with NamedTemporaryFile(delete=False, suffix=".mp4") as temp_file:
        temp_file_path = temp_file.name

    try:
        # Streamed copy: avoids loading the whole video into memory.
        shutil.copyfile(video_file, temp_file_path)
        return process_video(temp_file_path)
    finally:
        # The temp file was created with delete=False, so remove it here.
        # The annotated output lives at a different path and is unaffected.
        if os.path.exists(temp_file_path):
            os.remove(temp_file_path)

# Gradio UI: one video in, one annotated video out.
iface = gr.Interface(
    title="Deepfake Detection",
    description="Upload a video to detect deepfakes using MediaPipe face detection and ResNet-34 model.",
    fn=gradio_interface,
    inputs=gr.Video(label="Upload Video"),
    outputs=gr.Video(label="Processed Video"),
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch(share=True)