Spaces:

truens66
/

deepfake-video-detector

Running

App Files Files Community

deepfake-video-detector / app.py

truens66

Update app.py

28eea81 verified about 1 month ago

raw

history blame

8.26 kB

	# import gradio as gr
	# import cv2
	# import torch
	# import dlib
	# import numpy as np
	# from imutils import face_utils
	# from torchvision import models, transforms
	# from tempfile import NamedTemporaryFile
	# import shutil
	# # Load face detector and landmark predictor
	# face_detector = dlib.get_frontal_face_detector()
	# PREDICTOR_PATH = "./shape_predictor_81_face_landmarks.dat"
	# face_predictor = dlib.shape_predictor(PREDICTOR_PATH)

	# import torch
	# import torchvision.models as models

	# # Load pretrained ResNet-34 model
	# resnet34 = models.resnet34(weights=models.ResNet34_Weights.IMAGENET1K_V1)
	# resnet34.fc = torch.nn.Linear(resnet34.fc.in_features, 2)
	# ckpt_path = "./resnet34.pkl"

	# # Save model state dict
	# torch.save(resnet34.state_dict(), ckpt_path)
	# print(f"✅ Model saved at {ckpt_path}")

	# # Load deepfake detection model
	# model = models.resnet34()
	# model.fc = torch.nn.Linear(model.fc.in_features, 2)
	# model.load_state_dict(torch.load(ckpt_path, map_location="cpu"))
	# model.eval()

	# # Define transformation for face images
	# transform = transforms.Compose([
	# transforms.ToPILImage(),
	# transforms.Resize((224, 224)),
	# transforms.ToTensor(),
	# transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
	# ])

	# def process_video(video_path: str):
	# cap = cv2.VideoCapture(video_path)
	# width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
	# height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
	# fps = int(cap.get(cv2.CAP_PROP_FPS))

	# output_path = video_path.replace(".mp4", "_processed.mp4")
	# output_video = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))

	# while cap.isOpened():
	# ret, frame = cap.read()
	# if not ret:
	# break

	# rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
	# faces = face_detector(rgb_frame, 1)

	# for face in faces:
	# landmarks = face_utils.shape_to_np(face_predictor(rgb_frame, face))
	# x_min, y_min = np.min(landmarks, axis=0)
	# x_max, y_max = np.max(landmarks, axis=0)

	# face_crop = rgb_frame[y_min:y_max, x_min:x_max]
	# if face_crop.size == 0:
	# continue

	# face_tensor = transform(face_crop).unsqueeze(0)
	# with torch.no_grad():
	# output = torch.softmax(model(face_tensor), dim=1)
	# fake_confidence = output[0, 1].item() * 100 # Fake confidence as a percentage
	# label = "Fake" if fake_confidence > 50 else "Real"
	# color = (0, 0, 255) if label == "Fake" else (0, 255, 0)

	# # Annotating confidence score with label
	# label_text = f"{label} ({fake_confidence:.2f}%)"

	# cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), color, 2)
	# cv2.putText(frame, label_text, (x_min, y_min - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)

	# output_video.write(frame)

	# cap.release()
	# output_video.release()
	# return output_path

	# def gradio_interface(video_file):
	# if video_file is None:
	# return "Error: No video uploaded."

	# # Create a temporary file and copy the uploaded video content
	# with NamedTemporaryFile(delete=False, suffix=".mp4") as temp_file:
	# temp_file_path = temp_file.name
	# # Read the uploaded video file using its path
	# with open(video_file, "rb") as uploaded_file:
	# temp_file.write(uploaded_file.read())

	# output_path = process_video(temp_file_path)
	# return output_path

	# # Gradio UI
	# iface = gr.Interface(
	# fn=gradio_interface,
	# inputs=gr.Video(label="Upload Video"),
	# outputs=gr.Video(label="Processed Video"),
	# title="Deepfake Detection",
	# description="Upload a video to detect deepfakes. The model will process faces and classify them as real or fake."
	# )

	# if __name__ == "__main__":
	# iface.launch()









	import gradio as gr
	import cv2
	import torch
	import numpy as np
	import mediapipe as mp
	from torchvision import models, transforms
	from tempfile import NamedTemporaryFile

	# MediaPipe solution modules used by the video pipeline below.
	mp_face_detection = mp.solutions.face_detection
	mp_face_mesh = mp.solutions.face_mesh

	# Shared face detector, created once when the module is imported.
	face_detection = mp_face_detection.FaceDetection(
	    min_detection_confidence=0.5,
	    model_selection=1,
	)

	# Shared face-mesh estimator, limited to one face and run in non-static mode.
	face_mesh = mp_face_mesh.FaceMesh(
	    max_num_faces=1,
	    min_detection_confidence=0.5,
	    static_image_mode=False,
	)

	def create_model():
	    """Build a ResNet-34 with a two-output classification head.

	    No pretrained weights are requested here; the caller is expected to load
	    a state dict into the returned network.

	    Returns:
	        The torchvision ResNet-34 module with its ``fc`` layer replaced by a
	        ``torch.nn.Linear`` mapping the original feature width to 2 outputs.
	    """
	    # `pretrained=` is the deprecated torchvision spelling; `weights=None`
	    # requests the same randomly initialised network without the warning.
	    network = models.resnet34(weights=None)
	    # Swap the final layer for a 2-way head; must match the checkpoint layout.
	    network.fc = torch.nn.Linear(network.fc.in_features, 2)
	    return network

	# Build the network and load the trained weights into it.
	model = create_model()
	try:
	    # NOTE(review): torch.load unpickles the file, so "resnet34.pkl" must be a
	    # trusted checkpoint shipped with the app, never user-supplied data.
	    model.load_state_dict(torch.load("resnet34.pkl", map_location='cpu'))
	except RuntimeError as e:
	    print(f"Error loading model: {e}")
	    # Fail fast: continuing would serve predictions from randomly
	    # initialised weights, which are meaningless for classification.
	    raise
	# Switch to inference mode before the model is used.
	model.eval()

	# Preprocessing applied to every face crop before it reaches the model:
	# array -> PIL image -> 224x224 resize -> tensor -> per-channel normalisation.
	_channel_mean = [0.485, 0.456, 0.406]
	_channel_std = [0.229, 0.224, 0.225]
	transform = transforms.Compose([
	    transforms.ToPILImage(),
	    transforms.Resize((224, 224)),
	    transforms.ToTensor(),
	    transforms.Normalize(mean=_channel_mean, std=_channel_std),
	])

	def get_face_bbox(landmarks, frame_shape):
	    """Return the pixel bounding box enclosing a set of face landmarks.

	    Each landmark's ``x`` and ``y`` are scaled by the frame width and height
	    respectively, and the resulting box is clamped to the frame so it can be
	    used directly as slice bounds.

	    Args:
	        landmarks: Object exposing a ``landmark`` iterable whose items have
	            ``x`` and ``y`` attributes (presumably normalised to the frame
	            size; verify against the caller).
	        frame_shape: Frame shape whose first two entries are (height, width).

	    Returns:
	        Tuple ``(x_min, y_min, x_max, y_max)`` of ints, each within
	        ``[0, width]`` or ``[0, height]``.

	    Raises:
	        ValueError: If ``landmarks.landmark`` is empty.
	    """
	    h, w = frame_shape[:2]
	    xs = [lm.x * w for lm in landmarks.landmark]
	    ys = [lm.y * h for lm in landmarks.landmark]

	    def _clamp(value, upper):
	        # Negative indices would wrap around when slicing an array, and
	        # values past the edge are meaningless, so pin to [0, upper].
	        return max(0, min(int(value), upper))

	    return (
	        _clamp(min(xs), w),
	        _clamp(min(ys), h),
	        _clamp(max(xs), w),
	        _clamp(max(ys), h),
	    )

	def _classify_face(face_crop):
	    """Classify one RGB face crop; return ``(label, fake_confidence_percent)``."""
	    face_tensor = transform(face_crop).unsqueeze(0)
	    with torch.no_grad():
	        output = torch.softmax(model(face_tensor), dim=1)
	    # Index 1 of the softmax output is used as the "fake" score, in percent.
	    fake_confidence = output[0, 1].item() * 100
	    label = "Fake" if fake_confidence > 50 else "Real"
	    return label, fake_confidence


	def process_video(video_path: str):
	    """Annotate faces in a video with a Real/Fake label and save the result.

	    Each frame is read, checked for faces, and every face-mesh result found
	    is cropped, classified, and drawn onto the frame with a coloured box and
	    a confidence label. All frames are written to a new ``mp4v`` file.

	    Args:
	        video_path: Path of the input video.

	    Returns:
	        Path of the annotated video: the input path with its extension
	        replaced by ``_processed.mp4``.
	    """
	    import os  # local import: the file's import block does not include os

	    cap = cv2.VideoCapture(video_path)
	    output_video = None
	    try:
	        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
	        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

	        # Keep the fractional rate (e.g. 29.97) instead of truncating it.
	        fps = cap.get(cv2.CAP_PROP_FPS)
	        if not fps > 0:
	            # The container did not report a usable rate; 30.0 is an
	            # arbitrary fallback so the writer still gets a valid value.
	            fps = 30.0

	        # splitext only touches the real extension, so the output can never
	        # equal the input path even when the name does not end in ".mp4".
	        root, _ = os.path.splitext(video_path)
	        output_path = f"{root}_processed.mp4"
	        output_video = cv2.VideoWriter(
	            output_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height)
	        )

	        while cap.isOpened():
	            ret, frame = cap.read()
	            if not ret:
	                break

	            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
	            frame_h, frame_w = frame.shape[:2]

	            # The detector only gates the mesh step; the mesh is computed
	            # once per frame rather than once per detection.
	            if face_detection.process(rgb_frame).detections:
	                mesh_results = face_mesh.process(rgb_frame)
	                for face_landmarks in mesh_results.multi_face_landmarks or []:
	                    x_min, y_min, x_max, y_max = get_face_bbox(face_landmarks, frame.shape)

	                    # Clamp to the frame: negative bounds would wrap around
	                    # when slicing and yield the wrong region.
	                    x_min = max(0, min(x_min, frame_w))
	                    x_max = max(0, min(x_max, frame_w))
	                    y_min = max(0, min(y_min, frame_h))
	                    y_max = max(0, min(y_max, frame_h))

	                    face_crop = rgb_frame[y_min:y_max, x_min:x_max]
	                    if face_crop.size == 0:
	                        continue

	                    label, fake_confidence = _classify_face(face_crop)
	                    # Colours are given in the BGR order of `frame`.
	                    color = (0, 0, 255) if label == "Fake" else (0, 255, 0)
	                    label_text = f"{label} ({fake_confidence:.2f}%)"

	                    cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), color, 2)
	                    cv2.putText(frame, label_text, (x_min, y_min - 10),
	                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)

	            output_video.write(frame)
	    finally:
	        # Release both handles even if a frame fails mid-way.
	        cap.release()
	        if output_video is not None:
	            output_video.release()

	    return output_path

	def gradio_interface(video_file):
	    """Gradio callback: copy the uploaded video, process it, return the result.

	    Args:
	        video_file: Path of the uploaded video, or ``None`` when nothing was
	            uploaded.

	    Returns:
	        Path of the processed video produced by ``process_video``.

	    Raises:
	        gr.Error: If no video was uploaded. The output component is a video,
	            so an error string cannot be returned as its value.
	    """
	    # Local imports: the file's import block does not include these modules.
	    import contextlib
	    import os
	    import shutil

	    if video_file is None:
	        raise gr.Error("No video uploaded.")

	    # Work on a private ".mp4" copy so the derived output path is predictable.
	    with NamedTemporaryFile(delete=False, suffix=".mp4") as temp_file:
	        temp_file_path = temp_file.name
	        with open(video_file, "rb") as uploaded_file:
	            # Stream the copy instead of loading the whole video into memory.
	            shutil.copyfileobj(uploaded_file, temp_file)

	    # The temp file is closed (and flushed) before it is read back.
	    try:
	        return process_video(temp_file_path)
	    finally:
	        # Best-effort removal of the input copy; the output file is kept.
	        with contextlib.suppress(OSError):
	            os.remove(temp_file_path)

	# Gradio UI: one uploaded video in, one annotated video out.
	_video_input = gr.Video(label="Upload Video")
	_video_output = gr.Video(label="Processed Video")
	iface = gr.Interface(
	    fn=gradio_interface,
	    inputs=_video_input,
	    outputs=_video_output,
	    title="Deepfake Detection",
	    description="Upload a video to detect deepfakes using MediaPipe face detection and ResNet-34 model.",
	)

	# Launch the app (with share=True) only when the file is run as a script.
	if __name__ == "__main__":
	    iface.launch(share=True)