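"""Deepfake detection demo.

Detects faces with MediaPipe, classifies each face crop as real or fake with a
fine-tuned ResNet-34, and serves the pipeline through a Gradio interface.

Dependencies: gradio, opencv-python, mediapipe, torch, torchvision.
"""

# The checkpoint "resnet34.pkl" is expected to hold a state_dict for a ResNet-34
# whose final layer was replaced with a 2-class head. A minimal sketch of how such
# a checkpoint can be produced (caveat: this saves plain ImageNet weights, so the
# 2-class head is untrained; fine-tune on a real/fake face dataset before relying
# on the predictions):
#
#     import torch
#     from torchvision import models
#     net = models.resnet34(weights=models.ResNet34_Weights.IMAGENET1K_V1)
#     net.fc = torch.nn.Linear(net.fc.in_features, 2)
#     torch.save(net.state_dict(), "./resnet34.pkl")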
import gradio as gr
import cv2
import torch
import mediapipe as mp
from torchvision import models, transforms
from tempfile import NamedTemporaryFile

# Initialize MediaPipe Face Detection (cheap, gates per-frame work) and Face Mesh
# (derives a tight bounding box from the dense facial landmarks).
mp_face_detection = mp.solutions.face_detection
mp_face_mesh = mp.solutions.face_mesh
face_detection = mp_face_detection.FaceDetection(model_selection=1, min_detection_confidence=0.5)
face_mesh = mp_face_mesh.FaceMesh(static_image_mode=False, max_num_faces=1, min_detection_confidence=0.5)


def create_model():
    # Must match the architecture used during training: ResNet-34 with a 2-class head.
    model = models.resnet34(weights=None)
    model.fc = torch.nn.Linear(model.fc.in_features, 2)
    return model


# Load the trained model
model = create_model()
try:
    model.load_state_dict(torch.load("resnet34.pkl", map_location="cpu"))
except RuntimeError as e:
    # Architecture mismatch between checkpoint and model; inspect the state_dict keys.
    print(f"Error loading model: {e}")
model.eval()

# Preprocessing for face crops: resize to 224x224 and normalize with ImageNet statistics.
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])


def get_face_bbox(landmarks, frame_shape):
    """Compute a pixel-space bounding box from normalized Face Mesh landmarks."""
    h, w = frame_shape[:2]
    xs = [lm.x * w for lm in landmarks.landmark]
    ys = [lm.y * h for lm in landmarks.landmark]
    # Clamp to the frame so the crop indices stay valid.
    x_min = max(int(min(xs)), 0)
    y_min = max(int(min(ys)), 0)
    x_max = min(int(max(xs)), w)
    y_max = min(int(max(ys)), h)
    return x_min, y_min, x_max, y_max


def process_video(video_path: str):
    cap = cv2.VideoCapture(video_path)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS) or 30  # Some containers report 0; fall back to 30.
    output_path = video_path.replace(".mp4", "_processed.mp4")
    # Note: 'mp4v' may not play inline in every browser; an H.264-capable OpenCV
    # build can be substituted if playback in the Gradio player fails.
    output_video = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (width, height))

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Run the cheap detector first; only invoke the heavier Face Mesh when a
        # face is present, and run it once per frame rather than once per detection.
        results = face_detection.process(rgb_frame)
        if results.detections:
            mesh_results = face_mesh.process(rgb_frame)
            if mesh_results.multi_face_landmarks:
                for face_landmarks in mesh_results.multi_face_landmarks:
                    x_min, y_min, x_max, y_max = get_face_bbox(face_landmarks, frame.shape)
                    face_crop = rgb_frame[y_min:y_max, x_min:x_max]
                    if face_crop.size == 0:
                        continue
                    face_tensor = transform(face_crop).unsqueeze(0)
                    with torch.no_grad():
                        output = torch.softmax(model(face_tensor), dim=1)
                    fake_confidence = output[0, 1].item() * 100  # Fake-class probability as a percentage
                    label = "Fake" if fake_confidence > 50 else "Real"
                    color = (0, 0, 255) if label == "Fake" else (0, 255, 0)
                    label_text = f"{label} ({fake_confidence:.2f}%)"
                    cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), color, 2)
                    cv2.putText(frame, label_text, (x_min, y_min - 10),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)

        output_video.write(frame)

    cap.release()
    output_video.release()
    return output_path


def gradio_interface(video_file):
    if video_file is None:
        # Raise instead of returning a string: the output component expects a video path.
        raise gr.Error("No video uploaded.")
    # gr.Video passes a filepath; copy it to a temporary file so the processed
    # output can be written alongside it without touching Gradio's cache.
    with NamedTemporaryFile(delete=False, suffix=".mp4") as temp_file:
        temp_file_path = temp_file.name
        with open(video_file, "rb") as uploaded_file:
            temp_file.write(uploaded_file.read())
    output_path = process_video(temp_file_path)
    return output_path


iface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.Video(label="Upload Video"),
    outputs=gr.Video(label="Processed Video"),
    title="Deepfake Detection",
    description="Upload a video to detect deepfakes using MediaPipe face detection and a ResNet-34 classifier.",
)

if __name__ == "__main__":
    iface.launch(share=True)
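# Usage: run `python app.py` and open the printed local URL. With share=True,
# Gradio additionally requests a temporary public gradio.live link.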