import gradio as gr
import cv2
import torch
import dlib
import numpy as np
from imutils import face_utils
from torchvision import models, transforms
from tempfile import NamedTemporaryFile

# Load face detector and landmark predictor
face_detector = dlib.get_frontal_face_detector()
PREDICTOR_PATH = "./lib/shape_predictor_81_face_landmarks.dat"
face_predictor = dlib.shape_predictor(PREDICTOR_PATH)

# Load deepfake detection model (ResNet-34 with a 2-class head: real/fake)
model = models.resnet34()
model.fc = torch.nn.Linear(model.fc.in_features, 2)
ckpt_path = "./resnet34.pkl"
model.load_state_dict(torch.load(ckpt_path, map_location="cpu"))
model.eval()

# Define transformation for face crops (ImageNet normalization statistics)
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])


def process_video(video_path: str):
    cap = cv2.VideoCapture(video_path)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # Keep FPS as a float (truncating 29.97 to 29 would desync the output);
    # fall back to 30.0 if the container does not report a frame rate
    fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
    output_path = video_path.replace(".mp4", "_processed.mp4")
    output_video = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*"mp4v"),
                                   fps, (width, height))

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # dlib expects RGB input; OpenCV decodes frames as BGR
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        faces = face_detector(rgb_frame, 1)

        for face in faces:
            landmarks = face_utils.shape_to_np(face_predictor(rgb_frame, face))
            x_min, y_min = np.min(landmarks, axis=0)
            x_max, y_max = np.max(landmarks, axis=0)

            # Clamp the landmark bounding box to the frame so the slice below
            # never receives negative or out-of-range indices
            x_min, y_min = max(x_min, 0), max(y_min, 0)
            x_max, y_max = min(x_max, width), min(y_max, height)

            face_crop = rgb_frame[y_min:y_max, x_min:x_max]
            if face_crop.size == 0:
                continue

            face_tensor = transform(face_crop).unsqueeze(0)
            with torch.no_grad():
                output = torch.softmax(model(face_tensor), dim=1)
            fake_confidence = output[0, 1].item() * 100  # fake probability as a percentage

            label = "Fake" if fake_confidence > 50 else "Real"
            color = (0, 0, 255) if label == "Fake" else (0, 255, 0)  # BGR: red fake, green real

            # Annotate the frame with the bounding box, label, and confidence score
            label_text = f"{label} ({fake_confidence:.2f}%)"
            cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), color, 2)
            cv2.putText(frame, label_text, (x_min, y_min - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)

        output_video.write(frame)

    cap.release()
    output_video.release()
    return output_path


def gradio_interface(video_file):
    # gr.Video hands the upload to the callback as a filepath string, not a
    # file object, so read it from disk; copying into a NamedTemporaryFile
    # keeps the original upload untouched while we write the annotated copy
    with NamedTemporaryFile(delete=False, suffix=".mp4") as temp_file:
        with open(video_file, "rb") as src:
            temp_file.write(src.read())
        temp_path = temp_file.name
    return process_video(temp_path)


# Gradio UI
iface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.Video(label="Upload Video"),
    outputs=gr.Video(label="Processed Video"),
    title="Deepfake Detection",
    description="Upload a video to detect deepfakes. The model will process faces and classify them as real or fake.",
)

if __name__ == "__main__":
    iface.launch()
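
# Usage sketch (illustrative, not part of the app): process_video can also be
# called directly for offline batch runs, bypassing the Gradio UI. The
# "./videos" folder below is a hypothetical example path.
#
#   from pathlib import Path
#   for clip in Path("./videos").glob("*.mp4"):
#       print("Annotated video written to:", process_video(str(clip)))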