'''
# author: Zhiyuan Yan
# email: [email protected]
# date: 2023-03-30
The code is specifically designed for generating nearest sample pairs for Face X-ray.
Alternatively, you can utilize the pre-generated pkl files available in our GitHub repository. Please refer to the "Releases" section on our repository for accessing these files.
'''
import os
import json
import pickle
import numpy as np
import heapq
import random
from tqdm import tqdm
from scipy.spatial import KDTree
def load_landmark(file_path):
    """
    Load a 2D facial-landmark array from disk.

    Args:
        file_path: Path to a ``.npy`` landmark file, or None.

    Returns:
        A float32 numpy array with the loaded landmarks, or an all-zero
        (81, 2) array when the path is None or the file does not exist.
    """
    # Missing path (None) and missing file share the same zero fallback.
    if file_path is None or not os.path.exists(file_path):
        return np.zeros((81, 2))
    return np.float32(np.load(file_path))
def get_landmark_dict(dataset_folder):
    """
    Build (or load from cache) a dict mapping landmark-file paths to
    landmark arrays for all FF-real/c23 frames listed in the
    FaceForensics++ metadata JSON.

    Args:
        dataset_folder: Folder containing "FaceForensics++.json".

    Returns:
        dict: {landmark_npy_path: landmark ndarray} for every frame.
    """
    # BUGFIX: the cache was previously written to 'landmark_dict_ffall.pkl'
    # but read back from 'landmark_dict_ff.pkl', so it was never reused.
    # Use a single filename for both load and save.
    cache_path = 'landmark_dict_ff.pkl'
    if os.path.exists(cache_path):
        with open(cache_path, 'rb') as f:
            return pickle.load(f)
    # Open the metadata file for the current folder
    metadata_path = os.path.join(dataset_folder, "FaceForensics++.json")
    with open(metadata_path, "r") as f:
        metadata = json.load(f)
    # Only the real (pristine) FF++ videos are needed for pair generation.
    ff_real_data = metadata['FaceForensics++']['FF-real']
    landmark_dict = {}
    for mode, value in ff_real_data.items():
        for video_name, video_info in tqdm(value['c23'].items()):
            for frame_path in video_info['frames']:
                # Derive the landmark path once (the original computed this
                # replace chain twice per frame).
                lmk_path = frame_path.replace('frames', 'landmarks').replace(".png", ".npy")
                landmark_dict[lmk_path] = load_landmark(lmk_path)
    # Persist so subsequent runs skip the expensive rebuild.
    with open(cache_path, 'wb') as f:
        pickle.dump(landmark_dict, f)
    return landmark_dict
def get_nearest_faces_fixed_pair(landmark_info, num_neighbors):
    """
    Pair each face with ONE randomly chosen face among its num_neighbors
    nearest neighbors (by flattened-landmark distance), using a KDTree
    for fast nearest-neighbor search.

    Args:
        landmark_info: dict {face_id: landmark ndarray}.
        num_neighbors: Number of nearest candidates to sample from.

    Returns:
        dict: {face_id: paired_face_id} with a single fixed partner each.
    """
    random.seed(1024)  # Fix the random seed for reproducibility
    # BUGFIX: this function previously shared 'nearest_face_info.pkl' with
    # get_nearest_faces(), which stores lists of ids rather than a single id.
    # Whichever ran second silently returned the other's incompatible format.
    # Use a dedicated cache file for the fixed-pair mapping.
    cache_path = 'nearest_face_info_fixed_pair.pkl'
    if os.path.exists(cache_path):
        with open(cache_path, 'rb') as f:
            return pickle.load(f)
    landmarks_array = np.array([lmk.flatten() for lmk in landmark_info.values()])
    landmark_ids = list(landmark_info.keys())
    # Build a KDTree using the flattened landmarks
    tree = KDTree(landmarks_array)
    nearest_faces = {}
    for idx, this_lmk in tqdm(enumerate(landmarks_array), total=len(landmarks_array)):
        # Query k+1 neighbors: index 0 is the point itself, so drop it.
        dists, indices = tree.query(this_lmk, k=num_neighbors + 1)
        # Randomly pick one partner from the nearest N neighbors.
        picked_idx = random.choice(indices[1:])
        nearest_faces[landmark_ids[idx]] = landmark_ids[picked_idx]
    # Persist so subsequent runs skip the expensive search.
    with open(cache_path, 'wb') as f:
        pickle.dump(nearest_faces, f)
    return nearest_faces
def get_nearest_faces(landmark_info, num_neighbors):
    """
    Find, for every face, its num_neighbors nearest faces measured by
    Euclidean distance between flattened landmarks (KDTree — much faster
    than brute-force pairwise comparison).

    Args:
        landmark_info: dict {face_id: landmark ndarray}.
        num_neighbors: Number of nearest neighbors to keep per face.

    Returns:
        dict: {face_id: [neighbor_id, ...]} ordered by increasing distance.
    """
    random.seed(1024)  # Fix the random seed for reproducibility
    cache_file = 'nearest_face_info.pkl'
    # Reuse the cached result when a previous run already computed it.
    if os.path.exists(cache_file):
        with open(cache_file, 'rb') as f:
            return pickle.load(f)
    face_ids = list(landmark_info.keys())
    flat_landmarks = np.array([lmk.flatten() for lmk in landmark_info.values()])
    # Index every flattened landmark vector in a KDTree.
    tree = KDTree(flat_landmarks)
    neighbor_map = {}
    for i in tqdm(range(len(flat_landmarks))):
        # k+1 because the closest hit is always the query point itself.
        _, idxs = tree.query(flat_landmarks[i], k=num_neighbors + 1)
        neighbor_map[face_ids[i]] = [face_ids[j] for j in idxs[1:]]
    # Persist so subsequent runs skip the expensive search.
    with open(cache_file, 'wb') as f:
        pickle.dump(neighbor_map, f)
    return neighbor_map
# --- Script entry point (runs on import; no __main__ guard) ---
# NOTE(review): dataset path is hard-coded to the author's machine —
# adjust before running elsewhere.
# Load the landmark dictionary and obtain the landmark dict
dataset_folder = "/home/zhiyuanyan/disfin/deepfake_benchmark/preprocessing/dataset_json/"
landmark_info = get_landmark_dict(dataset_folder)
# Get the nearest faces for each image (in landmark_dict)
num_neighbors = 100
nearest_faces_info = get_nearest_faces(landmark_info, num_neighbors)  # running time: about 20 mins