aiwithoutborders-xyz
/

OpenSight-CommunityForensics-Deepfake-ViT

Image Classification

deepfake_detection

Model card Files Files and versions Community

OpenSight-CommunityForensics-Deepfake-ViT / scripts /resample_evalset.py

LPX55's picture

chore: resample-prep

e174112 verified 13 days ago

history blame contribute delete

2.78 kB

	import os
	import random
	import cv2
	from datetime import datetime
	import logging

	# Send INFO-and-above records to a log file, each line stamped with
	# the time and severity.
	log_file = "sample_images.log"
	_LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
	logging.basicConfig(
	    filename=log_file,
	    level=logging.INFO,
	    format=_LOG_FORMAT,
	)

	# Shared Haar cascade instance; populated on first use so the XML model is
	# parsed once per process instead of once per image.
	_FACE_CASCADE = None


	def _get_face_cascade():
	    """Return the frontal-face Haar cascade, loading it on first call."""
	    global _FACE_CASCADE
	    if _FACE_CASCADE is None:
	        _FACE_CASCADE = cv2.CascadeClassifier(
	            cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
	        )
	    return _FACE_CASCADE


	def detect_faces(image_path):
	    """Report whether at least one frontal face is detected in an image.

	    Args:
	        image_path: Path of the image file to inspect.

	    Returns:
	        True if the Haar cascade finds one or more faces; False if it finds
	        none or if OpenCV cannot read the file (``cv2.imread`` returns None).
	    """
	    # The cascade operates on single-channel input, so read as grayscale.
	    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
	    if image is None:
	        return False

	    faces = _get_face_cascade().detectMultiScale(
	        image, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30)
	    )

	    return len(faces) > 0

	def _link_or_copy(src, dst):
	    """Place *src* at *dst* with a hard link, copying if linking fails.

	    Returns:
	        True when *dst* was created, False when *dst* already existed and
	        was left untouched.
	    """
	    import shutil  # only needed on the fallback path

	    try:
	        os.link(src, dst)
	    except FileExistsError:
	        # A previous run already produced this file; keep reruns idempotent.
	        return False
	    except OSError:
	        # Hard links cannot span filesystems and are unsupported on some.
	        shutil.copy2(src, dst)
	    return True


	def sample_images(input_folder, output_folder, sample_rate=0.2):
	    """Mirror every face-containing image of a folder tree into another tree.

	    Walks ``input_folder`` recursively, recreates its directory layout under
	    ``output_folder`` and, for each file with an image extension in which
	    ``detect_faces`` finds a face, hard-links it (or copies it when linking is
	    not possible) to the same relative location. Each placed file and the
	    final totals are written to the log; a running progress line is printed
	    after each directory.

	    Args:
	        input_folder: Root of the tree to scan.
	        output_folder: Root of the mirrored tree; created if missing.
	        sample_rate: Accepted for interface compatibility but not used:
	            every image with a detected face is selected.
	            NOTE(review): confirm whether random sub-sampling was intended.
	    """
	    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif')

	    os.makedirs(output_folder, exist_ok=True)

	    # total_files counts every file seen, not only images.
	    total_files = 0
	    sampled_files = 0
	    start_time = datetime.now()

	    for root, _dirs, files in os.walk(input_folder):
	        relative_path = os.path.relpath(root, input_folder)
	        output_subfolder = os.path.join(output_folder, relative_path)
	        os.makedirs(output_subfolder, exist_ok=True)

	        total_files += len(files)

	        # Keep only image files in which a face is detected.
	        selected = [
	            name for name in files
	            if name.lower().endswith(image_extensions)
	            and detect_faces(os.path.join(root, name))
	        ]
	        sampled_files += len(selected)

	        for name in selected:
	            input_file_path = os.path.join(root, name)
	            output_file_path = os.path.join(output_subfolder, name)
	            if _link_or_copy(input_file_path, output_file_path):
	                logging.info("Sampled and copied %s to %s",
	                             input_file_path, output_file_path)
	            else:
	                logging.info("Skipped %s: %s already exists",
	                             input_file_path, output_file_path)

	        elapsed_time = datetime.now() - start_time
	        print(f"Processed {sampled_files}/{total_files} files in {elapsed_time}")

	    total_time = datetime.now() - start_time
	    logging.info("Total time taken: %s", total_time)
	    logging.info("Sampled %s out of %s files.", sampled_files, total_files)

	if __name__ == "__main__":
	    # Script entry point: mirror face images from EvalSet into resampledEvalSet.
	    input_folder, output_folder = "EvalSet", "resampledEvalSet"
	    sample_images(input_folder, output_folder)