Spaces:

ArissBandoss
/

DeepFake-Videos-Detection

Runtime error

App Files Files Community

anyantudre committed on Jan 5

Commit

caa56d6

0 Parent(s):

moved from training repo to inference

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +35 -0
.github/workflows/main.yml +28 -0
.gitignore +12 -0
app.py +132 -0
inference.py +117 -0
requirements.txt +33 -0
training/config/__init__.py +7 -0
training/config/config/__init__.py +7 -0
training/config/config/backbone/cls_hrnet_w48.yaml +103 -0
training/config/config/detector/efficientnetb4.yaml +88 -0
training/config/config/detector/resnet34.yaml +87 -0
training/config/config/detector/ucf.yaml +130 -0
training/config/config/detector/xception.yaml +86 -0
training/config/config/test_config.yaml +38 -0
training/config/config/train_config.yaml +43 -0
training/config/detector/efficientnetb4.yaml +88 -0
training/config/detector/ucf.yaml +131 -0
training/config/detector/xception.yaml +86 -0
training/config/test_config.yaml +38 -0
training/config/train_config.yaml +43 -0
training/dataset/I2G_dataset.py +389 -0
training/dataset/__init__.py +19 -0
training/dataset/abstract_dataset.py +621 -0
training/dataset/albu.py +99 -0
training/dataset/face_utils.py +238 -0
training/dataset/ff_blend.py +572 -0
training/dataset/fwa_blend.py +548 -0
training/dataset/generate_parsing_mask.py +129 -0
training/dataset/generate_xray_nearest.py +136 -0
training/dataset/iid_dataset.py +116 -0
training/dataset/library/000_0000.png +0 -0
training/dataset/library/001_0000.png +0 -0
training/dataset/library/DeepFakeMask.py +181 -0
training/dataset/library/LICENSE +674 -0
training/dataset/library/README.md +12 -0
training/dataset/library/all_in_one.jpg +0 -0
training/dataset/library/bi_online_generation.py +241 -0
training/dataset/library/precomuted_landmarks.json +1 -0
training/dataset/lrl_dataset.py +139 -0
training/dataset/lsda_dataset.py +382 -0
training/dataset/pair_dataset.py +150 -0
training/dataset/sbi_api.py +371 -0
training/dataset/sbi_dataset.py +139 -0
training/dataset/tall_dataset.py +183 -0
training/dataset/utils/DeepFakeMask.py +402 -0
training/dataset/utils/SLADD.py +163 -0
training/dataset/utils/attribution_mask.py +55 -0
training/dataset/utils/bi_online_generation.py +289 -0
training/dataset/utils/bi_online_generation_yzy.py +268 -0
training/dataset/utils/color_transfer.py +516 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,35 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

.github/workflows/main.yml ADDED Viewed

	@@ -0,0 +1,28 @@

+name: CI
+# Controls when the workflow will run
+on:
+  # Triggers the workflow on push events but only for the "main" branch
+  push:
+    branches: [ "main" ]
+  # Allows you to run this workflow manually from the Actions tab
+  workflow_dispatch:
+jobs:
+  sync-to-hub:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+      - name: Add remote
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+        run: git remote add space https://ArissBandoss:$HF_TOKEN@huggingface.co/spaces/ArissBandoss/DeepFake-Videos-Detection
+      - name: Push to hub
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+        run: git push --force https://ArissBandoss:$HF_TOKEN@huggingface.co/spaces/ArissBandoss/DeepFake-Videos-Detection main

.gitignore ADDED Viewed

	@@ -0,0 +1,12 @@

+.idea
+*__pycache__*
+*.vscode*
+*.pyc
+*.pth
+*.pt
+*.dat
+audios-testing
+temp_video_frames
+.gradio
+*.zip
+*.npy

app.py ADDED Viewed

	@@ -0,0 +1,132 @@

+import os
+import cv2
+import torch
+import numpy as np
+from torchvision import transforms
+from PIL import Image
+from tqdm import tqdm
+from training.detectors import DETECTOR
+import yaml
+import gradio as gr
+# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# Detector names offered in the UI dropdown. Each name is also used as the key
+# into DETECTOR and to build the config/weights file names.
+AVAILABLE_MODELS = [
+    "xception",
+    "ucf",
+]
+# load the model
+def load_model(model_name, config_path, weights_path):
+    with open(config_path, 'r') as f:
+        config = yaml.safe_load(f)
+    config['model_name'] = model_name
+    model_class = DETECTOR[model_name]
+    model = model_class(config).to(device)
+    checkpoint = torch.load(weights_path, map_location=device)
+    model.load_state_dict(checkpoint, strict=True)
+    model.eval()
+    return model
+# preprocess a single video
+def preprocess_video(video_path, output_dir, frame_num=32):
+    os.makedirs(output_dir, exist_ok=True)
+    frames_dir = os.path.join(output_dir, "frames")
+    os.makedirs(frames_dir, exist_ok=True)
+    cap = cv2.VideoCapture(video_path)
+    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+    frame_indices = np.linspace(0, total_frames - 1, frame_num, dtype=int)
+    # extract frames
+    frames = []
+    for idx in frame_indices:
+        cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
+        ret, frame = cap.read()
+        if ret:
+            frame_path = os.path.join(frames_dir, f"frame_{idx:04d}.png")
+            cv2.imwrite(frame_path, frame)
+            frames.append(frame_path)
+    cap.release()
+    return frames
+#  inference on a single video
+def infer_video(video_path, model, device):
+    # Preprocess the video
+    output_dir = "temp_video_frames"
+    frames = preprocess_video(video_path, output_dir)
+    transform = transforms.Compose([
+        transforms.Resize((256, 256)),
+        transforms.ToTensor(),
+        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
+    ])
+    probs = []
+    for frame_path in frames:
+        frame = Image.open(frame_path).convert("RGB")
+        frame = transform(frame).unsqueeze(0).to(device)
+        data_dict = {
+            "image": frame,
+            "label": torch.tensor([0]).to(device),  # Dummy label
+            "label_spe": torch.tensor([0]).to(device),  # Dummy specific label
+        }
+        with torch.no_grad():
+            pred_dict = model(data_dict, inference=True)
+            logits = pred_dict["cls"]  # Shape: [batch_size, num_classes]
+            prob = torch.softmax(logits, dim=1)[:, 1].item()  # Probability of being "fake"
+            probs.append(prob)
+    # aggregate predictions (e.g., average probability)
+    avg_prob = np.mean(probs)
+    prediction = "Fake" if avg_prob > 0.5 else "Real"
+    return prediction, avg_prob
+# gradio inference function
+def gradio_inference(video, model_name):
+    config_path = f"/teamspace/studios/this_studio/DeepfakeBench/training/config/detector/{model_name}.yaml"
+    weights_path = f"/teamspace/studios/this_studio/DeepfakeBench/training/weights/{model_name}_best.pth"
+    if not os.path.exists(config_path):
+        return f"Error: Config file for model '{model_name}' not found at {config_path}."
+    if not os.path.exists(weights_path):
+        return f"Error: Weights file for model '{model_name}' not found at {weights_path}."
+    model = load_model(model_name, config_path, weights_path)
+    prediction, confidence = infer_video(video, model, device)
+    return f"Model: {model_name}\nPrediction: {prediction} (Confidence: {confidence:.4f})"
+# Gradio App
+def create_gradio_app():
+    with gr.Blocks() as demo:
+        gr.Markdown("# Deepfake Detection Demo")
+        gr.Markdown("Upload a video and select a model to detect if it's real or fake.")
+        with gr.Row():
+            video_input = gr.Video(label="Upload Video")
+            model_dropdown = gr.Dropdown(choices=AVAILABLE_MODELS, label="Select Model", value="xception")
+        output_text = gr.Textbox(label="Prediction Result")
+        submit_button = gr.Button("Run Inference")
+        submit_button.click(
+            fn=gradio_inference,
+            inputs=[video_input, model_dropdown],
+            outputs=output_text,
+        )
+    return demo
+# Build and serve the UI only when this file is executed as a script.
+if __name__ == "__main__":
+    demo = create_gradio_app()
+    # share=True asks Gradio to also create a public share link.
+    demo.launch(share=True)

inference.py ADDED Viewed

	@@ -0,0 +1,117 @@

+import os
+import cv2
+import torch
+import numpy as np
+from torchvision import transforms
+from PIL import Image
+from tqdm import tqdm
+from training.detectors import DETECTOR
+import yaml
+# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# load the model
+def load_model(model_name, config_path, weights_path):
+    with open(config_path, 'r') as f:
+        config = yaml.safe_load(f)
+    config['model_name'] = model_name
+    model_class = DETECTOR[model_name]
+    model = model_class(config).to(device)
+    checkpoint = torch.load(weights_path, map_location=device)
+    model.load_state_dict(checkpoint, strict=True)
+    model.eval()
+    return model
+# preprocess a single video
+def preprocess_video(video_path, output_dir, frame_num=32):
+    os.makedirs(output_dir, exist_ok=True)
+    frames_dir = os.path.join(output_dir, "frames")
+    os.makedirs(frames_dir, exist_ok=True)
+    cap = cv2.VideoCapture(video_path)
+    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+    frame_indices = np.linspace(0, total_frames - 1, frame_num, dtype=int)
+    # extract frames
+    frames = []
+    for idx in frame_indices:
+        cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
+        ret, frame = cap.read()
+        if ret:
+            frame_path = os.path.join(frames_dir, f"frame_{idx:04d}.png")
+            cv2.imwrite(frame_path, frame)
+            frames.append(frame_path)
+    cap.release()
+    return frames
+# inference on a single video
+def infer_video(video_path, model, device):
+    output_dir = "temp_video_frames"
+    frames = preprocess_video(video_path, output_dir)
+    transform = transforms.Compose([
+        transforms.Resize((256, 256)),
+        transforms.ToTensor(),
+        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
+    ])
+    probs = []
+    for frame_path in frames:
+        frame = Image.open(frame_path).convert("RGB")
+        frame = transform(frame).unsqueeze(0).to(device)
+        data_dict = {
+            "image": frame,
+            "label": torch.tensor([0]).to(device),  # Dummy label
+            "label_spe": torch.tensor([0]).to(device),  # Dummy specific label
+        }
+        with torch.no_grad():
+            pred_dict = model(data_dict, inference=True)
+            logits = pred_dict["cls"]  # Shape: [batch_size, num_classes]
+            prob = torch.softmax(logits, dim=1)[:, 1].item()  # Probability of being "fake"
+            probs.append(prob)
+    avg_prob = np.mean(probs)
+    prediction = "Fake" if avg_prob > 0.5 else "Real"
+    return prediction, avg_prob
+# main function for terminal-based inference
+def main(video_filename, model_name):
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    config_path = f"/teamspace/studios/this_studio/DeepfakeBench/training/config/detector/{model_name}.yaml"
+    weights_path = f"/teamspace/studios/this_studio/DeepfakeBench/training/weights/{model_name}_best.pth"
+    if not os.path.exists(config_path):
+        print(f"Error: Config file for model '{model_name}' not found at {config_path}.")
+        return
+    if not os.path.exists(weights_path):
+        print(f"Error: Weights file for model '{model_name}' not found at {weights_path}.")
+        return
+    model = load_model(model_name, config_path, weights_path)
+    video_path = os.path.join(os.getcwd(), video_filename)
+    if not os.path.exists(video_path):
+        print(f"Error: Video file '{video_filename}' not found in the current directory.")
+        return
+    prediction, confidence = infer_video(video_path, model, device)
+    print(f"Model: {model_name}")
+    print(f"Prediction: {prediction} (Confidence: {confidence:.4f})")
+if __name__ == "__main__":
+    import sys
+    if len(sys.argv) != 3:
+        print("Usage: python inference_script.py <video_filename> <model_name>")
+        print("Available models: xception, meso4, meso4Inception, efficientnetb4, ucf, etc.")
+    else:
+        video_filename = sys.argv[1]
+        model_name = sys.argv[2]
+        main(video_filename, model_name)

requirements.txt ADDED Viewed

	@@ -0,0 +1,33 @@

+numpy==1.21.5
+pandas==1.4.2
+Pillow==9.0.1
+dlib==19.24.0
+imageio==2.9.0
+imgaug==0.4.0
+tqdm==4.61.0
+scipy==1.7.3
+seaborn==0.11.2
+pyyaml==6.0
+imutils==0.5.4
+opencv-python==4.6.0.66
+scikit-image==0.19.2
+scikit-learn==1.0.2
+albumentations==1.1.0
+torch==1.12.0
+torchvision==0.13.0
+torchaudio==0.12.0
+efficientnet-pytorch==0.7.1
+timm==0.6.12
+segmentation-models-pytorch==0.3.2
+torchtoolbox==0.1.8.2
+tensorboard==2.10.1
+setuptools==59.5.0
+loralib
+einops
+transformers
+filterpy
+simplejson
+kornia
+fvcore
+imgaug==0.4.0
+git+https://github.com/openai/CLIP.git

training/config/__init__.py ADDED Viewed

	@@ -0,0 +1,7 @@

+# Package initializer that extends sys.path so modules located in the two
+# directories above this package can be imported by their top-level names.
+import os
+import sys
+# Absolute path of this __init__.py file.
+current_file_path = os.path.abspath(__file__)
+# Two dirname() calls: the directory that contains this package's directory
+# (``training/`` for a file located at training/config/__init__.py).
+parent_dir = os.path.dirname(os.path.dirname(current_file_path))
+# One more level up (the repository root for that location).
+project_root_dir = os.path.dirname(parent_dir)
+# Appended (not inserted), so existing sys.path entries keep precedence.
+sys.path.append(parent_dir)
+sys.path.append(project_root_dir)

training/config/config/__init__.py ADDED Viewed

	@@ -0,0 +1,7 @@

+# Package initializer that extends sys.path with the two directories above
+# this package's directory.
+import os
+import sys
+# Absolute path of this __init__.py file.
+current_file_path = os.path.abspath(__file__)
+# Two dirname() calls: the directory that contains this package's directory.
+# NOTE(review): for a file located at training/config/config/__init__.py this
+# resolves to ``training/config``, and project_root_dir below resolves to
+# ``training`` rather than the repository root -- confirm this is intended.
+parent_dir = os.path.dirname(os.path.dirname(current_file_path))
+project_root_dir = os.path.dirname(parent_dir)
+# Appended (not inserted), so existing sys.path entries keep precedence.
+sys.path.append(parent_dir)
+sys.path.append(project_root_dir)

training/config/config/backbone/cls_hrnet_w48.yaml ADDED Viewed

	@@ -0,0 +1,103 @@

+CUDNN:
+  BENCHMARK: true
+  DETERMINISTIC: false
+  ENABLED: true
+GPUS: (0,1,2,3)
+OUTPUT_DIR: 'output'
+LOG_DIR: 'log'
+WORKERS: 4
+PRINT_FREQ: 100
+DATASET:
+  DATASET: lip
+  ROOT: 'data/'
+  TEST_SET: 'list/lip/valList.txt'
+  TRAIN_SET: 'list/lip/trainList.txt'
+  NUM_CLASSES: 20
+MODEL:
+  NAME: cls_hrnet
+  #IMAGE_SIZE:
+  #  - 224
+  #  - 224
+  EXTRA:
+    STAGE1:
+      NUM_MODULES: 1
+      NUM_RANCHES: 1
+      BLOCK: BOTTLENECK
+      NUM_BLOCKS:
+      - 4
+      NUM_CHANNELS:
+      - 64
+      FUSE_METHOD: SUM
+    STAGE2:
+      NUM_MODULES: 1
+      NUM_BRANCHES: 2
+      BLOCK: BASIC
+      NUM_BLOCKS:
+      - 4
+      - 4
+      NUM_CHANNELS:
+      - 48
+      - 96
+      FUSE_METHOD: SUM
+    STAGE3:
+      NUM_MODULES: 4
+      NUM_BRANCHES: 3
+      BLOCK: BASIC
+      NUM_BLOCKS:
+      - 4
+      - 4
+      - 4
+      NUM_CHANNELS:
+      - 48
+      - 96
+      - 192
+      FUSE_METHOD: SUM
+    STAGE4:
+      NUM_MODULES: 3
+      NUM_BRANCHES: 4
+      BLOCK: BASIC
+      NUM_BLOCKS:
+      - 4
+      - 4
+      - 4
+      - 4
+      NUM_CHANNELS:
+      - 48
+      - 96
+      - 192
+      - 384
+      FUSE_METHOD: SUM
+LOSS:
+  USE_OHEM: false
+  OHEMTHRES: 0.9
+  OHEMKEEP: 131072
+TRAIN:
+  IMAGE_SIZE:
+  - 473
+  - 473
+  BASE_SIZE: 473
+  BATCH_SIZE_PER_GPU: 10
+  SHUFFLE: true
+  BEGIN_EPOCH: 0
+  END_EPOCH: 150
+  RESUME: true
+  OPTIMIZER: sgd
+  LR: 0.007
+  WD: 0.0005
+  MOMENTUM: 0.9
+  NESTEROV: false
+  FLIP: true
+  MULTI_SCALE: true
+  DOWNSAMPLERATE: 1
+  IGNORE_LABEL: 255
+  SCALE_FACTOR: 11
+TEST:
+  IMAGE_SIZE:
+  - 473
+  - 473
+  BASE_SIZE: 473
+  BATCH_SIZE_PER_GPU: 16
+  NUM_SAMPLES: 2000
+  FLIP_TEST: false
+  MULTI_SCALE: false

training/config/config/detector/efficientnetb4.yaml ADDED Viewed

	@@ -0,0 +1,88 @@

+# log dir
+log_dir: logs/evaluations/effnb4
+# model setting
+# pretrained: /home/zhiyuanyan/disfin/deepfake_benchmark/training/pretrained/xception-b5690688.pth   # path to a pre-trained model, if using one
+pretrained: ./training/pretrained/efficientnet-b4-6ed6700e.pth   # path to a pre-trained model, if using one
+model_name: efficientnetb4   # model name
+backbone_name: efficientnetb4  # backbone name
+#backbone setting
+backbone_config:
+  num_classes: 2
+  inc: 3
+  dropout: false
+  mode: Original
+# dataset
+all_dataset: [FaceForensics++, FF-F2F, FF-DF, FF-FS, FF-NT, FaceShifter, DeepFakeDetection, Celeb-DF-v1, Celeb-DF-v2, DFDCP, DFDC, DeeperForensics-1.0, UADFV]
+train_dataset: [FF-NT]
+test_dataset: [FaceForensics++, FF-F2F, FF-DF, FF-FS, FF-NT]
+compression: c23  # compression-level for videos
+train_batchSize: 32   # training batch size
+test_batchSize: 32   # test batch size
+workers: 8   # number of data loading workers
+frame_num: {'train': 32, 'test': 32}   # number of frames to use per video in training and testing
+resolution: 256   # resolution of output image to network
+with_mask: false   # whether to include mask information in the input
+with_landmark: false   # whether to include facial landmark information in the input
+save_ckpt: true   # whether to save checkpoint
+save_feat: true   # whether to save features
+# data augmentation
+use_data_augmentation: true  # Add this flag to enable/disable data augmentation
+data_aug:
+  flip_prob: 0.5
+  rotate_prob: 0.5
+  rotate_limit: [-10, 10]
+  blur_prob: 0.5
+  blur_limit: [3, 7]
+  brightness_prob: 0.5
+  brightness_limit: [-0.1, 0.1]
+  contrast_limit: [-0.1, 0.1]
+  quality_lower: 40
+  quality_upper: 100
+# mean and std for normalization
+mean: [0.5, 0.5, 0.5]
+std: [0.5, 0.5, 0.5]
+# optimizer config
+optimizer:
+  # choose between 'adam' and 'sgd'
+  type: adam
+  adam:
+    lr: 0.0002  # learning rate
+    beta1: 0.9  # beta1 for Adam optimizer
+    beta2: 0.999 # beta2 for Adam optimizer
+    eps: 0.00000001  # epsilon for Adam optimizer
+    weight_decay: 0.0005  # weight decay for regularization
+    amsgrad: false
+  sgd:
+    lr: 0.0002  # learning rate
+    momentum: 0.9  # momentum for SGD optimizer
+    weight_decay: 0.0005  # weight decay for regularization
+# training config
+lr_scheduler: null   # learning rate scheduler
+nEpochs: 10   # number of epochs to train for
+start_epoch: 0   # manual epoch number (useful for restarts)
+save_epoch: 1   # interval epochs for saving models
+rec_iter: 100   # interval iterations for recording
+logdir: ./logs   # folder to output images and logs
+manualSeed: 1024   # manual seed for random number generation
+save_ckpt: false   # whether to save checkpoint (duplicate key: this later value overrides the earlier 'save_ckpt: true' in this file)
+# loss function
+loss_func: cross_entropy   # loss function to use
+losstype: null
+# metric
+metric_scoring: auc   # metric for evaluation (auc, acc, eer, ap)
+# cuda
+cuda: true   # whether to use CUDA acceleration
+cudnn: true   # whether to use CuDNN for convolution operations

training/config/config/detector/resnet34.yaml ADDED Viewed

	@@ -0,0 +1,87 @@

+# log dir
+log_dir: /mntcephfs/lab_data/zhiyuanyan/benchmark_results/logs_final/resnet18
+# model setting
+pretrained: /home/zhiyuanyan/disfin/deepfake_benchmark/training/pretrained/resnet34-b627a593.pth   # path to a pre-trained model, if using one
+model_name: resnet34   # model name
+backbone_name: resnet34  # backbone name
+#backbone setting
+backbone_config:
+  num_classes: 2
+  inc: 3
+  dropout: false
+  mode: Original
+# dataset
+all_dataset: [FaceForensics++, FF-F2F, FF-DF, FF-FS, FF-NT, FaceShifter, DeepFakeDetection, Celeb-DF-v1, Celeb-DF-v2, DFDCP, DFDC, DeeperForensics-1.0, UADFV]
+train_dataset: [FF-NT]
+test_dataset: [FaceForensics++, FF-F2F, FF-DF, FF-FS, FF-NT]
+compression: c23  # compression-level for videos
+train_batchSize: 32   # training batch size
+test_batchSize: 32   # test batch size
+workers: 8   # number of data loading workers
+frame_num: {'train': 32, 'test': 32}   # number of frames to use per video in training and testing
+resolution: 256   # resolution of output image to network
+with_mask: false   # whether to include mask information in the input
+with_landmark: false   # whether to include facial landmark information in the input
+save_ckpt: true   # whether to save checkpoint
+save_feat: true   # whether to save features
+# data augmentation
+use_data_augmentation: true  # Add this flag to enable/disable data augmentation
+data_aug:
+  flip_prob: 0.5
+  rotate_prob: 0.5
+  rotate_limit: [-10, 10]
+  blur_prob: 0.5
+  blur_limit: [3, 7]
+  brightness_prob: 0.5
+  brightness_limit: [-0.1, 0.1]
+  contrast_limit: [-0.1, 0.1]
+  quality_lower: 40
+  quality_upper: 100
+# mean and std for normalization
+mean: [0.5, 0.5, 0.5]
+std: [0.5, 0.5, 0.5]
+# optimizer config
+optimizer:
+  # choose between 'adam' and 'sgd'
+  type: adam
+  adam:
+    lr: 0.0002  # learning rate
+    beta1: 0.9  # beta1 for Adam optimizer
+    beta2: 0.999 # beta2 for Adam optimizer
+    eps: 0.00000001  # epsilon for Adam optimizer
+    weight_decay: 0.0005  # weight decay for regularization
+    amsgrad: false
+  sgd:
+    lr: 0.0002  # learning rate
+    momentum: 0.9  # momentum for SGD optimizer
+    weight_decay: 0.0005  # weight decay for regularization
+# training config
+lr_scheduler: null   # learning rate scheduler
+nEpochs: 10   # number of epochs to train for
+start_epoch: 0   # manual epoch number (useful for restarts)
+save_epoch: 1   # interval epochs for saving models
+rec_iter: 100   # interval iterations for recording
+logdir: ./logs   # folder to output images and logs
+manualSeed: 1024   # manual seed for random number generation
+save_ckpt: false   # whether to save checkpoint (duplicate key: this later value overrides the earlier 'save_ckpt: true' in this file)
+# loss function
+loss_func: cross_entropy   # loss function to use
+losstype: null
+# metric
+metric_scoring: auc   # metric for evaluation (auc, acc, eer, ap)
+# cuda
+cuda: true   # whether to use CUDA acceleration
+cudnn: true   # whether to use CuDNN for convolution operations

training/config/config/detector/ucf.yaml ADDED Viewed

	@@ -0,0 +1,130 @@

+# log dir
+log_dir: /data/home/zhiyuanyan/DeepfakeBench/debug_logs/ucf
+# model setting
+pretrained: ./training/pretrained/xception-b5690688.pth   # path to a pre-trained model, if using one
+# pretrained: '/home/zhiyuanyan/.cache/torch/hub/checkpoints/resnet34-b627a593.pth'   # path to a pre-trained model, if using one
+model_name: ucf   # model name
+backbone_name: xception  # backbone name
+encoder_feat_dim: 512  # feature dimension of the backbone
+#backbone setting
+backbone_config:
+  mode: adjust_channel
+  num_classes: 2
+  inc: 3
+  dropout: false
+# dataset
+all_dataset: [FaceForensics++, FF-F2F, FF-DF, FF-FS, FF-NT, FaceShifter, DeepFakeDetection, Celeb-DF-v1, Celeb-DF-v2, DFDCP, DFDC, DeeperForensics-1.0, UADFV]
+train_dataset: [FF-F2F, FF-DF, FF-FS, FF-NT,]
+test_dataset: [Celeb-DF-v2]
+dataset_type: pair
+compression: c23  # compression-level for videos
+train_batchSize: 16   # training batch size
+test_batchSize: 32   # test batch size
+workers: 8   # number of data loading workers
+frame_num: {'train': 32, 'test': 32}   # number of frames to use per video in training and testing
+resolution: 256   # resolution of output image to network
+with_mask: false   # whether to include mask information in the input
+with_landmark: false   # whether to include facial landmark information in the input
+save_feat: true   # whether to save features
+# label settings
+label_dict:
+  # DFD
+  DFD_fake: 1
+  DFD_real: 0
+  FaceShifter: 1
+  FF-FH: 1
+  # FF++ + FaceShifter(FF-real+FF-FH)
+  # ucf specific label setting
+  FF-DF: 1
+  FF-F2F: 2
+  FF-FS: 3
+  FF-NT: 4
+  FF-real: 0
+  # CelebDF
+  CelebDFv1_real: 0
+  CelebDFv1_fake: 1
+  CelebDFv2_real: 0
+  CelebDFv2_fake: 1
+  # DFDCP
+  DFDCP_Real: 0
+  DFDCP_FakeA: 1
+  DFDCP_FakeB: 1
+  # DFDC
+  DFDC_Fake: 1
+  DFDC_Real: 0
+  # DeeperForensics-1.0
+  DF_fake: 1
+  DF_real: 0
+  # UADFV
+  UADFV_Fake: 1
+  UADFV_Real: 0
+  # roop
+  roop_Fake: 1
+  roop_Real: 0
+# data augmentation
+use_data_augmentation: true  # Add this flag to enable/disable data augmentation
+data_aug:
+  flip_prob: 0.5
+  rotate_prob: 0.5
+  rotate_limit: [-10, 10]
+  blur_prob: 0.5
+  blur_limit: [3, 7]
+  brightness_prob: 0.5
+  brightness_limit: [-0.1, 0.1]
+  contrast_limit: [-0.1, 0.1]
+  quality_lower: 40
+  quality_upper: 100
+# mean and std for normalization
+mean: [0.5, 0.5, 0.5]
+std: [0.5, 0.5, 0.5]
+# optimizer config
+optimizer:
+  # choose between 'adam' and 'sgd'
+  type: adam
+  adam:
+    lr: 0.0002  # learning rate
+    beta1: 0.9  # beta1 for Adam optimizer
+    beta2: 0.999 # beta2 for Adam optimizer
+    eps: 0.00000001  # epsilon for Adam optimizer
+    weight_decay: 0.0005  # weight decay for regularization
+    amsgrad: false
+  sgd:
+    lr: 0.0002  # learning rate
+    momentum: 0.9  # momentum for SGD optimizer
+    weight_decay: 0.0005  # weight decay for regularization
+# training config
+lr_scheduler: null   # learning rate scheduler
+nEpochs: 5   # number of epochs to train for
+start_epoch: 0   # manual epoch number (useful for restarts)
+save_epoch: 1   # interval epochs for saving models
+rec_iter: 100   # interval iterations for recording
+logdir: ./logs   # folder to output images and logs
+manualSeed: 1024   # manual seed for random number generation
+save_ckpt: false   # whether to save checkpoint
+# loss function
+loss_func:
+ cls_loss: cross_entropy   # loss function to use
+ spe_loss: cross_entropy
+ con_loss: contrastive_regularization
+ rec_loss: l1loss
+losstype: null
+# metric
+metric_scoring: auc   # metric for evaluation (auc, acc, eer, ap)
+# cuda
+cuda: true   # whether to use CUDA acceleration
+cudnn: true   # whether to use CuDNN for convolution operations

training/config/config/detector/xception.yaml ADDED Viewed

	@@ -0,0 +1,86 @@

+# log dir
+log_dir: /data/home/zhiyuanyan/DeepfakeBench/logs/testing_bench
+# model setting
+pretrained: /teamspace/studios/this_studio/DeepfakeBench/training/pretrained/xception-b5690688.pth   # path to a pre-trained model, if using one
+model_name: xception   # model name
+backbone_name: xception  # backbone name
+#backbone setting
+backbone_config:
+  mode: original
+  num_classes: 2
+  inc: 3
+  dropout: false
+# dataset
+all_dataset: [FaceForensics++, FF-F2F, FF-DF, FF-FS, FF-NT, FaceShifter, DeepFakeDetection, Celeb-DF-v1, Celeb-DF-v2, DFDCP, DFDC, DeeperForensics-1.0, UADFV]
+train_dataset: [Celeb-DF-v1, DFDCP, UADFV]
+test_dataset: [Celeb-DF-v1, DFDCP, UADFV]
+compression: c23  # compression-level for videos
+train_batchSize: 32   # training batch size
+test_batchSize: 32   # test batch size
+workers: 8   # number of data loading workers
+frame_num: {'train': 32, 'test': 32}   # number of frames to use per video in training and testing
+resolution: 256   # resolution of output image to network
+with_mask: false   # whether to include mask information in the input
+with_landmark: false   # whether to include facial landmark information in the input
+# data augmentation
+use_data_augmentation: true  # Add this flag to enable/disable data augmentation
+data_aug:
+  flip_prob: 0.5
+  rotate_prob: 0.0
+  rotate_limit: [-10, 10]
+  blur_prob: 0.5
+  blur_limit: [3, 7]
+  brightness_prob: 0.5
+  brightness_limit: [-0.1, 0.1]
+  contrast_limit: [-0.1, 0.1]
+  quality_lower: 40
+  quality_upper: 100
+# mean and std for normalization
+mean: [0.5, 0.5, 0.5]
+std: [0.5, 0.5, 0.5]
+# optimizer config
+optimizer:
+  # choose between 'adam' and 'sgd'
+  type: adam
+  adam:
+    lr: 0.0002  # learning rate
+    beta1: 0.9  # beta1 for Adam optimizer
+    beta2: 0.999 # beta2 for Adam optimizer
+    eps: 0.00000001  # epsilon for Adam optimizer
+    weight_decay: 0.0005  # weight decay for regularization
+    amsgrad: false
+  sgd:
+    lr: 0.0002  # learning rate
+    momentum: 0.9  # momentum for SGD optimizer
+    weight_decay: 0.0005  # weight decay for regularization
+# training config
+lr_scheduler: null   # learning rate scheduler
+nEpochs: 10   # number of epochs to train for
+start_epoch: 0   # manual epoch number (useful for restarts)
+save_epoch: 1   # interval epochs for saving models
+rec_iter: 100   # interval iterations for recording
+logdir: ./logs   # folder to output images and logs
+manualSeed: 1024   # manual seed for random number generation
+save_ckpt: true   # whether to save checkpoint
+save_feat: true   # whether to save features
+# loss function
+loss_func: cross_entropy   # loss function to use
+losstype: null
+# metric
+metric_scoring: auc   # metric for evaluation (auc, acc, eer, ap)
+# cuda
+cuda: true   # whether to use CUDA acceleration
+cudnn: true   # whether to use CuDNN for convolution operations

training/config/config/test_config.yaml ADDED Viewed

	@@ -0,0 +1,38 @@

+mode: test
+lmdb: False
+dataset_root_rgb: './datasets'
+lmdb_dir:  'I:\transform_2_lmdb'
+dataset_json_folder: '/teamspace/studios/this_studio/DeepfakeBench/preprocessing/dataset_json'
+label_dict:
+  # DFD
+  DFD_fake: 1
+  DFD_real: 0
+  # FF++ + FaceShifter(FF-real+FF-FH)
+  FF-SH: 1
+  FF-F2F: 1
+  FF-DF: 1
+  FF-FS: 1
+  FF-NT: 1
+  FF-FH: 1
+  FF-real: 0
+  # CelebDF
+  CelebDFv1_real: 0
+  CelebDFv1_fake: 1
+  CelebDFv2_real: 0
+  CelebDFv2_fake: 1
+  # DFDCP
+  DFDCP_Real: 0
+  DFDCP_FakeA: 1
+  DFDCP_FakeB: 1
+  # DFDC
+  DFDC_Fake: 1
+  DFDC_Real: 0
+  # DeeperForensics-1.0
+  DF_fake: 1
+  DF_real: 0
+  # UADFV
+  UADFV_Fake: 1
+  UADFV_Real: 0
+  # Roop
+  roop_Real: 0
+  roop_Fake: 1

training/config/config/train_config.yaml ADDED Viewed

	@@ -0,0 +1,43 @@

+mode: train
+lmdb: False
+dry_run: false
+dataset_root_rgb: './datasets'
+lmdb_dir:  'I:\transform_2_lmdb'
+dataset_json_folder: '/teamspace/studios/this_studio/DeepfakeBench/preprocessing/dataset_json'
+SWA: False
+save_avg: True
+log_dir: ./logs/training/
+# label settings
+label_dict:
+  # DFD
+  DFD_fake: 1
+  DFD_real: 0
+  # FF++ + FaceShifter(FF-real+FF-FH)
+  FF-SH: 1
+  FF-F2F: 1
+  FF-DF: 1
+  FF-FS: 1
+  FF-NT: 1
+  FF-FH: 1
+  FF-real: 0
+  # CelebDF
+  CelebDFv1_real: 0
+  CelebDFv1_fake: 1
+  CelebDFv2_real: 0
+  CelebDFv2_fake: 1
+  # DFDCP
+  DFDCP_Real: 0
+  DFDCP_FakeA: 1
+  DFDCP_FakeB: 1
+  # DFDC
+  DFDC_Fake: 1
+  DFDC_Real: 0
+  # DeeperForensics-1.0
+  DF_fake: 1
+  DF_real: 0
+  # UADFV
+  UADFV_Fake: 1
+  UADFV_Real: 0
+  # Roop
+  roop_Real: 0
+  roop_Fake: 1

training/config/detector/efficientnetb4.yaml ADDED Viewed

	@@ -0,0 +1,88 @@

+# log dir
+log_dir: logs/evaluations/effnb4
+# model setting
+# pretrained: /home/zhiyuanyan/disfin/deepfake_benchmark/training/pretrained/xception-b5690688.pth   # path to a pre-trained model, if using one
+pretrained: ./training/pretrained/efficientnet-b4-6ed6700e.pth   # path to a pre-trained model, if using one
+model_name: efficientnetb4   # model name
+backbone_name: efficientnetb4  # backbone name
+#backbone setting
+backbone_config:
+  num_classes: 2
+  inc: 3
+  dropout: false
+  mode: Original
+# dataset
+all_dataset: [FaceForensics++, FF-F2F, FF-DF, FF-FS, FF-NT, FaceShifter, DeepFakeDetection, Celeb-DF-v1, Celeb-DF-v2, DFDCP, DFDC, DeeperForensics-1.0, UADFV]
+train_dataset: [FF-NT]
+test_dataset: [FaceForensics++, FF-F2F, FF-DF, FF-FS, FF-NT]
+compression: c23  # compression-level for videos
+train_batchSize: 32   # training batch size
+test_batchSize: 32   # test batch size
+workers: 8   # number of data loading workers
+frame_num: {'train': 32, 'test': 32}   # number of frames to use per video in training and testing
+resolution: 256   # resolution of output image to network
+with_mask: false   # whether to include mask information in the input
+with_landmark: false   # whether to include facial landmark information in the input
+save_ckpt: true   # whether to save checkpoint
+save_feat: true   # whether to save features
+# data augmentation
+use_data_augmentation: true  # whether to enable data augmentation (set to false to disable)
+data_aug:
+  flip_prob: 0.5
+  rotate_prob: 0.5
+  rotate_limit: [-10, 10]
+  blur_prob: 0.5
+  blur_limit: [3, 7]
+  brightness_prob: 0.5
+  brightness_limit: [-0.1, 0.1]
+  contrast_limit: [-0.1, 0.1]
+  quality_lower: 40
+  quality_upper: 100
+# mean and std for normalization
+mean: [0.5, 0.5, 0.5]
+std: [0.5, 0.5, 0.5]
+# optimizer config
+optimizer:
+  # choose between 'adam' and 'sgd'
+  type: adam
+  adam:
+    lr: 0.0002  # learning rate
+    beta1: 0.9  # beta1 for Adam optimizer
+    beta2: 0.999 # beta2 for Adam optimizer
+    eps: 0.00000001  # epsilon for Adam optimizer
+    weight_decay: 0.0005  # weight decay for regularization
+    amsgrad: false
+  sgd:
+    lr: 0.0002  # learning rate
+    momentum: 0.9  # momentum for SGD optimizer
+    weight_decay: 0.0005  # weight decay for regularization
+# training config
+lr_scheduler: null   # learning rate scheduler
+nEpochs: 10   # number of epochs to train for
+start_epoch: 0   # manual epoch number (useful for restarts)
+save_epoch: 1   # interval epochs for saving models
+rec_iter: 100   # interval iterations for recording
+logdir: ./logs   # folder to output images and logs
+manualSeed: 1024   # manual seed for random number generation
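+save_ckpt: false   # whether to save checkpoint (NOTE: this key is also set to true earlier in this file; duplicate keys are invalid YAML and loaders such as PyYAML keep this later value)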
+# loss function
+loss_func: cross_entropy   # loss function to use
+losstype: null
+# metric
+metric_scoring: auc   # metric for evaluation (auc, acc, eer, ap)
+# cuda
+cuda: true   # whether to use CUDA acceleration
+cudnn: true   # whether to use CuDNN for convolution operations

training/config/detector/ucf.yaml ADDED Viewed

	@@ -0,0 +1,131 @@

+# log dir
+log_dir: /data/home/zhiyuanyan/DeepfakeBench/debug_logs/ucf
+# model setting
+pretrained: /teamspace/studios/this_studio/DeepfakeBench/training/pretrained/xception-b5690688.pth   # path to a pre-trained model, if using one
+# pretrained: '/home/zhiyuanyan/.cache/torch/hub/checkpoints/resnet34-b627a593.pth'   # path to a pre-trained model, if using one
+model_name: ucf   # model name
+backbone_name: xception  # backbone name
+encoder_feat_dim: 512  # feature dimension of the backbone
+#backbone setting
+backbone_config:
+  mode: adjust_channel
+  num_classes: 2
+  inc: 3
+  dropout: false
+# dataset
+all_dataset: [FaceForensics++, FF-F2F, FF-DF, FF-FS, FF-NT, FaceShifter, DeepFakeDetection, Celeb-DF-v1, Celeb-DF-v2, DFDCP, DFDC, DeeperForensics-1.0, UADFV]
+train_dataset: [FF-F2F, FF-DF, FF-FS, FF-NT,]
+test_dataset: [Celeb-DF-v2]
+dataset_type: pair
+compression: c23  # compression-level for videos
+train_batchSize: 16   # training batch size
+test_batchSize: 32   # test batch size
+workers: 8   # number of data loading workers
+frame_num: {'train': 32, 'test': 32}   # number of frames to use per video in training and testing
+resolution: 256   # resolution of output image to network
+with_mask: false   # whether to include mask information in the input
+with_landmark: false   # whether to include facial landmark information in the input
+save_ckpt: true   # whether to save checkpoint
+save_feat: true   # whether to save features
+# label settings
+label_dict:
+  # DFD
+  DFD_fake: 1
+  DFD_real: 0
+  FaceShifter: 1
+  FF-FH: 1
+  # FF++ + FaceShifter(FF-real+FF-FH)
+  # ucf specific label setting
+  FF-DF: 1
+  FF-F2F: 2
+  FF-FS: 3
+  FF-NT: 4
+  FF-real: 0
+  # CelebDF
+  CelebDFv1_real: 0
+  CelebDFv1_fake: 1
+  CelebDFv2_real: 0
+  CelebDFv2_fake: 1
+  # DFDCP
+  DFDCP_Real: 0
+  DFDCP_FakeA: 1
+  DFDCP_FakeB: 1
+  # DFDC
+  DFDC_Fake: 1
+  DFDC_Real: 0
+  # DeeperForensics-1.0
+  DF_fake: 1
+  DF_real: 0
+  # UADFV
+  UADFV_Fake: 1
+  UADFV_Real: 0
+  # roop
+  roop_Fake: 1
+  roop_Real: 0
+# data augmentation
+use_data_augmentation: true  # whether to enable data augmentation (set to false to disable)
+data_aug:
+  flip_prob: 0.5
+  rotate_prob: 0.5
+  rotate_limit: [-10, 10]
+  blur_prob: 0.5
+  blur_limit: [3, 7]
+  brightness_prob: 0.5
+  brightness_limit: [-0.1, 0.1]
+  contrast_limit: [-0.1, 0.1]
+  quality_lower: 40
+  quality_upper: 100
+# mean and std for normalization
+mean: [0.5, 0.5, 0.5]
+std: [0.5, 0.5, 0.5]
+# optimizer config
+optimizer:
+  # choose between 'adam' and 'sgd'
+  type: adam
+  adam:
+    lr: 0.0002  # learning rate
+    beta1: 0.9  # beta1 for Adam optimizer
+    beta2: 0.999 # beta2 for Adam optimizer
+    eps: 0.00000001  # epsilon for Adam optimizer
+    weight_decay: 0.0005  # weight decay for regularization
+    amsgrad: false
+  sgd:
+    lr: 0.0002  # learning rate
+    momentum: 0.9  # momentum for SGD optimizer
+    weight_decay: 0.0005  # weight decay for regularization
+# training config
+lr_scheduler: null   # learning rate scheduler
+nEpochs: 5   # number of epochs to train for
+start_epoch: 0   # manual epoch number (useful for restarts)
+save_epoch: 1   # interval epochs for saving models
+rec_iter: 100   # interval iterations for recording
+logdir: ./logs   # folder to output images and logs
+manualSeed: 1024   # manual seed for random number generation
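+save_ckpt: false   # whether to save checkpoint (NOTE: this key is also set to true earlier in this file; duplicate keys are invalid YAML and loaders such as PyYAML keep this later value)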
+# loss function
+loss_func:
+ cls_loss: cross_entropy   # loss function to use
+ spe_loss: cross_entropy
+ con_loss: contrastive_regularization
+ rec_loss: l1loss
+losstype: null
+# metric
+metric_scoring: auc   # metric for evaluation (auc, acc, eer, ap)
+# cuda
+cuda: true   # whether to use CUDA acceleration
+cudnn: true   # whether to use CuDNN for convolution operations

training/config/detector/xception.yaml ADDED Viewed

	@@ -0,0 +1,86 @@

+# log dir
+log_dir: /teamspace/studios/this_studio/DeepfakeBench/logs/testing_bench
+# model setting
+pretrained: /teamspace/studios/this_studio/DeepfakeBench/training/pretrained/xception-b5690688.pth   # path to a pre-trained model, if using one
+model_name: xception   # model name
+backbone_name: xception  # backbone name
+#backbone setting
+backbone_config:
+  mode: original
+  num_classes: 2
+  inc: 3
+  dropout: false
+# dataset
+all_dataset: [FaceForensics++, FF-F2F, FF-DF, FF-FS, FF-NT, FaceShifter, DeepFakeDetection, Celeb-DF-v1, Celeb-DF-v2, DFDCP, DFDC, DeeperForensics-1.0, UADFV]
+train_dataset: [Celeb-DF-v1, DFDCP]
+test_dataset: [UADFV]
+compression: c23  # compression-level for videos
+train_batchSize: 32   # training batch size
+test_batchSize: 32   # test batch size
+workers: 8   # number of data loading workers
+frame_num: {'train': 32, 'test': 32}   # number of frames to use per video in training and testing
+resolution: 256   # resolution of output image to network
+with_mask: false   # whether to include mask information in the input
+with_landmark: false   # whether to include facial landmark information in the input
+# data augmentation
+use_data_augmentation: true  # whether to enable data augmentation (set to false to disable)
+data_aug:
+  flip_prob: 0.5
+  rotate_prob: 0.0
+  rotate_limit: [-10, 10]
+  blur_prob: 0.5
+  blur_limit: [3, 7]
+  brightness_prob: 0.5
+  brightness_limit: [-0.1, 0.1]
+  contrast_limit: [-0.1, 0.1]
+  quality_lower: 40
+  quality_upper: 100
+# mean and std for normalization
+mean: [0.5, 0.5, 0.5]
+std: [0.5, 0.5, 0.5]
+# optimizer config
+optimizer:
+  # choose between 'adam' and 'sgd'
+  type: adam
+  adam:
+    lr: 0.0002  # learning rate
+    beta1: 0.9  # beta1 for Adam optimizer
+    beta2: 0.999 # beta2 for Adam optimizer
+    eps: 0.00000001  # epsilon for Adam optimizer
+    weight_decay: 0.0005  # weight decay for regularization
+    amsgrad: false
+  sgd:
+    lr: 0.0002  # learning rate
+    momentum: 0.9  # momentum for SGD optimizer
+    weight_decay: 0.0005  # weight decay for regularization
+# training config
+lr_scheduler: null   # learning rate scheduler
+nEpochs: 10   # number of epochs to train for
+start_epoch: 0   # manual epoch number (useful for restarts)
+save_epoch: 1   # interval epochs for saving models
+rec_iter: 100   # interval iterations for recording
+logdir: ./logs   # folder to output images and logs
+manualSeed: 1024   # manual seed for random number generation
+save_ckpt: true   # whether to save checkpoint
+save_feat: true   # whether to save features
+# loss function
+loss_func: cross_entropy   # loss function to use
+losstype: null
+# metric
+metric_scoring: auc   # metric for evaluation (auc, acc, eer, ap)
+# cuda
+cuda: true   # whether to use CUDA acceleration
+cudnn: true   # whether to use CuDNN for convolution operations

training/config/test_config.yaml ADDED Viewed

	@@ -0,0 +1,38 @@

+mode: test
+lmdb: False
+rgb_dir: ''
+lmdb_dir:  './datasets/lmdb'
+dataset_json_folder: './preprocessing/dataset_json'
+label_dict:
+  # DFD
+  DFD_fake: 1
+  DFD_real: 0
+  # FF++ + FaceShifter(FF-real+FF-FH)
+  FF-SH: 1
+  FF-F2F: 1
+  FF-DF: 1
+  FF-FS: 1
+  FF-NT: 1
+  FF-FH: 1
+  FF-real: 0
+  # CelebDF
+  CelebDFv1_real: 0
+  CelebDFv1_fake: 1
+  CelebDFv2_real: 0
+  CelebDFv2_fake: 1
+  # DFDCP
+  DFDCP_Real: 0
+  DFDCP_FakeA: 1
+  DFDCP_FakeB: 1
+  # DFDC
+  DFDC_Fake: 1
+  DFDC_Real: 0
+  # DeeperForensics-1.0
+  DF_fake: 1
+  DF_real: 0
+  # UADFV
+  UADFV_Fake: 1
+  UADFV_Real: 0
+  # Roop
+  roop_Real: 0
+  roop_Fake: 1

training/config/train_config.yaml ADDED Viewed

	@@ -0,0 +1,43 @@

+mode: train
+lmdb: False
+dry_run: false
+rgb_dir: ''
+lmdb_dir:  './datasets/lmdb'
+dataset_json_folder: './preprocessing/dataset_json'
+SWA: False
+save_avg: True
+log_dir: ./logs/training/
+# label settings
+label_dict:
+  # DFD
+  DFD_fake: 1
+  DFD_real: 0
+  # FF++ + FaceShifter(FF-real+FF-FH)
+  FF-SH: 1
+  FF-F2F: 1
+  FF-DF: 1
+  FF-FS: 1
+  FF-NT: 1
+  FF-FH: 1
+  FF-real: 0
+  # CelebDF
+  CelebDFv1_real: 0
+  CelebDFv1_fake: 1
+  CelebDFv2_real: 0
+  CelebDFv2_fake: 1
+  # DFDCP
+  DFDCP_Real: 0
+  DFDCP_FakeA: 1
+  DFDCP_FakeB: 1
+  # DFDC
+  DFDC_Fake: 1
+  DFDC_Real: 0
+  # DeeperForensics-1.0
+  DF_fake: 1
+  DF_real: 0
+  # UADFV
+  UADFV_Fake: 1
+  UADFV_Real: 0
+  # Roop
+  roop_Real: 0
+  roop_Fake: 1

training/dataset/I2G_dataset.py ADDED Viewed

	@@ -0,0 +1,389 @@

+# Created by: Kaede Shiohara
+# Yamasaki Lab at The University of Tokyo
+# [email protected]
+# Copyright (c) 2021
+# 3rd party softwares' licenses are noticed at https://github.com/mapooon/SelfBlendedImages/blob/master/LICENSE
+import logging
+import os
+import pickle
+import cv2
+import numpy as np
+import scipy as sp
+import yaml
+from skimage.measure import label, regionprops
+import random
+from PIL import Image
+import sys
+import albumentations as A
+from torch.utils.data import DataLoader
+from dataset.utils.bi_online_generation import random_get_hull
+from dataset.abstract_dataset import DeepfakeAbstractBaseDataset
+from dataset.pair_dataset import pairDataset
+import torch
+class RandomDownScale(A.core.transforms_interface.ImageOnlyTransform):
+    def apply(self, img, ratio_list=None, **params):
+        if ratio_list is None:
+            ratio_list = [2, 4]
+        r = ratio_list[np.random.randint(len(ratio_list))]
+        return self.randomdownscale(img, r)
+    def randomdownscale(self, img, r):
+        keep_ratio = True
+        keep_input_shape = True
+        H, W, C = img.shape
+        img_ds = cv2.resize(img, (int(W / r), int(H / r)), interpolation=cv2.INTER_NEAREST)
+        if keep_input_shape:
+            img_ds = cv2.resize(img_ds, (W, H), interpolation=cv2.INTER_LINEAR)
+        return img_ds
+'''
+from PIL import ImageDraw
+# Create an object that can draw on the image
+img_pil=Image.fromarray(img)
+draw = ImageDraw.Draw(img_pil)
+# Draw the landmark points on the image
+for i, point in enumerate(landmark):
+    x, y = point
+    radius = 1  # radius of each point
+    draw.ellipse((x-radius, y-radius, x+radius, y+radius), fill="red")
+    draw.text((x+radius+2, y-radius), str(i), fill="black")  # add an index label next to the point
+img_pil.show()
+'''
+def alpha_blend(source, target, mask):
+    mask_blured = get_blend_mask(mask)
+    img_blended = (mask_blured * source + (1 - mask_blured) * target)
+    return img_blended, mask_blured
+def dynamic_blend(source, target, mask):
+    mask_blured = get_blend_mask(mask)
+    # worth consideration, 1 in the official paper, 0.25, 0.5, 0.75,1,1,1 in sbi.
+    blend_list = [1, 1, 1]
+    blend_ratio = blend_list[np.random.randint(len(blend_list))]
+    mask_blured *= blend_ratio
+    img_blended = (mask_blured * source + (1 - mask_blured) * target)
+    return img_blended, mask_blured
+def get_blend_mask(mask):
+    H, W = mask.shape
+    size_h = np.random.randint(192, 257)
+    size_w = np.random.randint(192, 257)
+    mask = cv2.resize(mask, (size_w, size_h))
+    kernel_1 = random.randrange(5, 26, 2)
+    kernel_1 = (kernel_1, kernel_1)
+    kernel_2 = random.randrange(5, 26, 2)
+    kernel_2 = (kernel_2, kernel_2)
+    mask_blured = cv2.GaussianBlur(mask, kernel_1, 0)
+    mask_blured = mask_blured / (mask_blured.max())
+    mask_blured[mask_blured < 1] = 0
+    mask_blured = cv2.GaussianBlur(mask_blured, kernel_2, np.random.randint(5, 46))
+    mask_blured = mask_blured / (mask_blured.max())
+    mask_blured = cv2.resize(mask_blured, (W, H))
+    return mask_blured.reshape((mask_blured.shape + (1,)))
+def get_alpha_blend_mask(mask):
+    kernel_list = [(11, 11), (9, 9), (7, 7), (5, 5), (3, 3)]
+    blend_list = [0.25, 0.5, 0.75]
+    kernel_idxs = random.choices(range(len(kernel_list)), k=2)
+    blend_ratio = blend_list[random.sample(range(len(blend_list)), 1)[0]]
+    mask_blured = cv2.GaussianBlur(mask, kernel_list[0], 0)
+    # print(mask_blured.max())
+    mask_blured[mask_blured < mask_blured.max()] = 0
+    mask_blured[mask_blured > 0] = 1
+    # mask_blured = mask
+    mask_blured = cv2.GaussianBlur(mask_blured, kernel_list[kernel_idxs[1]], 0)
+    mask_blured = mask_blured / (mask_blured.max())
+    return mask_blured.reshape((mask_blured.shape + (1,)))
+class I2GDataset(DeepfakeAbstractBaseDataset):
+    def __init__(self, config=None, mode='train'):
+        #config['GridShuffle']['p'] = 0
+        super().__init__(config, mode)
+        real_images_list = [img for img, label in zip(self.image_list, self.label_list) if label == 0]
+        self.real_images_list = list(set(real_images_list))  #  de-duplicate since DF,F2F,FS,NT have same real images
+        self.source_transforms = self.get_source_transforms()
+        self.transforms = self.get_transforms()
+        self.init_nearest()
+    def init_nearest(self):
+        if os.path.exists('training/lib/nearest_face_info.pkl'):
+            with open('training/lib/nearest_face_info.pkl', 'rb') as f:
+                face_info = pickle.load(f)
+        self.face_info = face_info
+        # Check if the dictionary has already been created
+        if os.path.exists('training/lib/landmark_dict_ffall.pkl'):
+            with open('training/lib/landmark_dict_ffall.pkl', 'rb') as f:
+                landmark_dict = pickle.load(f)
+        self.landmark_dict = landmark_dict
+    def reorder_landmark(self, landmark):
+        landmark = landmark.copy()  # 创建landmark的副本
+        landmark_add = np.zeros((13, 2))
+        for idx, idx_l in enumerate([77, 75, 76, 68, 69, 70, 71, 80, 72, 73, 79, 74, 78]):
+            landmark_add[idx] = landmark[idx_l]
+        landmark[68:] = landmark_add
+        return landmark
+    def hflip(self, img, mask=None, landmark=None, bbox=None):
+        H, W = img.shape[:2]
+        landmark = landmark.copy()
+        if bbox is not None:
+            bbox = bbox.copy()
+        if landmark is not None:
+            landmark_new = np.zeros_like(landmark)
+            landmark_new[:17] = landmark[:17][::-1]
+            landmark_new[17:27] = landmark[17:27][::-1]
+            landmark_new[27:31] = landmark[27:31]
+            landmark_new[31:36] = landmark[31:36][::-1]
+            landmark_new[36:40] = landmark[42:46][::-1]
+            landmark_new[40:42] = landmark[46:48][::-1]
+            landmark_new[42:46] = landmark[36:40][::-1]
+            landmark_new[46:48] = landmark[40:42][::-1]
+            landmark_new[48:55] = landmark[48:55][::-1]
+            landmark_new[55:60] = landmark[55:60][::-1]
+            landmark_new[60:65] = landmark[60:65][::-1]
+            landmark_new[65:68] = landmark[65:68][::-1]
+            if len(landmark) == 68:
+                pass
+            elif len(landmark) == 81:
+                landmark_new[68:81] = landmark[68:81][::-1]
+            else:
+                raise NotImplementedError
+            landmark_new[:, 0] = W - landmark_new[:, 0]
+        else:
+            landmark_new = None
+        if bbox is not None:
+            bbox_new = np.zeros_like(bbox)
+            bbox_new[0, 0] = bbox[1, 0]
+            bbox_new[1, 0] = bbox[0, 0]
+            bbox_new[:, 0] = W - bbox_new[:, 0]
+            bbox_new[:, 1] = bbox[:, 1].copy()
+            if len(bbox) > 2:
+                bbox_new[2, 0] = W - bbox[3, 0]
+                bbox_new[2, 1] = bbox[3, 1]
+                bbox_new[3, 0] = W - bbox[2, 0]
+                bbox_new[3, 1] = bbox[2, 1]
+                bbox_new[4, 0] = W - bbox[4, 0]
+                bbox_new[4, 1] = bbox[4, 1]
+                bbox_new[5, 0] = W - bbox[6, 0]
+                bbox_new[5, 1] = bbox[6, 1]
+                bbox_new[6, 0] = W - bbox[5, 0]
+                bbox_new[6, 1] = bbox[5, 1]
+        else:
+            bbox_new = None
+        if mask is not None:
+            mask = mask[:, ::-1]
+        else:
+            mask = None
+        img = img[:, ::-1].copy()
+        return img, mask, landmark_new, bbox_new
+    def get_source_transforms(self):
+        return A.Compose([
+            A.Compose([
+                A.RGBShift((-20, 20), (-20, 20), (-20, 20), p=0.3),
+                A.HueSaturationValue(hue_shift_limit=(-0.3, 0.3), sat_shift_limit=(-0.3, 0.3),
+                                     val_shift_limit=(-0.3, 0.3), p=1),
+                A.RandomBrightnessContrast(brightness_limit=(-0.1, 0.1), contrast_limit=(-0.1, 0.1), p=1),
+            ], p=1),
+            A.OneOf([
+                RandomDownScale(p=1),
+                A.Sharpen(alpha=(0.2, 0.5), lightness=(0.5, 1.0), p=1),
+            ], p=1),
+        ], p=1.)
+    def get_fg_bg(self, one_lmk_path):
+        """
+        Get foreground and background paths
+        """
+        bg_lmk_path = one_lmk_path
+        # Randomly pick one from the nearest neighbors for the foreground
+        if bg_lmk_path in self.face_info:
+            fg_lmk_path = random.choice(self.face_info[bg_lmk_path])
+        else:
+            fg_lmk_path = bg_lmk_path
+        return fg_lmk_path, bg_lmk_path
+    def get_transforms(self):
+        return A.Compose([
+            A.RGBShift((-20, 20), (-20, 20), (-20, 20), p=0.3),
+            A.HueSaturationValue(hue_shift_limit=(-0.3, 0.3), sat_shift_limit=(-0.3, 0.3),
+                                 val_shift_limit=(-0.3, 0.3), p=0.3),
+            A.RandomBrightnessContrast(brightness_limit=(-0.3, 0.3), contrast_limit=(-0.3, 0.3), p=0.3),
+            A.ImageCompression(quality_lower=40, quality_upper=100, p=0.5),
+        ],
+            additional_targets={f'image1': 'image'},
+            p=1.)
+    def randaffine(self, img, mask):
+        f = A.Affine(
+            translate_percent={'x': (-0.03, 0.03), 'y': (-0.015, 0.015)},
+            scale=[0.95, 1 / 0.95],
+            fit_output=False,
+            p=1)
+        g = A.ElasticTransform(
+            alpha=50,
+            sigma=7,
+            alpha_affine=0,
+            p=1,
+        )
+        transformed = f(image=img, mask=mask)
+        img = transformed['image']
+        mask = transformed['mask']
+        transformed = g(image=img, mask=mask)
+        mask = transformed['mask']
+        return img, mask
+    def __len__(self):
+        return len(self.real_images_list)
+    def colorTransfer(self, src, dst, mask):
+        transferredDst = np.copy(dst)
+        maskIndices = np.where(mask != 0)
+        maskedSrc = src[maskIndices[0], maskIndices[1]].astype(np.float32)
+        maskedDst = dst[maskIndices[0], maskIndices[1]].astype(np.float32)
+        # Compute means and standard deviations
+        meanSrc = np.mean(maskedSrc, axis=0)
+        stdSrc = np.std(maskedSrc, axis=0)
+        meanDst = np.mean(maskedDst, axis=0)
+        stdDst = np.std(maskedDst, axis=0)
+        # Perform color transfer
+        maskedDst = (maskedDst - meanDst) * (stdSrc / stdDst) + meanSrc
+        maskedDst = np.clip(maskedDst, 0, 255)
+        # Copy the entire background into transferredDst
+        transferredDst = np.copy(dst)
+        # Now apply color transfer only to the masked region
+        transferredDst[maskIndices[0], maskIndices[1]] = maskedDst.astype(np.uint8)
+        return transferredDst
+    def two_blending(self, img_bg, img_fg, landmark):
+        H, W = len(img_bg), len(img_bg[0])
+        if np.random.rand() < 0.25:
+            landmark = landmark[:68]
+        logging.disable(logging.FATAL)
+        mask = random_get_hull(landmark, img_bg)
+        logging.disable(logging.NOTSET)
+        source = img_fg.copy()
+        target = img_bg.copy()
+        # if np.random.rand() < 0.5:
+        #     source = self.source_transforms(image=source.astype(np.uint8))['image']
+        # else:
+        #     target = self.source_transforms(image=target.astype(np.uint8))['image']
+        source_v2, mask_v2 = self.randaffine(source, mask)
+        source_v3=self.colorTransfer(target,source_v2,mask_v2)
+        img_blended, mask = dynamic_blend(source_v3, target, mask_v2)
+        img_blended = img_blended.astype(np.uint8)
+        img = img_bg.astype(np.uint8)
+        return img, img_blended, mask.squeeze(2)
+    def __getitem__(self, index):
+        image_path_bg = self.real_images_list[index]
+        label = 0
+        # Get the mask and landmark paths
+        landmark_path_bg = image_path_bg.replace('frames', 'landmarks').replace('.png', '.npy')  # Use .npy for landmark
+        landmark_path_fg, landmark_path_bg = self.get_fg_bg(landmark_path_bg)
+        image_path_fg = landmark_path_fg.replace('landmarks','frames').replace('.npy','.png')
+        try:
+            image_bg = self.load_rgb(image_path_bg)
+            image_fg = self.load_rgb(image_path_fg)
+        except Exception as e:
+            # Skip this image and return the first one
+            print(f"Error loading image at index {index}: {e}")
+            return self.__getitem__(0)
+        image_bg = np.array(image_bg)  # Convert to numpy array for data augmentation
+        image_fg = np.array(image_fg)  # Convert to numpy array for data augmentation
+        landmarks_bg = self.load_landmark(landmark_path_bg)
+        landmarks_fg = self.load_landmark(landmark_path_fg)
+        landmarks_bg = np.clip(landmarks_bg, 0, self.config['resolution'] - 1)
+        landmarks_bg = self.reorder_landmark(landmarks_bg)
+        img_r, img_f, mask_f = self.two_blending(image_bg.copy(), image_fg.copy(),landmarks_bg.copy())
+        transformed = self.transforms(image=img_f.astype('uint8'), image1=img_r.astype('uint8'))
+        img_f = transformed['image']
+        img_r = transformed['image1']
+        # img_f = img_f.transpose((2, 0, 1))
+        # img_r = img_r.transpose((2, 0, 1))
+        img_f = self.normalize(self.to_tensor(img_f))
+        img_r = self.normalize(self.to_tensor(img_r))
+        mask_f = self.to_tensor(mask_f)
+        mask_r=torch.zeros_like(mask_f) # zeros or ones
+        return img_f, img_r, mask_f,mask_r
+    @staticmethod
+    def collate_fn(batch):
+        img_f, img_r, mask_f,mask_r = zip(*batch)
+        data = {}
+        fake_mask = torch.stack(mask_f,dim=0)
+        real_mask = torch.stack(mask_r, dim=0)
+        fake_images = torch.stack(img_f, dim=0)
+        real_images = torch.stack(img_r, dim=0)
+        data['image'] = torch.cat([real_images, fake_images], dim=0)
+        data['label'] = torch.tensor([0] * len(img_r) + [1] * len(img_f))
+        data['landmark'] = None
+        data['mask'] = torch.cat([real_mask, fake_mask], dim=0)
+        return data
+if __name__ == '__main__':
+    detector_path = r"./training/config/detector/xception.yaml"
+    # weights_path = "./ckpts/xception/CDFv2/tb_v1/ov.pth"
+    with open(detector_path, 'r') as f:
+        config = yaml.safe_load(f)
+    with open('./training/config/train_config.yaml', 'r') as f:
+        config2 = yaml.safe_load(f)
+    config2['data_manner'] = 'lmdb'
+    config['dataset_json_folder'] = 'preprocessing/dataset_json_v3'
+    config.update(config2)
+    dataset = I2GDataset(config=config)
+    batch_size = 2
+    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True,collate_fn=dataset.collate_fn)
+    for i, batch in enumerate(dataloader):
+        print(f"Batch {i}: {batch}")
+        continue

training/dataset/__init__.py ADDED Viewed

	@@ -0,0 +1,19 @@

+import os
+import sys
+# Make the package's parent directory and the directory above it importable,
+# so absolute imports rooted at either location resolve.
+current_file_path = os.path.abspath(__file__)
+# Directory that contains this package directory.
+parent_dir = os.path.dirname(os.path.dirname(current_file_path))
+# One level above parent_dir.
+project_root_dir = os.path.dirname(parent_dir)
+# Appended (not prepended), so existing sys.path entries keep priority.
+sys.path.append(parent_dir)
+sys.path.append(project_root_dir)
+from .I2G_dataset import I2GDataset
+from .iid_dataset import IIDDataset
+from .abstract_dataset import DeepfakeAbstractBaseDataset
+from .ff_blend import FFBlendDataset
+from .fwa_blend import FWABlendDataset
+from .lrl_dataset import LRLDataset
+from .pair_dataset import pairDataset
+from .sbi_dataset import SBIDataset
+from .lsda_dataset import LSDADataset
+from .tall_dataset import TALLDataset

training/dataset/abstract_dataset.py ADDED Viewed

	@@ -0,0 +1,621 @@

+# author: Zhiyuan Yan
+# email: [email protected]
+# date: 2023-03-30
+# description: Abstract Base Class for all types of deepfake datasets.
+import sys
+import lmdb
+sys.path.append('.')
+import os
+import math
+import yaml
+import glob
+import json
+import numpy as np
+from copy import deepcopy
+import cv2
+import random
+from PIL import Image
+from collections import defaultdict
+import torch
+from torch.autograd import Variable
+from torch.utils import data
+from torchvision import transforms as T
+import albumentations as A
+from .albu import IsotropicResize
+FFpp_pool=['FaceForensics++','FaceShifter','DeepFakeDetection','FF-DF','FF-F2F','FF-FS','FF-NT']#
+def all_in_pool(inputs,pool):
+    for each in inputs:
+        if each not in pool:
+            return False
+    return True
+class DeepfakeAbstractBaseDataset(data.Dataset):
+    """
+    Abstract base class for all deepfake datasets.
+    """
+    def __init__(self, config=None, mode='train'):
+        """Initializes the dataset object.
+        Args:
+            config (dict): A dictionary containing configuration parameters.
+            mode (str): A string indicating the mode (train or test).
+        Raises:
+            NotImplementedError: If mode is not train or test.
+        """
+        # Set the configuration and mode
+        self.config = config
+        self.mode = mode
+        self.compression = config['compression']
+        self.frame_num = config['frame_num'][mode]
+        # Check if 'video_mode' exists in config, otherwise set video_level to False
+        self.video_level = config.get('video_mode', False)
+        self.clip_size = config.get('clip_size', None)
+        self.lmdb = config.get('lmdb', False)
+        # Dataset dictionary
+        self.image_list = []
+        self.label_list = []
+        # Set the dataset dictionary based on the mode
+        if mode == 'train':
+            dataset_list = config['train_dataset']
+            # Training data should be collected together for training
+            image_list, label_list = [], []
+            for one_data in dataset_list:
+                tmp_image, tmp_label, tmp_name = self.collect_img_and_label_for_one_dataset(one_data)
+                image_list.extend(tmp_image)
+                label_list.extend(tmp_label)
+            if self.lmdb:
+                if len(dataset_list)>1:
+                    if all_in_pool(dataset_list,FFpp_pool):
+                        lmdb_path = os.path.join(config['lmdb_dir'], f"FaceForensics++_lmdb")
+                        self.env = lmdb.open(lmdb_path, create=False, subdir=True, readonly=True, lock=False)
+                    else:
+                        raise ValueError('Training with multiple dataset and lmdb is not implemented yet.')
+                else:
+                    lmdb_path = os.path.join(config['lmdb_dir'], f"{dataset_list[0] if dataset_list[0] not in FFpp_pool else 'FaceForensics++'}_lmdb")
+                    self.env = lmdb.open(lmdb_path, create=False, subdir=True, readonly=True, lock=False)
+        elif mode == 'test':
+            one_data = config['test_dataset']
+            # Test dataset should be evaluated separately. So collect only one dataset each time
+            image_list, label_list, name_list = self.collect_img_and_label_for_one_dataset(one_data)
+            if self.lmdb:
+                lmdb_path = os.path.join(config['lmdb_dir'], f"{one_data}_lmdb" if one_data not in FFpp_pool else 'FaceForensics++_lmdb')
+                self.env = lmdb.open(lmdb_path, create=False, subdir=True, readonly=True, lock=False)
+        else:
+            raise NotImplementedError('Only train and test modes are supported.')
+        assert len(image_list)!=0 and len(label_list)!=0, f"Collect nothing for {mode} mode!"
+        self.image_list, self.label_list = image_list, label_list
+        # Create a dictionary containing the image and label lists
+        self.data_dict = {
+            'image': self.image_list,
+            'label': self.label_list,
+        }
+        self.transform = self.init_data_aug_method()
+    def init_data_aug_method(self):
+        trans = A.Compose([
+            A.HorizontalFlip(p=self.config['data_aug']['flip_prob']),
+            A.Rotate(limit=self.config['data_aug']['rotate_limit'], p=self.config['data_aug']['rotate_prob']),
+            A.GaussianBlur(blur_limit=self.config['data_aug']['blur_limit'], p=self.config['data_aug']['blur_prob']),
+            A.OneOf([
+                IsotropicResize(max_side=self.config['resolution'], interpolation_down=cv2.INTER_AREA, interpolation_up=cv2.INTER_CUBIC),
+                IsotropicResize(max_side=self.config['resolution'], interpolation_down=cv2.INTER_AREA, interpolation_up=cv2.INTER_LINEAR),
+                IsotropicResize(max_side=self.config['resolution'], interpolation_down=cv2.INTER_LINEAR, interpolation_up=cv2.INTER_LINEAR),
+            ], p = 0 if self.config['with_landmark'] else 1),
+            A.OneOf([
+                A.RandomBrightnessContrast(brightness_limit=self.config['data_aug']['brightness_limit'], contrast_limit=self.config['data_aug']['contrast_limit']),
+                A.FancyPCA(),
+                A.HueSaturationValue()
+            ], p=0.5),
+            A.ImageCompression(quality_lower=self.config['data_aug']['quality_lower'], quality_upper=self.config['data_aug']['quality_upper'], p=0.5)
+        ],
+            keypoint_params=A.KeypointParams(format='xy') if self.config['with_landmark'] else None
+        )
+        return trans
+    def rescale_landmarks(self, landmarks, original_size=256, new_size=224):
+        scale_factor = new_size / original_size
+        rescaled_landmarks = landmarks * scale_factor
+        return rescaled_landmarks
+    def collect_img_and_label_for_one_dataset(self, dataset_name: str):
+        """Collects image and label lists.
+        Args:
+            dataset_name (str): A list containing one dataset information. e.g., 'FF-F2F'
+        Returns:
+            list: A list of image paths.
+            list: A list of labels.
+        Raises:
+            ValueError: If image paths or labels are not found.
+            NotImplementedError: If the dataset is not implemented yet.
+        """
+        # Initialize the label and frame path lists
+        label_list = []
+        frame_path_list = []
+        # Record video name for video-level metrics
+        video_name_list = []
+        # Try to get the dataset information from the JSON file
+        if not os.path.exists(self.config['dataset_json_folder']):
+            self.config['dataset_json_folder'] = self.config['dataset_json_folder'].replace('/Youtu_Pangu_Security_Public', '/Youtu_Pangu_Security/public')
+        try:
+            with open(os.path.join(self.config['dataset_json_folder'], dataset_name + '.json'), 'r') as f:
+                dataset_info = json.load(f)
+        except Exception as e:
+            print(e)
+            raise ValueError(f'dataset {dataset_name} not exist!')
+        # If JSON file exists, do the following data collection
+        # FIXME: ugly, need to be modified here.
+        cp = None
+        if dataset_name == 'FaceForensics++_c40':
+            dataset_name = 'FaceForensics++'
+            cp = 'c40'
+        elif dataset_name == 'FF-DF_c40':
+            dataset_name = 'FF-DF'
+            cp = 'c40'
+        elif dataset_name == 'FF-F2F_c40':
+            dataset_name = 'FF-F2F'
+            cp = 'c40'
+        elif dataset_name == 'FF-FS_c40':
+            dataset_name = 'FF-FS'
+            cp = 'c40'
+        elif dataset_name == 'FF-NT_c40':
+            dataset_name = 'FF-NT'
+            cp = 'c40'
+        # Get the information for the current dataset
+        for label in dataset_info[dataset_name]:
+            sub_dataset_info = dataset_info[dataset_name][label][self.mode]
+            # Special case for FaceForensics++ and DeepFakeDetection, choose the compression type
+            if cp == None and dataset_name in ['FF-DF', 'FF-F2F', 'FF-FS', 'FF-NT', 'FaceForensics++','DeepFakeDetection','FaceShifter']:
+                sub_dataset_info = sub_dataset_info[self.compression]
+            elif cp == 'c40' and dataset_name in ['FF-DF', 'FF-F2F', 'FF-FS', 'FF-NT', 'FaceForensics++','DeepFakeDetection','FaceShifter']:
+                sub_dataset_info = sub_dataset_info['c40']
+            # Iterate over the videos in the dataset
+            for video_name, video_info in sub_dataset_info.items():
+                # Unique video name
+                unique_video_name = video_info['label'] + '_' + video_name
+                # Get the label and frame paths for the current video
+                if video_info['label'] not in self.config['label_dict']:
+                    raise ValueError(f'Label {video_info["label"]} is not found in the configuration file.')
+                label = self.config['label_dict'][video_info['label']]
+                frame_paths = video_info['frames']
+                # sorted video path to the lists
+                if '\\' in frame_paths[0]:
+                    frame_paths = sorted(frame_paths, key=lambda x: int(x.split('\\')[-1].split('.')[0]))
+                else:
+                    frame_paths = sorted(frame_paths, key=lambda x: int(x.split('/')[-1].split('.')[0]))
+                # Consider the case when the actual number of frames (e.g., 270) is larger than the specified (i.e., self.frame_num=32)
+                # In this case, we select self.frame_num frames from the original 270 frames
+                total_frames = len(frame_paths)
+                if self.frame_num < total_frames:
+                    total_frames = self.frame_num
+                    if self.video_level:
+                        # Select clip_size continuous frames
+                        start_frame = random.randint(0, total_frames - self.frame_num) if self.mode == 'train' else 0
+                        frame_paths = frame_paths[start_frame:start_frame + self.frame_num]  # update total_frames
+                    else:
+                        # Select self.frame_num frames evenly distributed throughout the video
+                        step = total_frames // self.frame_num
+                        frame_paths = [frame_paths[i] for i in range(0, total_frames, step)][:self.frame_num]
+                # If video-level methods, crop clips from the selected frames if needed
+                if self.video_level:
+                    if self.clip_size is None:
+                        raise ValueError('clip_size must be specified when video_level is True.')
+                    # Check if the number of total frames is greater than or equal to clip_size
+                    if total_frames >= self.clip_size:
+                        # Initialize an empty list to store the selected continuous frames
+                        selected_clips = []
+                        # Calculate the number of clips to select
+                        num_clips = total_frames // self.clip_size
+                        if num_clips > 1:
+                            # Calculate the step size between each clip
+                            clip_step = (total_frames - self.clip_size) // (num_clips - 1)
+                            # Select clip_size continuous frames from each part of the video
+                            for i in range(num_clips):
+                                # Ensure start_frame + self.clip_size - 1 does not exceed the index of the last frame
+                                start_frame = random.randrange(i * clip_step, min((i + 1) * clip_step, total_frames - self.clip_size + 1)) if self.mode == 'train' else i * clip_step
+                                continuous_frames = frame_paths[start_frame:start_frame + self.clip_size]
+                                assert len(continuous_frames) == self.clip_size, 'clip_size is not equal to the length of frame_path_list'
+                                selected_clips.append(continuous_frames)
+                        else:
+                            start_frame = random.randrange(0, total_frames - self.clip_size + 1) if self.mode == 'train' else 0
+                            continuous_frames = frame_paths[start_frame:start_frame + self.clip_size]
+                            assert len(continuous_frames)==self.clip_size, 'clip_size is not equal to the length of frame_path_list'
+                            selected_clips.append(continuous_frames)
+                        # Append the list of selected clips and append the label
+                        label_list.extend([label] * len(selected_clips))
+                        frame_path_list.extend(selected_clips)
+                        # video name save
+                        video_name_list.extend([unique_video_name] * len(selected_clips))
+                    else:
+                        print(f"Skipping video {unique_video_name} because it has less than clip_size ({self.clip_size}) frames ({total_frames}).")
+                # Otherwise, extend the label and frame paths to the lists according to the number of frames
+                else:
+                    # Extend the label and frame paths to the lists according to the number of frames
+                    label_list.extend([label] * total_frames)
+                    frame_path_list.extend(frame_paths)
+                    # video name save
+                    video_name_list.extend([unique_video_name] * len(frame_paths))
+        # Shuffle the label and frame path lists in the same order
+        shuffled = list(zip(label_list, frame_path_list, video_name_list))
+        random.shuffle(shuffled)
+        label_list, frame_path_list, video_name_list = zip(*shuffled)
+        return frame_path_list, label_list, video_name_list
+    def load_rgb(self, file_path):
+        """
+        Load an RGB image from a file path and resize it to a specified resolution.
+        Args:
+            file_path: A string indicating the path to the image file.
+        Returns:
+            An Image object containing the loaded and resized image.
+        Raises:
+            ValueError: If the loaded image is None.
+        """
+        size = self.config['resolution'] # if self.mode == "train" else self.config['resolution']
+        if not self.lmdb:
+            if not file_path[0] == '.':
+                file_path =  f'{self.config["rgb_dir"]}'+file_path
+            assert os.path.exists(file_path), f"{file_path} does not exist"
+            img = cv2.imread(file_path)
+            if img is None:
+                raise ValueError('Loaded image is None: {}'.format(file_path))
+        elif self.lmdb:
+            with self.env.begin(write=False) as txn:
+                # transfer the path format from rgb-path to lmdb-key
+                if file_path[0]=='.':
+                    file_path=file_path.replace('./datasets\\','')
+                image_bin = txn.get(file_path.encode())
+                image_buf = np.frombuffer(image_bin, dtype=np.uint8)
+                img = cv2.imdecode(image_buf, cv2.IMREAD_COLOR)
+        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+        img = cv2.resize(img, (size, size), interpolation=cv2.INTER_CUBIC)
+        return Image.fromarray(np.array(img, dtype=np.uint8))
+    def load_mask(self, file_path):
+        """
+        Load a binary mask image from a file path and resize it to a specified resolution.
+        Args:
+            file_path: A string indicating the path to the mask file.
+        Returns:
+            A numpy array containing the loaded and resized mask.
+        Raises:
+            None.
+        """
+        size = self.config['resolution']
+        if file_path is None:
+            return np.zeros((size, size, 1))
+        if not self.lmdb:
+            if not file_path[0] == '.':
+                file_path =  f'./{self.config["rgb_dir"]}\\'+file_path
+            if os.path.exists(file_path):
+                mask = cv2.imread(file_path, 0)
+                if mask is None:
+                    mask = np.zeros((size, size))
+            else:
+                return np.zeros((size, size, 1))
+        else:
+            with self.env.begin(write=False) as txn:
+                # transfer the path format from rgb-path to lmdb-key
+                if file_path[0]=='.':
+                    file_path=file_path.replace('./datasets\\','')
+                image_bin = txn.get(file_path.encode())
+                if image_bin is None:
+                    mask = np.zeros((size, size,3))
+                else:
+                    image_buf = np.frombuffer(image_bin, dtype=np.uint8)
+                    # cv2.IMREAD_GRAYSCALE为灰度图，cv2.IMREAD_COLOR为彩色图
+                    mask = cv2.imdecode(image_buf, cv2.IMREAD_COLOR)
+        mask = cv2.resize(mask, (size, size)) / 255
+        mask = np.expand_dims(mask, axis=2)
+        return np.float32(mask)
+    def load_landmark(self, file_path):
+        """
+        Load 2D facial landmarks from a file path.
+        Args:
+            file_path: A string indicating the path to the landmark file.
+        Returns:
+            A numpy array containing the loaded landmarks.
+        Raises:
+            None.
+        """
+        if file_path is None:
+            return np.zeros((81, 2))
+        if not self.lmdb:
+            if not file_path[0] == '.':
+                file_path =  f'./{self.config["rgb_dir"]}\\'+file_path
+            if os.path.exists(file_path):
+                landmark = np.load(file_path)
+            else:
+                return np.zeros((81, 2))
+        else:
+            with self.env.begin(write=False) as txn:
+                # transfer the path format from rgb-path to lmdb-key
+                if file_path[0]=='.':
+                    file_path=file_path.replace('./datasets\\','')
+                binary = txn.get(file_path.encode())
+                landmark = np.frombuffer(binary, dtype=np.uint32).reshape((81, 2))
+                landmark=self.rescale_landmarks(np.float32(landmark), original_size=256, new_size=self.config['resolution'])
+        return landmark
+    def to_tensor(self, img):
+        """
+        Convert an image to a PyTorch tensor.
+        """
+        return T.ToTensor()(img)
+    def normalize(self, img):
+        """
+        Normalize an image.
+        """
+        mean = self.config['mean']
+        std = self.config['std']
+        normalize = T.Normalize(mean=mean, std=std)
+        return normalize(img)
+    def data_aug(self, img, landmark=None, mask=None, augmentation_seed=None):
+        """
+        Apply data augmentation to an image, landmark, and mask.
+        Args:
+            img: An Image object containing the image to be augmented.
+            landmark: A numpy array containing the 2D facial landmarks to be augmented.
+            mask: A numpy array containing the binary mask to be augmented.
+        Returns:
+            The augmented image, landmark, and mask.
+        """
+        # Set the seed for the random number generator
+        if augmentation_seed is not None:
+            random.seed(augmentation_seed)
+            np.random.seed(augmentation_seed)
+        # Create a dictionary of arguments
+        kwargs = {'image': img}
+        # Check if the landmark and mask are not None
+        if landmark is not None:
+            kwargs['keypoints'] = landmark
+            kwargs['keypoint_params'] = A.KeypointParams(format='xy')
+        if mask is not None:
+            mask = mask.squeeze(2)
+            if mask.max() > 0:
+                kwargs['mask'] = mask
+        # Apply data augmentation
+        transformed = self.transform(**kwargs)
+        # Get the augmented image, landmark, and mask
+        augmented_img = transformed['image']
+        augmented_landmark = transformed.get('keypoints')
+        augmented_mask = transformed.get('mask',mask)
+        # Convert the augmented landmark to a numpy array
+        if augmented_landmark is not None:
+            augmented_landmark = np.array(augmented_landmark)
+        # Reset the seeds to ensure different transformations for different videos
+        if augmentation_seed is not None:
+            random.seed()
+            np.random.seed()
+        return augmented_img, augmented_landmark, augmented_mask
+    def __getitem__(self, index, no_norm=False):
+        """
+        Returns the data point at the given index.
+        Args:
+            index (int): The index of the data point.
+        Returns:
+            A tuple containing the image tensor, the label tensor, the landmark tensor,
+            and the mask tensor.
+        """
+        # Get the image paths and label
+        image_paths = self.data_dict['image'][index]
+        label = self.data_dict['label'][index]
+        if not isinstance(image_paths, list):
+            image_paths = [image_paths]  # for the image-level IO, only one frame is used
+        image_tensors = []
+        landmark_tensors = []
+        mask_tensors = []
+        augmentation_seed = None
+        for image_path in image_paths:
+            # Initialize a new seed for data augmentation at the start of each video
+            if self.video_level and image_path == image_paths[0]:
+                augmentation_seed = random.randint(0, 2**32 - 1)
+            # Get the mask and landmark paths
+            mask_path = image_path.replace('frames', 'masks')  # Use .png for mask
+            landmark_path = image_path.replace('frames', 'landmarks').replace('.png', '.npy')  # Use .npy for landmark
+            # Load the image
+            try:
+                image = self.load_rgb(image_path)
+            except Exception as e:
+                # Skip this image and return the first one
+                print(f"Error loading image at index {index}: {e}")
+                return self.__getitem__(0)
+            image = np.array(image)  # Convert to numpy array for data augmentation
+            # Load mask and landmark (if needed)
+            if self.config['with_mask']:
+                mask = self.load_mask(mask_path)
+            else:
+                mask = None
+            if self.config['with_landmark']:
+                landmarks = self.load_landmark(landmark_path)
+            else:
+                landmarks = None
+            # Do Data Augmentation
+            if self.mode == 'train' and self.config['use_data_augmentation']:
+                image_trans, landmarks_trans, mask_trans = self.data_aug(image, landmarks, mask, augmentation_seed)
+            else:
+                image_trans, landmarks_trans, mask_trans = deepcopy(image), deepcopy(landmarks), deepcopy(mask)
+            # To tensor and normalize
+            if not no_norm:
+                image_trans = self.normalize(self.to_tensor(image_trans))
+                if self.config['with_landmark']:
+                    landmarks_trans = torch.from_numpy(landmarks)
+                if self.config['with_mask']:
+                    mask_trans = torch.from_numpy(mask_trans)
+            image_tensors.append(image_trans)
+            landmark_tensors.append(landmarks_trans)
+            mask_tensors.append(mask_trans)
+        if self.video_level:
+            # Stack image tensors along a new dimension (time)
+            image_tensors = torch.stack(image_tensors, dim=0)
+            # Stack landmark and mask tensors along a new dimension (time)
+            if not any(landmark is None or (isinstance(landmark, list) and None in landmark) for landmark in landmark_tensors):
+                landmark_tensors = torch.stack(landmark_tensors, dim=0)
+            if not any(m is None or (isinstance(m, list) and None in m) for m in mask_tensors):
+                mask_tensors = torch.stack(mask_tensors, dim=0)
+        else:
+            # Get the first image tensor
+            image_tensors = image_tensors[0]
+            # Get the first landmark and mask tensors
+            if not any(landmark is None or (isinstance(landmark, list) and None in landmark) for landmark in landmark_tensors):
+                landmark_tensors = landmark_tensors[0]
+            if not any(m is None or (isinstance(m, list) and None in m) for m in mask_tensors):
+                mask_tensors = mask_tensors[0]
+        return image_tensors, label, landmark_tensors, mask_tensors
+    @staticmethod
+    def collate_fn(batch):
+        """
+        Collate a batch of data points.
+        Args:
+            batch (list): A list of tuples containing the image tensor, the label tensor,
+                          the landmark tensor, and the mask tensor.
+        Returns:
+            A tuple containing the image tensor, the label tensor, the landmark tensor,
+            and the mask tensor.
+        """
+        # Separate the image, label, landmark, and mask tensors
+        images, labels, landmarks, masks = zip(*batch)
+        # Stack the image, label, landmark, and mask tensors
+        images = torch.stack(images, dim=0)
+        labels = torch.LongTensor(labels)
+        # Special case for landmarks and masks if they are None
+        if not any(landmark is None or (isinstance(landmark, list) and None in landmark) for landmark in landmarks):
+            landmarks = torch.stack(landmarks, dim=0)
+        else:
+            landmarks = None
+        if not any(m is None or (isinstance(m, list) and None in m) for m in masks):
+            masks = torch.stack(masks, dim=0)
+        else:
+            masks = None
+        # Create a dictionary of the tensors
+        data_dict = {}
+        data_dict['image'] = images
+        data_dict['label'] = labels
+        data_dict['landmark'] = landmarks
+        data_dict['mask'] = masks
+        return data_dict
+    def __len__(self):
+        """
+        Return the length of the dataset.
+        Args:
+            None.
+        Returns:
+            An integer indicating the length of the dataset.
+        Raises:
+            AssertionError: If the number of images and labels in the dataset are not equal.
+        """
+        assert len(self.image_list) == len(self.label_list), 'Number of images and labels are not equal'
+        return len(self.image_list)
+if __name__ == "__main__":
+    with open('/data/home/zhiyuanyan/DeepfakeBench/training/config/detector/video_baseline.yaml', 'r') as f:
+        config = yaml.safe_load(f)
+    train_set = DeepfakeAbstractBaseDataset(
+                config = config,
+                mode = 'train',
+            )
+    train_data_loader = \
+        torch.utils.data.DataLoader(
+            dataset=train_set,
+            batch_size=config['train_batchSize'],
+            shuffle=True,
+            num_workers=0,
+            collate_fn=train_set.collate_fn,
+        )
+    from tqdm import tqdm
+    for iteration, batch in enumerate(tqdm(train_data_loader)):
+        # print(iteration)
+        ...
+        # if iteration > 10:
+        #     break

training/dataset/albu.py ADDED Viewed

	@@ -0,0 +1,99 @@

+import random
+import cv2
+import numpy as np
+from albumentations import DualTransform, ImageOnlyTransform
+from albumentations.augmentations.crops.functional import crop
+def isotropically_resize_image(img, size, interpolation_down=cv2.INTER_AREA, interpolation_up=cv2.INTER_CUBIC):
+    h, w = img.shape[:2]
+    if max(w, h) == size:
+        return img
+    if w > h:
+        scale = size / w
+        h = h * scale
+        w = size
+    else:
+        scale = size / h
+        w = w * scale
+        h = size
+    interpolation = interpolation_up if scale > 1 else interpolation_down
+    resized = cv2.resize(img, (int(w), int(h)), interpolation=interpolation)
+    return resized
+class IsotropicResize(DualTransform):
+    def __init__(self, max_side, interpolation_down=cv2.INTER_AREA, interpolation_up=cv2.INTER_CUBIC,
+                 always_apply=False, p=1):
+        super(IsotropicResize, self).__init__(always_apply, p)
+        self.max_side = max_side
+        self.interpolation_down = interpolation_down
+        self.interpolation_up = interpolation_up
+    def apply(self, img, interpolation_down=cv2.INTER_AREA, interpolation_up=cv2.INTER_CUBIC, **params):
+        return isotropically_resize_image(img, size=self.max_side, interpolation_down=interpolation_down,
+                                          interpolation_up=interpolation_up)
+    def apply_to_mask(self, img, **params):
+        return self.apply(img, interpolation_down=cv2.INTER_NEAREST, interpolation_up=cv2.INTER_NEAREST, **params)
+    def get_transform_init_args_names(self):
+        return ("max_side", "interpolation_down", "interpolation_up")
+class Resize4xAndBack(ImageOnlyTransform):
+    def __init__(self, always_apply=False, p=0.5):
+        super(Resize4xAndBack, self).__init__(always_apply, p)
+    def apply(self, img, **params):
+        h, w = img.shape[:2]
+        scale = random.choice([2, 4])
+        img = cv2.resize(img, (w // scale, h // scale), interpolation=cv2.INTER_AREA)
+        img = cv2.resize(img, (w, h),
+                         interpolation=random.choice([cv2.INTER_CUBIC, cv2.INTER_LINEAR, cv2.INTER_NEAREST]))
+        return img
+class RandomSizedCropNonEmptyMaskIfExists(DualTransform):
+    def __init__(self, min_max_height, w2h_ratio=[0.7, 1.3], always_apply=False, p=0.5):
+        super(RandomSizedCropNonEmptyMaskIfExists, self).__init__(always_apply, p)
+        self.min_max_height = min_max_height
+        self.w2h_ratio = w2h_ratio
+    def apply(self, img, x_min=0, x_max=0, y_min=0, y_max=0, **params):
+        cropped = crop(img, x_min, y_min, x_max, y_max)
+        return cropped
+    @property
+    def targets_as_params(self):
+        return ["mask"]
+    def get_params_dependent_on_targets(self, params):
+        mask = params["mask"]
+        mask_height, mask_width = mask.shape[:2]
+        crop_height = int(mask_height * random.uniform(self.min_max_height[0], self.min_max_height[1]))
+        w2h_ratio = random.uniform(*self.w2h_ratio)
+        crop_width = min(int(crop_height * w2h_ratio), mask_width - 1)
+        if mask.sum() == 0:
+            x_min = random.randint(0, mask_width - crop_width + 1)
+            y_min = random.randint(0, mask_height - crop_height + 1)
+        else:
+            mask = mask.sum(axis=-1) if mask.ndim == 3 else mask
+            non_zero_yx = np.argwhere(mask)
+            y, x = random.choice(non_zero_yx)
+            x_min = x - random.randint(0, crop_width - 1)
+            y_min = y - random.randint(0, crop_height - 1)
+            x_min = np.clip(x_min, 0, mask_width - crop_width)
+            y_min = np.clip(y_min, 0, mask_height - crop_height)
+        x_max = x_min + crop_height
+        y_max = y_min + crop_width
+        y_max = min(mask_height, y_max)
+        x_max = min(mask_width, x_max)
+        return {"x_min": x_min, "x_max": x_max, "y_min": y_min, "y_max": y_max}
+    def get_transform_init_args_names(self):
+        return "min_max_height", "height", "width", "w2h_ratio"

training/dataset/face_utils.py ADDED Viewed

	@@ -0,0 +1,238 @@

+import cv2
+import numpy as np
+from skimage import transform as trans
+# from mtcnn.mtcnn import MTCNN
+def get_keypts(face):
+    # get key points from the results of mtcnn
+    if len(face['keypoints']) == 0:
+        return []
+    leye = np.array(face['keypoints']['left_eye'], dtype=np.int).reshape(-1, 2)
+    reye = np.array(face['keypoints']['right_eye'],
+                    dtype=np.int).reshape(-1, 2)
+    nose = np.array(face['keypoints']['nose'], dtype=np.int).reshape(-1, 2)
+    lmouth = np.array(face['keypoints']['mouth_left'],
+                      dtype=np.int).reshape(-1, 2)
+    rmouth = np.array(face['keypoints']['mouth_right'],
+                      dtype=np.int).reshape(-1, 2)
+    pts = np.concatenate([leye, reye, nose, lmouth, rmouth], axis=0)
+    return pts
+def img_align_crop(img, landmark=None, outsize=None, scale=1.3, mask=None):
+    """ align and crop the face according to the given bbox and landmarks
+        landmark: 5 key points
+    """
+    M = None
+    target_size = [112, 112]
+    dst = np.array([
+        [30.2946, 51.6963],
+        [65.5318, 51.5014],
+        [48.0252, 71.7366],
+        [33.5493, 92.3655],
+        [62.7299, 92.2041]], dtype=np.float32)
+    if target_size[1] == 112:
+        dst[:, 0] += 8.0
+    dst[:, 0] = dst[:, 0] * outsize[0] / target_size[0]
+    dst[:, 1] = dst[:, 1] * outsize[1] / target_size[1]
+    target_size = outsize
+    margin_rate = scale - 1
+    x_margin = target_size[0] * margin_rate / 2.
+    y_margin = target_size[1] * margin_rate / 2.
+    # move
+    dst[:, 0] += x_margin
+    dst[:, 1] += y_margin
+    # resize
+    dst[:, 0] *= target_size[0] / (target_size[0] + 2 * x_margin)
+    dst[:, 1] *= target_size[1] / (target_size[1] + 2 * y_margin)
+    src = landmark.astype(np.float32)
+    # use skimage tranformation
+    tform = trans.SimilarityTransform()
+    tform.estimate(src, dst)
+    M = tform.params[0:2, :]
+    # M: use opencv
+    # M = cv2.getAffineTransform(src[[0,1,2],:],dst[[0,1,2],:])
+    img = cv2.warpAffine(img, M, (target_size[1], target_size[0]))
+    if outsize is not None:
+        img = cv2.resize(img, (outsize[1], outsize[0]))
+    if mask is not None:
+        mask = cv2.warpAffine(mask, M, (target_size[1], target_size[0]))
+        mask = cv2.resize(mask, (outsize[1], outsize[0]))
+        return img, mask
+    else:
+        return img
+def expand_bbox(bbox, width, height, scale=1.3, minsize=None):
+    """
+    Expand original boundingbox by scale.
+    :param bbx: original boundingbox
+    :param width: frame width
+    :param height: frame height
+    :param scale: bounding box size multiplier to get a bigger face region
+    :param minsize: set minimum bounding box size
+    :return: expanded bbox
+    """
+    x, y, w, h = bbox
+    # box center
+    cx = int(x + w / 2)
+    cy = int(y + h / 2)
+    # expand by scale factor
+    new_size = max(int(w * scale), int(h * scale))
+    new_x = max(0, int(cx - new_size / 2))
+    new_y = max(0, int(cy - new_size / 2))
+    # Check for too big bbox for given x, y
+    new_size = min(width - new_x, new_size)
+    new_size = min(height - new_size, new_size)
+    return new_x, new_y, new_size, new_size
+def extract_face_MTCNN(face_detector, image, expand_scale=1.3, res=256):
+    # Image size
+    height, width = image.shape[:2]
+    # Convert to rgb
+    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+    # Detect with dlib
+    faces = face_detector.detect_faces(rgb)
+    if len(faces):
+        # For now only take biggest face
+        face = None
+        bbox = None
+        max_region = 0
+        for ff in faces:
+            if max_region == 0:
+                face = ff
+                bbox = face['box']
+                max_region = bbox[2]*bbox[3]
+            else:
+                bb = ff['box']
+                region = bb[2]*bb[3]
+                if region > max_rigion:
+                    max_rigion = region
+                    face = ff
+                    bbox = face['box']
+        print(max_region)
+            #face = faces[0]
+            #bbox = face['box']
+        # --- Prediction ---------------------------------------------------
+        # Face crop with MTCNN and bounding box scale enlargement
+        x, y, w, h = expand_bbox(bbox, width, height, scale=expand_scale)
+        cropped_face = rgb[y:y+h, x:x+w]
+        cropped_face = cv2.resize(
+            cropped_face, (res, res), interpolation=cv2.INTER_CUBIC)
+        cropped_face = cv2.cvtColor(cropped_face, cv2.COLOR_RGB2BGR)
+        return cropped_face
+    return None
+def extract_aligned_face_MTCNN(face_detector, image, expand_scale=1.3, res=256, mask=None):
+    # Image size
+    height, width = image.shape[:2]
+    # Convert to rgb
+    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+    # Detect with dlib
+    faces = face_detector.detect_faces(rgb)
+    if len(faces):
+        # For now only take biggest face
+        face = None
+        bbox = None
+        max_region = 0
+        for i, ff in enumerate(faces):
+            if max_region == 0:
+                face = ff
+                bbox = face['box']
+                max_region = bbox[2]*bbox[3]
+            else:
+                bb = ff['box']
+                region = bb[2]*bb[3]
+                if region > max_region:
+                    max_region = region
+                    face = ff
+                    bbox = face['box']
+            #print('face {}: {}'.format(i, max_region))
+        #face = faces[0]
+        landmarks = get_keypts(face)
+        # --- Prediction ---------------------------------------------------
+        # Face aligned crop with MTCNN and bounding box scale enlargement
+        if mask is not None:
+            cropped_face, cropped_mask = img_align_crop(rgb, landmarks, outsize=[
+                                        res, res], scale=expand_scale, mask=mask)
+            cropped_face = cv2.cvtColor(cropped_face, cv2.COLOR_RGB2BGR)
+            cropped_mask = cv2.cvtColor(cropped_mask, cv2.COLOR_RGB2GRAY)
+            return cropped_face, cropped_mask
+        else:
+            cropped_face = img_align_crop(rgb, landmarks, outsize=[
+                                        res, res], scale=expand_scale)
+            cropped_face = cv2.cvtColor(cropped_face, cv2.COLOR_RGB2BGR)
+            return cropped_face
+    return None
+def extract_face_DLIB(face_detector, image, expand_scale=1.3, res=256):
+    # Image size
+    height, width = image.shape[:2]
+    # Convert to gray
+    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+    # Detect with dlib
+    faces = face_detector(gray, 1)
+    if len(faces):
+        # For now only take biggest face
+        face = faces[0]
+        x1 = face.left()
+        y1 = face.top()
+        x2 = face.right()
+        y2 = face.bottom()
+        bbox = (x1, y1, x2-x1, y2-y1)
+        # --- Prediction ---------------------------------------------------
+        # Face crop with dlib and bounding box scale enlargement
+        x, y, w, h = expand_bbox(bbox, width, height, scale=expand_scale)
+        cropped_face = image[y:y+h, x:x+w]
+        cropped_face = cv2.resize(
+            cropped_face, (res, res), interpolation=cv2.INTER_CUBIC)
+        return cropped_face
+    return None

training/dataset/ff_blend.py ADDED Viewed

	@@ -0,0 +1,572 @@

+'''
+# author: Zhiyuan Yan
+# email: [email protected]
+# date: 2023-03-30
+The code is designed for Face X-ray.
+'''
+import os
+import sys
+import json
+import pickle
+import time
+import lmdb
+import numpy as np
+import albumentations as A
+import cv2
+import random
+from PIL import Image
+from skimage.util import random_noise
+from scipy import linalg
+import heapq as hq
+import lmdb
+import torch
+from torch.autograd import Variable
+from torch.utils import data
+from torchvision import transforms as T
+import torchvision
+from dataset.utils.face_blend import *
+from dataset.utils.face_align import get_align_mat_new
+from dataset.utils.color_transfer import color_transfer
+from dataset.utils.faceswap_utils import blendImages as alpha_blend_fea
+from dataset.utils.faceswap_utils import AlphaBlend as alpha_blend
+from dataset.utils.face_aug import aug_one_im, change_res
+from dataset.utils.image_ae import get_pretraiend_ae
+from dataset.utils.warp import warp_mask
+from dataset.utils import faceswap
+from scipy.ndimage.filters import gaussian_filter
+class RandomDownScale(A.core.transforms_interface.ImageOnlyTransform):
+	def apply(self,img,**params):
+		return self.randomdownscale(img)
+	def randomdownscale(self,img):
+		keep_ratio=True
+		keep_input_shape=True
+		H,W,C=img.shape
+		ratio_list=[2,4]
+		r=ratio_list[np.random.randint(len(ratio_list))]
+		img_ds=cv2.resize(img,(int(W/r),int(H/r)),interpolation=cv2.INTER_NEAREST)
+		if keep_input_shape:
+			img_ds=cv2.resize(img_ds,(W,H),interpolation=cv2.INTER_LINEAR)
+		return img_ds
+class FFBlendDataset(data.Dataset):
+    def __init__(self, config=None):
+        self.lmdb = config.get('lmdb', False)
+        if self.lmdb:
+            lmdb_path = os.path.join(config['lmdb_dir'], f"FaceForensics++_lmdb")
+            self.env = lmdb.open(lmdb_path, create=False, subdir=True, readonly=True, lock=False)
+        # Check if the dictionary has already been created
+        if os.path.exists('training/lib/nearest_face_info.pkl'):
+            with open('training/lib/nearest_face_info.pkl', 'rb') as f:
+                face_info = pickle.load(f)
+        else:
+            raise ValueError(f"Need to run the dataset/generate_xray_nearest.py before training the face xray.")
+        self.face_info = face_info
+        # Check if the dictionary has already been created
+        if os.path.exists('training/lib/landmark_dict_ffall.pkl'):
+            with open('training/lib/landmark_dict_ffall.pkl', 'rb') as f:
+                landmark_dict = pickle.load(f)
+        self.landmark_dict = landmark_dict
+        self.imid_list = self.get_training_imglist()
+        self.transforms = T.Compose([
+            # T.GaussianBlur(kernel_size=3, sigma=(0.1, 2.0)),
+            # T.ColorJitter(hue=.05, saturation=.05),
+            # T.RandomHorizontalFlip(),
+            # T.RandomRotation(20, resample=Image.BILINEAR),
+            T.ToTensor(),
+            T.Normalize(mean=[0.5, 0.5, 0.5],
+                        std=[0.5, 0.5, 0.5])
+        ])
+        self.data_dict = {
+            'imid_list': self.imid_list
+        }
+        self.config=config
+    # def data_aug(self, im):
+    #     """
+    #     Apply data augmentation on the input image.
+    #     """
+    #     transform = T.Compose([
+    #         T.ToPILImage(),
+    #         T.GaussianBlur(kernel_size=3, sigma=(0.1, 2.0)),
+    #         T.ColorJitter(hue=.05, saturation=.05),
+    #     ])
+    #     # Apply transformations
+    #     im_aug = transform(im)
+    #     return im_aug
+    def blended_aug(self, im):
+        transform = A.Compose([
+            A.RGBShift((-20,20),(-20,20),(-20,20),p=0.3),
+            A.HueSaturationValue(hue_shift_limit=(-0.3,0.3), sat_shift_limit=(-0.3,0.3), val_shift_limit=(-0.3,0.3), p=0.3),
+            A.RandomBrightnessContrast(brightness_limit=(-0.3,0.3), contrast_limit=(-0.3,0.3), p=0.3),
+            A.ImageCompression(quality_lower=40, quality_upper=100,p=0.5)
+        ])
+        # Apply transformations
+        im_aug = transform(image=im)
+        return im_aug['image']
+    def data_aug(self, im):
+        """
+        Apply data augmentation on the input image using albumentations.
+        """
+        transform = A.Compose([
+            A.Compose([
+                A.RGBShift((-20,20),(-20,20),(-20,20),p=0.3),
+                A.HueSaturationValue(hue_shift_limit=(-0.3,0.3), sat_shift_limit=(-0.3,0.3), val_shift_limit=(-0.3,0.3), p=1),
+                A.RandomBrightnessContrast(brightness_limit=(-0.1,0.1), contrast_limit=(-0.1,0.1), p=1),
+            ],p=1),
+            A.OneOf([
+                RandomDownScale(p=1),
+                A.Sharpen(alpha=(0.2, 0.5), lightness=(0.5, 1.0), p=1),
+            ],p=1),
+        ], p=1.)
+        # Apply transformations
+        im_aug = transform(image=im)
+        return im_aug['image']
+    def get_training_imglist(self):
+        """
+        Get the list of training images.
+        """
+        random.seed(1024)  # Fix the random seed for reproducibility
+        imid_list = list(self.landmark_dict.keys())
+        # imid_list = [imid.replace('landmarks', 'frames').replace('npy', 'png') for imid in imid_list]
+        random.shuffle(imid_list)
+        return imid_list
+    def load_rgb(self, file_path):
+        """
+        Load an RGB image from a file path and resize it to a specified resolution.
+        Args:
+            file_path: A string indicating the path to the image file.
+        Returns:
+            An Image object containing the loaded and resized image.
+        Raises:
+            ValueError: If the loaded image is None.
+        """
+        size = self.config['resolution'] # if self.mode == "train" else self.config['resolution']
+        if not self.lmdb:
+            if not file_path[0] == '.':
+                file_path =  f'./{self.config["rgb_dir"]}\\'+file_path
+            assert os.path.exists(file_path), f"{file_path} does not exist"
+            img = cv2.imread(file_path)
+            if img is None:
+                raise ValueError('Loaded image is None: {}'.format(file_path))
+        elif self.lmdb:
+            with self.env.begin(write=False) as txn:
+                # transfer the path format from rgb-path to lmdb-key
+                if file_path[0]=='.':
+                    file_path=file_path.replace('./datasets\\','')
+                image_bin = txn.get(file_path.encode())
+                image_buf = np.frombuffer(image_bin, dtype=np.uint8)
+                img = cv2.imdecode(image_buf, cv2.IMREAD_COLOR)
+        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+        img = cv2.resize(img, (size, size), interpolation=cv2.INTER_CUBIC)
+        return np.array(img, dtype=np.uint8)
+    def load_mask(self, file_path):
+        """
+        Load a binary mask image from a file path and resize it to a specified resolution.
+        Args:
+            file_path: A string indicating the path to the mask file.
+        Returns:
+            A numpy array containing the loaded and resized mask.
+        Raises:
+            None.
+        """
+        size = self.config['resolution']
+        if file_path is None:
+            if not file_path[0] == '.':
+                file_path =  f'./{self.config["rgb_dir"]}\\'+file_path
+            return np.zeros((size, size, 1))
+        if not self.lmdb:
+            if os.path.exists(file_path):
+                mask = cv2.imread(file_path, 0)
+                if mask is None:
+                    mask = np.zeros((size, size))
+            else:
+                return np.zeros((size, size, 1))
+        else:
+            with self.env.begin(write=False) as txn:
+                # transfer the path format from rgb-path to lmdb-key
+                if file_path[0]=='.':
+                    file_path=file_path.replace('./datasets\\','')
+                image_bin = txn.get(file_path.encode())
+                image_buf = np.frombuffer(image_bin, dtype=np.uint8)
+                # cv2.IMREAD_GRAYSCALE为灰度图，cv2.IMREAD_COLOR为彩色图
+                mask = cv2.imdecode(image_buf, cv2.IMREAD_COLOR)
+        mask = cv2.resize(mask, (size, size)) / 255
+        mask = np.expand_dims(mask, axis=2)
+        return np.float32(mask)
+    def load_landmark(self, file_path):
+        """
+        Load 2D facial landmarks from a file path.
+        Args:
+            file_path: A string indicating the path to the landmark file.
+        Returns:
+            A numpy array containing the loaded landmarks.
+        Raises:
+            None.
+        """
+        if file_path is None:
+            return np.zeros((81, 2))
+        if not self.lmdb:
+            if not file_path[0] == '.':
+                file_path =  f'./{self.config["rgb_dir"]}\\'+file_path
+            if os.path.exists(file_path):
+                landmark = np.load(file_path)
+            else:
+                return np.zeros((81, 2))
+        else:
+            with self.env.begin(write=False) as txn:
+                # transfer the path format from rgb-path to lmdb-key
+                if file_path[0]=='.':
+                    file_path=file_path.replace('./datasets\\','')
+                binary = txn.get(file_path.encode())
+                landmark = np.frombuffer(binary, dtype=np.uint32).reshape((81, 2))
+        return np.float32(landmark)
+    def preprocess_images(self, imid_fg, imid_bg):
+        """
+        Load foreground and background images and face shapes.
+        """
+        fg_im = self.load_rgb(imid_fg.replace('landmarks', 'frames').replace('npy', 'png'))
+        fg_im = np.array(self.data_aug(fg_im))
+        fg_shape = self.landmark_dict[imid_fg]
+        fg_shape = np.array(fg_shape, dtype=np.int32)
+        bg_im = self.load_rgb(imid_bg.replace('landmarks', 'frames').replace('npy', 'png'))
+        bg_im = np.array(self.data_aug(bg_im))
+        bg_shape = self.landmark_dict[imid_bg]
+        bg_shape = np.array(bg_shape, dtype=np.int32)
+        if fg_im is None:
+            return bg_im, bg_shape, bg_im, bg_shape
+        elif bg_im is None:
+            return fg_im, fg_shape, fg_im, fg_shape
+        return fg_im, fg_shape, bg_im, bg_shape
+    def get_fg_bg(self, one_lmk_path):
+        """
+        Get foreground and background paths
+        """
+        bg_lmk_path = one_lmk_path
+        # Randomly pick one from the nearest neighbors for the foreground
+        if bg_lmk_path in self.face_info:
+            fg_lmk_path = random.choice(self.face_info[bg_lmk_path])
+        else:
+            fg_lmk_path = bg_lmk_path
+        return fg_lmk_path, bg_lmk_path
+    def generate_masks(self, fg_im, fg_shape, bg_im, bg_shape):
+        """
+        Generate masks for foreground and background images.
+        """
+        fg_mask = get_mask(fg_shape, fg_im, deform=False)
+        bg_mask = get_mask(bg_shape, bg_im, deform=True)
+        # # Only do the postprocess for the background mask
+        bg_mask_postprocess = warp_mask(bg_mask, std=20)
+        return fg_mask, bg_mask_postprocess
+    def warp_images(self, fg_im, fg_shape, bg_im, bg_shape, fg_mask):
+        """
+        Warp foreground face onto background image using affine or 3D warping.
+        """
+        H, W, C = bg_im.shape
+        use_3d_warp = np.random.rand() < 0.5
+        if not use_3d_warp:
+            aff_param = np.array(get_align_mat_new(fg_shape, bg_shape)).reshape(2, 3)
+            warped_face = cv2.warpAffine(fg_im, aff_param, (W, H), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REFLECT)
+            fg_mask = cv2.warpAffine(fg_mask, aff_param, (W, H), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REFLECT)
+            fg_mask = fg_mask > 0
+        else:
+            warped_face = faceswap.warp_image_3d(fg_im, np.array(fg_shape[:48]), np.array(bg_shape[:48]), (H, W))
+            fg_mask = np.mean(warped_face, axis=2) > 0
+        return warped_face, fg_mask
+    def colorTransfer(self, src, dst, mask):
+        transferredDst = np.copy(dst)
+        maskIndices = np.where(mask != 0)
+        maskedSrc = src[maskIndices[0], maskIndices[1]].astype(np.float32)
+        maskedDst = dst[maskIndices[0], maskIndices[1]].astype(np.float32)
+        # Compute means and standard deviations
+        meanSrc = np.mean(maskedSrc, axis=0)
+        stdSrc = np.std(maskedSrc, axis=0)
+        meanDst = np.mean(maskedDst, axis=0)
+        stdDst = np.std(maskedDst, axis=0)
+        # Perform color transfer
+        maskedDst = (maskedDst - meanDst) * (stdSrc / stdDst) + meanSrc
+        maskedDst = np.clip(maskedDst, 0, 255)
+        # Copy the entire background into transferredDst
+        transferredDst = np.copy(dst)
+        # Now apply color transfer only to the masked region
+        transferredDst[maskIndices[0], maskIndices[1]] = maskedDst.astype(np.uint8)
+        return transferredDst
+    def blend_images(self, color_corrected_fg, bg_im, bg_mask, featherAmount=0.2):
+        """
+        Blend foreground and background images together.
+        """
+        # normalize the mask to have values between 0 and 1
+        b_mask = bg_mask / 255.
+        # Add an extra dimension and repeat the mask to match the number of channels in color_corrected_fg and bg_im
+        b_mask = np.repeat(b_mask[:, :, np.newaxis], 3, axis=2)
+        # Compute the alpha blending
+        maskIndices = np.where(b_mask != 0)
+        maskPts = np.hstack((maskIndices[1][:, np.newaxis], maskIndices[0][:, np.newaxis]))
+        # FIXME: deal with the bugs of empty maskpts
+        if maskPts.size == 0:
+            print(f"No non-zero values found in bg_mask for blending. Skipping this image.")
+            return color_corrected_fg  # or handle this situation differently according to the needs
+        faceSize = np.max(maskPts, axis=0) - np.min(maskPts, axis=0)
+        featherAmount = featherAmount * np.max(faceSize)
+        hull = cv2.convexHull(maskPts)
+        dists = np.zeros(maskPts.shape[0])
+        for i in range(maskPts.shape[0]):
+            dists[i] = cv2.pointPolygonTest(hull, (int(maskPts[i, 0]), int(maskPts[i, 1])), True)
+        weights = np.clip(dists / featherAmount, 0, 1)
+        # Perform the blending operation
+        color_corrected_fg = color_corrected_fg.astype(float)
+        bg_im = bg_im.astype(float)
+        blended_image = np.copy(bg_im)
+        blended_image[maskIndices[0], maskIndices[1]] = weights[:, np.newaxis] * color_corrected_fg[maskIndices[0], maskIndices[1]] + (1 - weights[:, np.newaxis]) * bg_im[maskIndices[0], maskIndices[1]]
+        # Convert the blended image to 8-bit unsigned integers
+        blended_image = np.clip(blended_image, 0, 255)
+        blended_image = blended_image.astype(np.uint8)
+        return blended_image
+    def process_images(self, imid_fg, imid_bg, index):
+        """
+        Overview:
+        Process foreground and background images following the data generation pipeline (BI dataset).
+        Terminology:
+        Foreground (fg) image: The image containing the face that will be blended onto the background image.
+        Background (bg) image: The image onto which the face from the foreground image will be blended.
+        """
+        fg_im, fg_shape, bg_im, bg_shape = self.preprocess_images(imid_fg, imid_bg)
+        fg_mask, bg_mask = self.generate_masks(fg_im, fg_shape, bg_im, bg_shape)
+        warped_face, fg_mask = self.warp_images(fg_im, fg_shape, bg_im, bg_shape, fg_mask)
+        try:
+            # add the below two lines to make sure the bg_mask is strictly within the fg_mask
+            bg_mask[fg_mask == 0] = 0
+            color_corrected_fg = self.colorTransfer(bg_im, warped_face, bg_mask)
+            blended_image = self.blend_images(color_corrected_fg, bg_im, bg_mask)
+        # FIXME: ugly, in order to fix the problem of mask (all zero values for bg_mask)
+        except:
+            color_corrected_fg = self.colorTransfer(bg_im, warped_face, bg_mask)
+            blended_image = self.blend_images(color_corrected_fg, bg_im, bg_mask)
+        boundary = get_boundary(bg_mask)
+        # # Prepare images and titles for the combined image
+        # images = [fg_im, np.where(fg_mask>0, 255, 0), bg_im, bg_mask, color_corrected_fg, blended_image, np.where(boundary>0, 255, 0)]
+        # titles = ["Fg Image", "Fg Mask", "Bg Image",
+        #         "Bg Mask", "Blended Region",
+        #         "Blended Image", "Boundary"]
+        # # Save the combined image
+        # os.makedirs('facexray_examples_3', exist_ok=True)
+        # self.save_combined_image(images, titles, index, f'facexray_examples_3/combined_image_{index}.png')
+        return blended_image, boundary, bg_im
+    def post_proc(self, img):
+        '''
+        if self.mode == 'train':
+            #if np.random.rand() < 0.5:
+            #    img = random_add_noise(img)
+                #add_gaussian_noise(img)
+            if np.random.rand() < 0.5:
+                #img, _ = change_res(img)
+                img = gaussian_blur(img)
+        '''
+        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+        im_aug = self.blended_aug(img)
+        im_aug = Image.fromarray(np.uint8(img))
+        im_aug = self.transforms(im_aug)
+        return im_aug
+    @staticmethod
+    def save_combined_image(images, titles, index, save_path):
+        """
+        Save the combined image with titles for each single image.
+        Args:
+            images (List[np.ndarray]): List of images to be combined.
+            titles (List[str]): List of titles for each image.
+            index (int): Index of the image.
+            save_path (str): Path to save the combined image.
+        """
+        # Determine the maximum height and width among the images
+        max_height = max(image.shape[0] for image in images)
+        max_width = max(image.shape[1] for image in images)
+        # Create the canvas
+        canvas = np.zeros((max_height * len(images), max_width, 3), dtype=np.uint8)
+        # Place the images and titles on the canvas
+        current_height = 0
+        for image, title in zip(images, titles):
+            height, width = image.shape[:2]
+            # Check if image has a third dimension (color channels)
+            if image.ndim == 2:
+                # If not, add a third dimension
+                image = np.tile(image[..., None], (1, 1, 3))
+            canvas[current_height : current_height + height, :width] = image
+            cv2.putText(
+                canvas, title, (10, current_height + 30),
+                cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2
+            )
+            current_height += height
+        # Save the combined image
+        cv2.imwrite(save_path, canvas)
+    def __getitem__(self, index):
+        """
+        Get an item from the dataset by index.
+        """
+        one_lmk_path = self.imid_list[index]
+        try:
+            label = 1 if one_lmk_path.split('/')[6]=='manipulated_sequences' else 0
+        except Exception as e:
+            label = 1 if one_lmk_path.split('\\')[6] == 'manipulated_sequences' else 0
+        imid_fg, imid_bg = self.get_fg_bg(one_lmk_path)
+        manipulate_img, boundary, imid_bg = self.process_images(imid_fg, imid_bg, index)
+        manipulate_img = self.post_proc(manipulate_img)
+        imid_bg = self.post_proc(imid_bg)
+        boundary = torch.from_numpy(boundary)
+        boundary = boundary.unsqueeze(2).permute(2, 0, 1)
+        # fake data
+        fake_data_tuple = (manipulate_img, boundary, 1)
+        # real data
+        real_data_tuple = (imid_bg, torch.zeros_like(boundary), label)
+        return fake_data_tuple, real_data_tuple
+    @staticmethod
+    def collate_fn(batch):
+        """
+        Collates batches of data and shuffles the images.
+        """
+        # Unzip the batch
+        fake_data, real_data = zip(*batch)
+        # Unzip the fake and real data
+        fake_images, fake_boundaries, fake_labels = zip(*fake_data)
+        real_images, real_boundaries, real_labels = zip(*real_data)
+        # Combine fake and real data
+        images = torch.stack(fake_images + real_images)
+        boundaries = torch.stack(fake_boundaries + real_boundaries)
+        labels = torch.tensor(fake_labels + real_labels)
+        # Combine images, boundaries, and labels into tuples
+        combined_data = list(zip(images, boundaries, labels))
+        # Shuffle the combined data
+        random.shuffle(combined_data)
+        # Unzip the shuffled data
+        images, boundaries, labels = zip(*combined_data)
+        # Create the data dictionary
+        data_dict = {
+            'image': torch.stack(images),
+            'label': torch.tensor(labels),
+            'mask': torch.stack(boundaries),  # Assuming boundaries are your masks
+            'landmark': None  # Add your landmark data if available
+        }
+        return data_dict
+    def __len__(self):
+        """
+        Get the length of the dataset.
+        """
+        return len(self.imid_list)
+if __name__ == "__main__":
+    dataset = FFBlendDataset()
+    print('dataset lenth: ', len(dataset))
+    def tensor2bgr(im):
+        img = im.squeeze().cpu().numpy().transpose(1, 2, 0)
+        img = (img + 1)/2 * 255
+        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
+        return img
+    def tensor2gray(im):
+        img = im.squeeze().cpu().numpy()
+        img = img * 255
+        return img
+    for i, data_dict in enumerate(dataset):
+        if i > 20:
+            break
+        if label == 1:
+            if not use_mouth:
+                img, boudary = im
+                cv2.imwrite('{}_whole.png'.format(i), tensor2bgr(img))
+                cv2.imwrite('{}_boudnary.png'.format(i), tensor2gray(boudary))
+            else:
+                img, mouth, boudary = im
+                cv2.imwrite('{}_whole.png'.format(i), tensor2bgr(img))
+                cv2.imwrite('{}_mouth.png'.format(i), tensor2bgr(mouth))
+                cv2.imwrite('{}_boudnary.png'.format(i), tensor2gray(boudary))

training/dataset/fwa_blend.py ADDED Viewed

	@@ -0,0 +1,548 @@

+'''
+# author: Zhiyuan Yan
+# email: [email protected]
+# date: 2023-03-30
+The code is designed for FWA and mainly modified from the below link:
+https://github.com/yuezunli/DSP-FWA
+'''
+import os
+import sys
+import json
+import pickle
+import time
+import dlib
+import numpy as np
+from copy import deepcopy
+import cv2
+import random
+from PIL import Image
+from skimage.util import random_noise
+from skimage.draw import polygon
+from scipy import linalg
+import heapq as hq
+import albumentations as A
+import torch
+from torch.autograd import Variable
+from torch.utils import data
+from torchvision import transforms as T
+import torchvision
+from dataset.utils.face_blend import *
+from dataset.utils.face_align import get_align_mat_new
+from dataset.utils.color_transfer import color_transfer
+from dataset.utils.faceswap_utils import blendImages as alpha_blend_fea
+from dataset.utils.faceswap_utils import AlphaBlend as alpha_blend
+from dataset.utils.face_aug import aug_one_im, change_res
+from dataset.utils.image_ae import get_pretraiend_ae
+from dataset.utils.warp import warp_mask
+from dataset.utils import faceswap
+from scipy.ndimage.filters import gaussian_filter
+from skimage.transform import AffineTransform, warp
+from dataset.abstract_dataset import DeepfakeAbstractBaseDataset
+# Define face detector and predictor models
+# Both models are created at import time, so importing this module requires
+# the predictor file below to be reachable from the working directory.
+face_detector = dlib.get_frontal_face_detector()
+predictor_path = 'preprocessing/dlib_tools/shape_predictor_81_face_landmarks.dat'
+face_predictor = dlib.shape_predictor(predictor_path)
+# Template face: x coordinates of 51 reference points, all within [0, 1].
+mean_face_x = np.array([
+    0.000213256, 0.0752622, 0.18113, 0.29077, 0.393397, 0.586856, 0.689483, 0.799124,
+    0.904991, 0.98004, 0.490127, 0.490127, 0.490127, 0.490127, 0.36688, 0.426036,
+    0.490127, 0.554217, 0.613373, 0.121737, 0.187122, 0.265825, 0.334606, 0.260918,
+    0.182743, 0.645647, 0.714428, 0.793132, 0.858516, 0.79751, 0.719335, 0.254149,
+    0.340985, 0.428858, 0.490127, 0.551395, 0.639268, 0.726104, 0.642159, 0.556721,
+    0.490127, 0.423532, 0.338094, 0.290379, 0.428096, 0.490127, 0.552157, 0.689874,
+    0.553364, 0.490127, 0.42689])
+# Template face: y coordinates of the same 51 reference points.
+mean_face_y = np.array([
+    0.106454, 0.038915, 0.0187482, 0.0344891, 0.0773906, 0.0773906, 0.0344891,
+    0.0187482, 0.038915, 0.106454, 0.203352, 0.307009, 0.409805, 0.515625, 0.587326,
+    0.609345, 0.628106, 0.609345, 0.587326, 0.216423, 0.178758, 0.179852, 0.231733,
+    0.245099, 0.244077, 0.231733, 0.179852, 0.178758, 0.216423, 0.244077, 0.245099,
+    0.780233, 0.745405, 0.727388, 0.742578, 0.727388, 0.745405, 0.780233, 0.864805,
+    0.902192, 0.909281, 0.902192, 0.864805, 0.784792, 0.778746, 0.785343, 0.778746,
+    0.784792, 0.824182, 0.831803, 0.824182])
+# (51, 2) array of (x, y) template points; align() fits detected landmarks
+# 17..67 onto it with umeyama().
+landmarks_2D = np.stack([mean_face_x, mean_face_y], axis=1)
+class RandomDownScale(A.core.transforms_interface.ImageOnlyTransform):
+	def apply(self,img,**params):
+		return self.randomdownscale(img)
+	def randomdownscale(self,img):
+		keep_ratio=True
+		keep_input_shape=True
+		H,W,C=img.shape
+		ratio_list=[2,4]
+		r=ratio_list[np.random.randint(len(ratio_list))]
+		img_ds=cv2.resize(img,(int(W/r),int(H/r)),interpolation=cv2.INTER_NEAREST)
+		if keep_input_shape:
+			img_ds=cv2.resize(img_ds,(W,H),interpolation=cv2.INTER_LINEAR)
+		return img_ds
+def umeyama( src, dst, estimate_scale ):
+    """Estimate N-D similarity transformation with or without scaling.
+    Parameters
+    ----------
+    src : (M, N) array
+        Source coordinates.
+    dst : (M, N) array
+        Destination coordinates.
+    estimate_scale : bool
+        Whether to estimate scaling factor.
+    Returns
+    -------
+    T : (N + 1, N + 1)
+        The homogeneous similarity transformation matrix. The matrix contains
+        NaN values only if the problem is not well-conditioned.
+    References
+    ----------
+    .. [1] "Least-squares estimation of transformation parameters between two
+            point patterns", Shinji Umeyama, PAMI 1991, DOI: 10.1109/34.88573
+    """
+    num = src.shape[0]
+    dim = src.shape[1]
+    # Compute mean of src and dst.
+    src_mean = src.mean(axis=0)
+    dst_mean = dst.mean(axis=0)
+    # Subtract mean from src and dst.
+    src_demean = src - src_mean
+    dst_demean = dst - dst_mean
+    # Eq. (38).
+    A = np.dot(dst_demean.T, src_demean) / num
+    # Eq. (39).
+    d = np.ones((dim,), dtype=np.double)
+    if np.linalg.det(A) < 0:
+        d[dim - 1] = -1
+    T = np.eye(dim + 1, dtype=np.double)
+    U, S, V = np.linalg.svd(A)
+    # Eq. (40) and (43).
+    rank = np.linalg.matrix_rank(A)
+    if rank == 0:
+        return np.nan * T
+    elif rank == dim - 1:
+        if np.linalg.det(U) * np.linalg.det(V) > 0:
+            T[:dim, :dim] = np.dot(U, V)
+        else:
+            s = d[dim - 1]
+            d[dim - 1] = -1
+            T[:dim, :dim] = np.dot(U, np.dot(np.diag(d), V))
+            d[dim - 1] = s
+    else:
+        T[:dim, :dim] = np.dot(U, np.dot(np.diag(d), V.T))
+    if estimate_scale:
+        # Eq. (41) and (42).
+        scale = 1.0 / src_demean.var(axis=0).sum() * np.dot(S, d)
+    else:
+        scale = 1.0
+    T[:dim, dim] = dst_mean - scale * np.dot(T[:dim, :dim], src_mean.T)
+    T[:dim, :dim] *= scale
+    return T
+def shape_to_np(shape, dtype="int"):
+    # initialize the list of (x, y)-coordinates
+    coords = np.zeros((68, 2), dtype=dtype)
+    # loop over the 68 facial landmarks and convert them
+    # to a 2-tuple of (x, y)-coordinates
+    for i in range(0, 68):
+        coords[i] = (shape.part(i).x, shape.part(i).y)
+    # return the list of (x, y)-coordinates
+    return coords
+from skimage.transform import AffineTransform, warp
+def get_warped_face(face, landmarks, tform):
+    """
+    Apply the given affine transformation to the face and landmarks.
+    Args:
+        face (np.ndarray): The face image to be transformed.
+        landmarks (np.ndarray): The facial landmarks to be transformed.
+        tform (AffineTransform): The transformation to apply.
+    Returns:
+        warped_face (np.ndarray): The transformed face image.
+        warped_landmarks (np.ndarray): The transformed facial landmarks.
+    """
+    # Apply the transformation to the face
+    warped_face = warp(face, tform.inverse, output_shape=face.shape)
+    warped_face = (warped_face * 255).astype(np.uint8)
+    # Apply the transformation to the landmarks
+    warped_landmarks = tform.inverse(landmarks)
+    return warped_face, warped_landmarks
+def warp_face_within_landmarks(face, landmarks, tform):
+    """
+    Apply the given affine transformation to the face and landmarks,
+    and retain only the area within the landmarks.
+    Args:
+        face (np.ndarray): The face image to be transformed.
+        landmarks (np.ndarray): The facial landmarks to be transformed.
+        tform (AffineTransform): The transformation to apply.
+    Returns:
+        warped_face (np.ndarray): The transformed face image.
+        warped_landmarks (np.ndarray): The transformed facial landmarks.
+    """
+    # Apply the transformation to the face
+    warped_face = warp(face, tform.inverse, output_shape=face.shape)
+    warped_face = (warped_face * 255).astype(np.uint8)
+    # Apply the transformation to the landmarks
+    warped_landmarks = np.linalg.inv(landmarks)
+    # Generate a mask based on the landmarks
+    rr, cc = polygon(warped_landmarks[:, 1], warped_landmarks[:, 0])
+    mask = np.zeros_like(warped_face, dtype=np.uint8)
+    mask[rr, cc] = 1
+    # Apply the mask to the face
+    warped_face *= mask
+    return warped_face, warped_landmarks
+def get_2d_aligned_face(image, mat, size, padding=[0, 0]):
+    mat = mat * size
+    mat[0, 2] += padding[0]
+    mat[1, 2] += padding[1]
+    return cv2.warpAffine(image, mat, (size + 2 * padding[0], size + 2 * padding[1]))
+def get_2d_aligned_landmarks(face_cache, aligned_face_size=256, padding=(0, 0)):
+    mat, points = face_cache
+    # Mapping landmarks to aligned face
+    pred_ = np.concatenate([points, np.ones((points.shape[0], 1))], axis=-1)
+    pred_ = np.transpose(pred_)
+    mat = mat * aligned_face_size
+    mat[0, 2] += padding[0]
+    mat[1, 2] += padding[1]
+    aligned_shape = np.dot(mat, pred_)
+    aligned_shape = np.transpose(aligned_shape[:2, :])
+    return aligned_shape
+def get_aligned_face_and_landmarks(im, face_cache, aligned_face_size = 256, padding=(0, 0)):
+    """
+    get all aligned faces and landmarks of all images
+    :param imgs: origin images
+    :param fa: face_alignment package
+    :return:
+    """
+    aligned_cur_shapes = []
+    aligned_cur_im = []
+    for mat, points in face_cache:
+        # Get transform matrix
+        aligned_face = get_2d_aligned_face(im, mat, aligned_face_size, padding)
+        aligned_shape = get_2d_aligned_landmarks([mat, points], aligned_face_size, padding)
+        aligned_cur_shapes.append(aligned_shape)
+        aligned_cur_im.append(aligned_face)
+    return aligned_cur_im, aligned_cur_shapes
+def face_warp(im, face, trans_matrix, size, padding):
+    """
+    Warp an aligned face back onto the original image.
+    :param im: original image; its dtype and shape define the output canvas
+    :param face: aligned face to paste; values are clipped to [0, 255]
+    :param trans_matrix: affine matrix, scaled by `size` before use
+    :param size: scale applied to `trans_matrix`
+    :param padding: (x, y) offset added to the translation column
+    :return: (new_image, mask): the composited image and a 0/1 uint8 mask of the pasted elements
+    """
+    new_face = np.clip(face, 0, 255).astype(im.dtype)
+    # OpenCV takes the output size as (width, height)
+    image_size = im.shape[1], im.shape[0]
+    tmp_matrix = trans_matrix * size
+    delta_matrix = np.array([[0., 0., padding[0]*1.0], [0., 0., padding[1]*1.0]])
+    tmp_matrix = tmp_matrix + delta_matrix
+    # Warp the new face onto a blank canvas
+    warped_face = np.zeros_like(im)
+    # WARP_INVERSE_MAP tells OpenCV that tmp_matrix is the destination-to-source mapping;
+    # the result is written in place into warped_face
+    cv2.warpAffine(new_face, tmp_matrix, image_size, warped_face, cv2.WARP_INVERSE_MAP,
+                   cv2.BORDER_TRANSPARENT)
+    # Create a mask of the warped face
+    # The comparison is element-wise, so elements that are exactly 0 in the warped face are left out
+    mask = (warped_face > 0).astype(np.uint8)
+    # Blend the warped face with the original image
+    new_image = im * (1 - mask) + warped_face * mask
+    return new_image, mask
+def get_face_loc(im, face_detector, scale=0):
+    """ get face locations, color order of images is rgb """
+    faces = face_detector(np.uint8(im), scale)
+    face_list = []
+    if faces is not None or len(faces) > 0:
+        for i, d in enumerate(faces):
+            try:
+                face_list.append([d.left(), d.top(), d.right(), d.bottom()])
+            except:
+                face_list.append([d.rect.left(), d.rect.top(), d.rect.right(), d.rect.bottom()])
+    return face_list
+def align(im, face_detector, lmark_predictor, scale=0):
+    # This version we handle all faces in view
+    # channel order rgb
+    im = np.uint8(im)
+    faces = face_detector(im, scale)
+    face_list = []
+    if faces is not None or len(faces) > 0:
+        for pred in faces:
+            try:
+                points = shape_to_np(lmark_predictor(im, pred))
+            except:
+                points = shape_to_np(lmark_predictor(im, pred.rect))
+            trans_matrix = umeyama(points[17:], landmarks_2D, True)[0:2]
+            face_list.append([trans_matrix, points])
+    return face_list
+class FWABlendDataset(DeepfakeAbstractBaseDataset):
+    def __init__(self, config=None):
+        super().__init__(config, mode='train')
+        self.transforms = T.Compose([
+            T.ToTensor(),
+            T.Normalize(mean=config['mean'],
+                        std=config['std'])
+        ])
+        self.resolution = config['resolution']
+    def blended_aug(self, im):
+        transform = A.Compose([
+            A.RGBShift((-20,20),(-20,20),(-20,20),p=0.3),
+            A.HueSaturationValue(hue_shift_limit=(-0.3,0.3), sat_shift_limit=(-0.3,0.3), val_shift_limit=(-0.3,0.3), p=0.3),
+            A.RandomBrightnessContrast(brightness_limit=(-0.3,0.3), contrast_limit=(-0.3,0.3), p=0.3),
+            A.ImageCompression(quality_lower=40, quality_upper=100,p=0.5)
+        ])
+        # Apply transformations
+        im_aug = transform(image=im)
+        return im_aug['image']
+    def data_aug(self, im):
+        """
+        Apply data augmentation on the input image using albumentations.
+        """
+        transform = A.Compose([
+            A.Compose([
+                A.RGBShift((-20,20),(-20,20),(-20,20),p=0.3),
+                A.HueSaturationValue(hue_shift_limit=(-0.3,0.3), sat_shift_limit=(-0.3,0.3), val_shift_limit=(-0.3,0.3), p=1),
+                A.RandomBrightnessContrast(brightness_limit=(-0.1,0.1), contrast_limit=(-0.1,0.1), p=1),
+            ],p=1),
+            A.OneOf([
+                RandomDownScale(p=1),
+                A.Sharpen(alpha=(0.2, 0.5), lightness=(0.5, 1.0), p=1),
+            ],p=1),
+        ], p=1.)
+        # Apply transformations
+        im_aug = transform(image=im)
+        return im_aug['image']
+    def blend_images(self, img_path):
+        #im = cv2.imread(img_path)
+        im = np.array(self.load_rgb(img_path))
+        # Get the alignment of the head
+        face_cache = align(im, face_detector, face_predictor)
+        # Get the aligned face and landmarks
+        aligned_im_head, aligned_shape = get_aligned_face_and_landmarks(im, face_cache)
+        # If no faces were detected in the image, return None (or any suitable value)
+        if len(aligned_im_head) == 0 or len(aligned_shape) == 0:
+            return None, None
+        aligned_im_head = aligned_im_head[0]
+        aligned_shape = aligned_shape[0]
+        # Apply transformations to the face
+        scale_factor = random.choice([0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8])
+        scaled_face = cv2.resize(aligned_im_head, (0, 0), fx=scale_factor, fy=scale_factor)
+        # Apply Gaussian blur to the scaled face
+        blurred_face = cv2.GaussianBlur(scaled_face, (5, 5), 0)
+        # Resize the processed image back to the original size
+        resized_face = cv2.resize(blurred_face, (aligned_im_head.shape[1], aligned_im_head.shape[0]))
+        # Generate a random facial mask
+        mask = get_mask(aligned_shape.astype(np.float32), resized_face, std=20, deform=True)
+        # Apply the mask to the resized face
+        masked_face = cv2.bitwise_and(resized_face, resized_face, mask=mask)
+        # do aug before warp
+        im = np.array(self.blended_aug(im))
+        # Warp the face back to the original image
+        im, masked_face = face_warp(im, masked_face, face_cache[0][0], self.resolution, [0, 0])
+        shape = get_2d_aligned_landmarks(face_cache[0], self.resolution, [0, 0])
+        return im, masked_face
+    def process_images(self, img_path, index):
+        """
+        Process an image following the data generation pipeline.
+        """
+        blended_im, mask = self.blend_images(img_path)
+        # Prepare images and titles for the combined image
+        imid_fg = np.array(self.load_rgb(img_path))
+        imid_fg = np.array(self.data_aug(imid_fg))
+        if blended_im is None or mask is None:
+            return imid_fg, None
+        # images = [
+        #     imid_fg,
+        #     np.where(mask.astype(np.uint8)>0, 255, 0),
+        #     blended_im,
+        # ]
+        # titles = ["Image", "Mask", "Blended Image"]
+        # # Save the combined image
+        # os.makedirs('fwa_examples_2', exist_ok=True)
+        # self.save_combined_image(images, titles, index, f'fwa_examples_2/combined_image_{index}.png')
+        return imid_fg, blended_im
+    def post_proc(self, img):
+        '''
+        if self.mode == 'train':
+            #if np.random.rand() < 0.5:
+            #    img = random_add_noise(img)
+                #add_gaussian_noise(img)
+            if np.random.rand() < 0.5:
+                #img, _ = change_res(img)
+                img = gaussian_blur(img)
+        '''
+        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+        im_aug = self.blended_aug(img)
+        im_aug = Image.fromarray(np.uint8(img))
+        im_aug = self.transforms(im_aug)
+        return im_aug
+    @staticmethod
+    def save_combined_image(images, titles, index, save_path):
+        """
+        Save the combined image with titles for each single image.
+        Args:
+            images (List[np.ndarray]): List of images to be combined.
+            titles (List[str]): List of titles for each image.
+            index (int): Index of the image.
+            save_path (str): Path to save the combined image.
+        """
+        # Determine the maximum height and width among the images
+        max_height = max(image.shape[0] for image in images)
+        max_width = max(image.shape[1] for image in images)
+        # Create the canvas
+        canvas = np.zeros((max_height * len(images), max_width, 3), dtype=np.uint8)
+        # Place the images and titles on the canvas
+        current_height = 0
+        for image, title in zip(images, titles):
+            height, width = image.shape[:2]
+            # Check if image has a third dimension (color channels)
+            if image.ndim == 2:
+                # If not, add a third dimension
+                image = np.tile(image[..., None], (1, 1, 3))
+            canvas[current_height : current_height + height, :width] = image
+            cv2.putText(
+                canvas, title, (10, current_height + 30),
+                cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2
+            )
+            current_height += height
+        # Save the combined image
+        cv2.imwrite(save_path, canvas)
+    def __getitem__(self, index):
+        """
+        Get an item from the dataset by index.
+        """
+        one_img_path = self.data_dict['image'][index]
+        try:
+            label = 1 if one_img_path.split('/')[6]=='manipulated_sequences' else 0
+        except Exception as e:
+            label = 1 if one_img_path.split('\\')[6] == 'manipulated_sequences' else 0
+        blend_label = 1
+        imid, manipulate_img = self.process_images(one_img_path, index)
+        if manipulate_img is None:
+            manipulate_img = deepcopy(imid)
+            blend_label = label
+        manipulate_img = self.post_proc(manipulate_img)
+        imid = self.post_proc(imid)
+        # blend data
+        fake_data_tuple = (manipulate_img, blend_label)
+        # original data
+        real_data_tuple = (imid, label)
+        return fake_data_tuple, real_data_tuple
+    @staticmethod
+    def collate_fn(batch):
+        """
+        Collates batches of data and shuffles the images.
+        """
+        # Unzip the batch
+        fake_data, real_data = zip(*batch)
+        # Unzip the fake and real data
+        fake_images, fake_labels = zip(*fake_data)
+        real_images, real_labels = zip(*real_data)
+        # Combine fake and real data
+        images = torch.stack(fake_images + real_images)
+        labels = torch.tensor(fake_labels + real_labels)
+        # Combine images, boundaries, and labels into tuples
+        combined_data = list(zip(images, labels))
+        # Shuffle the combined data
+        random.shuffle(combined_data)
+        # Unzip the shuffled data
+        images, labels = zip(*combined_data)
+        # Create the data dictionary
+        data_dict = {
+            'image': torch.stack(images),
+            'label': torch.tensor(labels),
+            'mask': None,
+            'landmark': None  # Add your landmark data if available
+        }
+        return data_dict

training/dataset/generate_parsing_mask.py ADDED Viewed

	@@ -0,0 +1,129 @@

+'''
+# author: Zhiyuan Yan
+# email: [email protected]
+# date: 2024-01-26
+The code is designed for self-blending method (SBI, CVPR 2024).
+'''
+import sys
+sys.path.append('.')
+import os
+import cv2
+import yaml
+import random
+import torch
+import torch.nn as nn
+from PIL import Image
+import numpy as np
+from copy import deepcopy
+import albumentations as A
+from training.dataset.abstract_dataset import DeepfakeAbstractBaseDataset
+from training.dataset.sbi_api import SBI_API
+from training.dataset.utils.bi_online_generation_yzy import random_get_hull
+from training.dataset.SimSwap.test_one_image import self_blend
+import warnings
+warnings.filterwarnings('ignore')
+from transformers import SegformerImageProcessor, SegformerForSemanticSegmentation
+# Module-level setup: the processor and the segmentation model are loaded as soon as this file is imported
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# NOTE(review): both loads point at a hard-coded absolute snapshot directory - adjust it for the local machine
+image_processor = SegformerImageProcessor.from_pretrained("/Youtu_Pangu_Security_Public/youtu-pangu-public/zhiyuanyan/huggingface/hub/models--jonathandinu--face-parsing/snapshots/a2bf62f39dfd8f8856a3c19be8b0707a8d68abdd")
+face_parser = SegformerForSemanticSegmentation.from_pretrained("/Youtu_Pangu_Security_Public/youtu-pangu-public/zhiyuanyan/huggingface/hub/models--jonathandinu--face-parsing/snapshots/a2bf62f39dfd8f8856a3c19be8b0707a8d68abdd").to(device)
+def create_facial_mask(mask, with_neck=False):
+    facial_labels = [1, 2, 3, 4, 5, 6, 7, 10, 11, 12]
+    if with_neck:
+        facial_labels += [17]
+    facial_mask = np.zeros_like(mask, dtype=bool)
+    for label in facial_labels:
+        facial_mask |= (mask == label)
+    return facial_mask.astype(np.uint8) * 255
+def face_parsing_mask(img1, with_neck=False):
+    # run inference on image
+    img1 = Image.fromarray(img1)
+    inputs = image_processor(images=img1, return_tensors="pt").to(device)
+    outputs = face_parser(**inputs)
+    logits = outputs.logits  # shape (batch_size, num_labels, ~height/4, ~width/4)
+    # resize output to match input image dimensions
+    upsampled_logits = nn.functional.interpolate(logits,
+                    size=img1.size[::-1], # H x W
+                    mode='bilinear',
+                    align_corners=False)
+    labels = upsampled_logits.argmax(dim=1)[0]
+    mask = labels.cpu().numpy()
+    mask = create_facial_mask(mask, with_neck)
+    return mask
+class YZYDataset(DeepfakeAbstractBaseDataset):
+    def __init__(self, config=None, mode='train'):
+        super().__init__(config, mode)
+        # Get real lists
+        # Fix the label of real images to be 0
+        self.real_imglist = [(img, label) for img, label in zip(self.image_list, self.label_list) if label == 0]
+    def __getitem__(self, index):
+        # Get the real image paths and labels
+        real_image_path, real_label = self.real_imglist[index]
+        # real_image_path = real_image_path.replace('/Youtu_Pangu_Security_Public/', '/Youtu_Pangu_Security/public/')
+        # Load the real images
+        real_image = self.load_rgb(real_image_path)
+        real_image = np.array(real_image)  # Convert to numpy array
+        # Face Parsing
+        mask = face_parsing_mask(real_image, with_neck=False)
+        parse_mask_path = real_image_path.replace('frames', 'parse_mask')
+        os.makedirs(os.path.dirname(parse_mask_path), exist_ok=True)
+        cv2.imwrite(parse_mask_path, mask)
+        # # SRI generation
+        # sri_image = self_blend(real_image)
+        # sri_path = real_image_path.replace('frames', 'sri_frames')
+        # os.makedirs(os.path.dirname(sri_path), exist_ok=True)
+        # cv2.imwrite(sri_path, sri_image)
+    @staticmethod
+    def collate_fn(batch):
+        data_dict = {
+            'image': None,
+            'label': None,
+            'landmark': None,
+            'mask': None,
+        }
+        return data_dict
+    def __len__(self):
+        return len(self.real_imglist)
+# Script entry point: iterate once over the dataset so that every parsing mask gets written
+if __name__ == '__main__':
+    # NOTE(review): sbi.yaml is not among the detector configs listed for this commit - confirm it exists
+    with open('./training/config/detector/sbi.yaml', 'r') as f:
+        config = yaml.safe_load(f)
+    with open('./training/config/train_config.yaml', 'r') as f:
+        config2 = yaml.safe_load(f)
+    config2['data_manner'] = 'lmdb'
+    # Hard-coded absolute path - adjust it for the local machine
+    config['dataset_json_folder'] = '/Youtu_Pangu_Security_Public/youtu-pangu-public/zhiyuanyan/DeepfakeBenchv2/preprocessing/dataset_json'
+    # Keys of the train config override same-named keys of the detector config,
+    # including 'dataset_json_folder' if train_config.yaml defines it
+    config.update(config2)
+    train_set = YZYDataset(config=config, mode='train')
+    train_data_loader = \
+        torch.utils.data.DataLoader(
+            dataset=train_set,
+            batch_size=config['train_batchSize'],
+            shuffle=True,
+            num_workers=0,
+            collate_fn=train_set.collate_fn,
+        )
+    from tqdm import tqdm
+    # The batches are placeholders; the useful work is the mask written by __getitem__
+    for iteration, batch in enumerate(tqdm(train_data_loader)):
+        print(iteration)

training/dataset/generate_xray_nearest.py ADDED Viewed

	@@ -0,0 +1,136 @@

+'''
+# author: Zhiyuan Yan
+# email: [email protected]
+# date: 2023-03-30
+The code is specifically designed for generating nearest sample pairs for Face X-ray.
+Alternatively, you can utilize the pre-generated pkl files available in our GitHub repository. Please refer to the "Releases" section on our repository for accessing these files.
+'''
+import os
+import json
+import pickle
+import numpy as np
+import heapq
+import random
+from tqdm import tqdm
+from scipy.spatial import KDTree
+def load_landmark(file_path):
+    """
+    Load 2D facial landmarks from a file path.
+    Args:
+        file_path: A string indicating the path to the landmark file.
+    Returns:
+        A numpy array containing the loaded landmarks.
+    Raises:
+        None.
+    """
+    if file_path is None:
+        return np.zeros((81, 2))
+    if os.path.exists(file_path):
+        landmark = np.load(file_path)
+        return np.float32(landmark)
+    else:
+        return np.zeros((81, 2))
+def get_landmark_dict(dataset_folder):
+    # Check if the dictionary has already been created
+    if os.path.exists('landmark_dict_ff.pkl'):
+        with open('landmark_dict_ff.pkl', 'rb') as f:
+            return pickle.load(f)
+    # Open the metadata file for the current folder
+    metadata_path = os.path.join(dataset_folder, "FaceForensics++.json")
+    with open(metadata_path, "r") as f:
+        metadata = json.load(f)
+    # Iterate over the metadata entries and add the landmark paths to the list
+    ff_real_data = metadata['FaceForensics++']['FF-real']
+    # Using dictionary comprehension to generate the landmark_dict
+    landmark_dict = {
+        frame_path.replace('frames', 'landmarks').replace(".png", ".npy"): load_landmark(
+            frame_path.replace('frames', 'landmarks').replace(".png", ".npy")
+        )
+        for mode, value in ff_real_data.items()
+        for video_name, video_info in tqdm(value['c23'].items())
+        for frame_path in video_info['frames']
+    }
+    # Save the dictionary to a pickle file
+    with open('landmark_dict_ffall.pkl', 'wb') as f:
+        pickle.dump(landmark_dict, f)
+    return landmark_dict
+def get_nearest_faces_fixed_pair(landmark_info, num_neighbors):
+    '''
+    Using KDTree to find the nearest faces for each image (Much faster!!)
+    '''
+    random.seed(1024)  # Fix the random seed for reproducibility
+    # Check if the dictionary has already been created
+    if os.path.exists('nearest_face_info.pkl'):
+        with open('nearest_face_info.pkl', 'rb') as f:
+            return pickle.load(f)
+    landmarks_array = np.array([lmk.flatten() for lmk in landmark_info.values()])
+    landmark_ids = list(landmark_info.keys())
+    # Build a KDTree using the flattened landmarks
+    tree = KDTree(landmarks_array)
+    nearest_faces = {}
+    for idx, this_lmk in tqdm(enumerate(landmarks_array), total=len(landmarks_array)):
+        # Query the KDTree for the nearest neighbors (excluding itself)
+        dists, indices = tree.query(this_lmk, k=num_neighbors + 1)
+        # Randomly pick one from the nearest N neighbors (excluding itself)
+        picked_idx = random.choice(indices[1:])
+        nearest_faces[landmark_ids[idx]] = landmark_ids[picked_idx]
+    # Save the dictionary to a pickle file
+    with open('nearest_face_info.pkl', 'wb') as f:
+        pickle.dump(nearest_faces, f)
+    return nearest_faces
+def get_nearest_faces(landmark_info, num_neighbors):
+    '''
+    Using KDTree to find the nearest faces for each image (Much faster!!)
+    '''
+    random.seed(1024)  # Fix the random seed for reproducibility
+    # Check if the dictionary has already been created
+    if os.path.exists('nearest_face_info.pkl'):
+        with open('nearest_face_info.pkl', 'rb') as f:
+            return pickle.load(f)
+    landmarks_array = np.array([lmk.flatten() for lmk in landmark_info.values()])
+    landmark_ids = list(landmark_info.keys())
+    # Build a KDTree using the flattened landmarks
+    tree = KDTree(landmarks_array)
+    nearest_faces = {}
+    for idx, this_lmk in tqdm(enumerate(landmarks_array), total=len(landmarks_array)):
+        # Query the KDTree for the nearest neighbors (excluding itself)
+        dists, indices = tree.query(this_lmk, k=num_neighbors + 1)
+        # Store the nearest N neighbors (excluding itself)
+        nearest_faces[landmark_ids[idx]] = [landmark_ids[i] for i in indices[1:]]
+    # Save the dictionary to a pickle file
+    with open('nearest_face_info.pkl', 'wb') as f:
+        pickle.dump(nearest_faces, f)
+    return nearest_faces
+# NOTE: these statements are not under an `if __name__ == '__main__'` guard, so they run on import as well
+# Load the landmark dictionary and obtain the landmark dict
+# Hard-coded absolute path - adjust it for the local machine
+dataset_folder = "/home/zhiyuanyan/disfin/deepfake_benchmark/preprocessing/dataset_json/"
+landmark_info = get_landmark_dict(dataset_folder)
+# Get the nearest faces for each image (in landmark_dict)
+num_neighbors = 100
+nearest_faces_info = get_nearest_faces(landmark_info, num_neighbors)  # running time: about 20 mins

training/dataset/iid_dataset.py ADDED Viewed

	@@ -0,0 +1,116 @@

+'''
+# author: Zhiyuan Yan
+# email: [email protected]
+# date: 2023-03-30
+The code is designed for scenarios such as disentanglement-based methods where it is necessary to ensure an equal number of positive and negative samples.
+'''
+import os.path
+from copy import deepcopy
+import cv2
+import math
+import torch
+import random
+import yaml
+from PIL import Image, ImageDraw
+import numpy as np
+from torch.utils.data import DataLoader
+from dataset.abstract_dataset import DeepfakeAbstractBaseDataset
+class IIDDataset(DeepfakeAbstractBaseDataset):
+    def __init__(self, config=None, mode='train'):
+        super().__init__(config, mode)
+    def __getitem__(self, index):
+        # Get the image paths and label
+        image_path = self.data_dict['image'][index]
+        if '\\' in image_path:
+            per = image_path.split('\\')[-2]
+        else:
+            per = image_path.split('/')[-2]
+        id_index = int(per.split('_')[-1])  # real video id
+        label = self.data_dict['label'][index]
+        # Load the image
+        try:
+            image = self.load_rgb(image_path)
+        except Exception as e:
+            # Skip this image and return the first one
+            print(f"Error loading image at index {index}: {e}")
+            return self.__getitem__(0)
+        image = np.array(image)  # Convert to numpy array for data augmentation
+        # Do Data Augmentation
+        image_trans,_,_ = self.data_aug(image)
+        # To tensor and normalize
+        image_trans = self.normalize(self.to_tensor(image_trans))
+        return id_index, image_trans, label
+    @staticmethod
+    def collate_fn(batch):
+        """
+        Collate a batch of data points.
+        Args:
+            batch (list): A list of tuples containing the image tensor, the label tensor,
+                          the landmark tensor, and the mask tensor.
+        Returns:
+            A tuple containing the image tensor, the label tensor, the landmark tensor,
+            and the mask tensor.
+        """
+        # Separate the image, label, landmark, and mask tensors
+        id_indexes, image_trans, label = zip(*batch)
+        # Stack the image, label, landmark, and mask tensors
+        images = torch.stack(image_trans, dim=0)
+        labels = torch.LongTensor(label)
+        ids = torch.LongTensor(id_indexes)
+        # Create a dictionary of the tensors
+        data_dict = {}
+        data_dict['image'] = images
+        data_dict['label'] = labels
+        data_dict['id_index'] = ids
+        data_dict['mask']=None
+        data_dict['landmark']=None
+        return data_dict
+def draw_landmark(img,landmark):
+    draw = ImageDraw.Draw(img)
+    # landmark = np.stack([mean_face_x, mean_face_y], axis=1)
+    # landmark *=256
+    # 遍历每个特征点
+    for i, point in enumerate(landmark):
+        # 在图像上标记特征点
+        draw.ellipse((point[0] - 1, point[1] - 1, point[0] + 1, point[1] + 1), fill=(255, 0, 0))
+        # 在特征点旁边添加序号
+        draw.text((point[0], point[1]), str(i), fill=(255, 255, 255))
+    return img
+if __name__ == '__main__':
+    detector_path = r"./training/config/detector/xception.yaml"
+    # weights_path = "./ckpts/xception/CDFv2/tb_v1/ov.pth"
+    with open(detector_path, 'r') as f:
+        config = yaml.safe_load(f)
+    with open('./training/config/train_config.yaml', 'r') as f:
+        config2 = yaml.safe_load(f)
+    config2['data_manner'] = 'lmdb'
+    config['dataset_json_folder'] = 'preprocessing/dataset_json_v3'
+    config.update(config2)
+    dataset = IIDDataset(config=config)
+    batch_size = 2
+    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True,collate_fn=dataset.collate_fn)
+    for i, batch in enumerate(dataloader):
+        print(f"Batch {i}: {batch}")
+        # 如果数据集返回的是一个元组（例如，(data, target)），可以这样获取：
+        img = batch['img']

training/dataset/library/000_0000.png ADDED Viewed

training/dataset/library/001_0000.png ADDED Viewed

training/dataset/library/DeepFakeMask.py ADDED Viewed

	@@ -0,0 +1,181 @@

+#!/usr/bin/python
+# -*- coding: UTF-8 -*-
+# Created by: algohunt
+# Microsoft Research & Peking University
+# [email protected]
+# Copyright (c) 2019
+#!/usr/bin/env python3
+""" Masks functions for faceswap.py """
+import inspect
+import logging
+import sys
+import cv2
+import numpy as np
+# logger = logging.getLogger(__name__)  # pylint: disable=invalid-name
+def get_available_masks():
+    """ Return a list of the available masks for cli """
+    masks = sorted([name for name, obj in inspect.getmembers(sys.modules[__name__])
+                    if inspect.isclass(obj) and name != "Mask"])
+    masks.append("none")
+    # logger.debug(masks)
+    return masks
+def get_default_mask():
+    """ Set the default mask for cli """
+    masks = get_available_masks()
+    default = "dfl_full"
+    default = default if default in masks else masks[0]
+    # logger.debug(default)
+    return default
+class Mask():
+    """ Parent class for masks
+        the output mask will be <mask_type>.mask
+        channels: 1, 3 or 4:
+                    1 - Returns a single channel mask
+                    3 - Returns a 3 channel mask
+                    4 - Returns the original image with the mask in the alpha channel """
+    def __init__(self, landmarks, face, channels=4):
+        # logger.info("Initializing %s: (face_shape: %s, channels: %s, landmarks: %s)",
+        #              self.__class__.__name__, face.shape, channels, landmarks)
+        self.landmarks = landmarks
+        self.face = face
+        self.channels = channels
+        mask = self.build_mask()
+        self.mask = self.merge_mask(mask)
+        #logger.info("Initialized %s", self.__class__.__name__)
+    def build_mask(self):
+        """ Override to build the mask """
+        raise NotImplementedError
+    def merge_mask(self, mask):
+        """ Return the mask in requested shape """
+        #logger.info("mask_shape: %s", mask.shape)
+        assert self.channels in (1, 3, 4), "Channels should be 1, 3 or 4"
+        assert mask.shape[2] == 1 and mask.ndim == 3, "Input mask be 3 dimensions with 1 channel"
+        if self.channels == 3:
+            retval = np.tile(mask, 3)
+        elif self.channels == 4:
+            retval = np.concatenate((self.face, mask), -1)
+        else:
+            retval = mask
+        #logger.info("Final mask shape: %s", retval.shape)
+        return retval
+class dfl_full(Mask):  # pylint: disable=invalid-name
+    """ DFL facial mask """
+    def build_mask(self):
+        mask = np.zeros(self.face.shape[0:2] + (1, ), dtype=np.float32)
+        nose_ridge = (self.landmarks[27:31], self.landmarks[33:34])
+        jaw = (self.landmarks[0:17],
+               self.landmarks[48:68],
+               self.landmarks[0:1],
+               self.landmarks[8:9],
+               self.landmarks[16:17])
+        eyes = (self.landmarks[17:27],
+                self.landmarks[0:1],
+                self.landmarks[27:28],
+                self.landmarks[16:17],
+                self.landmarks[33:34])
+        parts = [jaw, nose_ridge, eyes]
+        for item in parts:
+            merged = np.concatenate(item)
+            cv2.fillConvexPoly(mask, cv2.convexHull(merged), 255.)  # pylint: disable=no-member
+        return mask
+class components(Mask):  # pylint: disable=invalid-name
+    """ Component model mask """
+    def build_mask(self):
+        mask = np.zeros(self.face.shape[0:2] + (1, ), dtype=np.float32)
+        r_jaw = (self.landmarks[0:9], self.landmarks[17:18])
+        l_jaw = (self.landmarks[8:17], self.landmarks[26:27])
+        r_cheek = (self.landmarks[17:20], self.landmarks[8:9])
+        l_cheek = (self.landmarks[24:27], self.landmarks[8:9])
+        nose_ridge = (self.landmarks[19:25], self.landmarks[8:9],)
+        r_eye = (self.landmarks[17:22],
+                 self.landmarks[27:28],
+                 self.landmarks[31:36],
+                 self.landmarks[8:9])
+        l_eye = (self.landmarks[22:27],
+                 self.landmarks[27:28],
+                 self.landmarks[31:36],
+                 self.landmarks[8:9])
+        nose = (self.landmarks[27:31], self.landmarks[31:36])
+        parts = [r_jaw, l_jaw, r_cheek, l_cheek, nose_ridge, r_eye, l_eye, nose]
+        for item in parts:
+            merged = np.concatenate(item)
+            cv2.fillConvexPoly(mask, cv2.convexHull(merged), 255.)  # pylint: disable=no-member
+        return mask
+class extended(Mask):  # pylint: disable=invalid-name
+    """ Extended mask
+        Based on components mask. Attempts to extend the eyebrow points up the forehead
+    """
+    def build_mask(self):
+        """ Build the components-style mask from landmarks whose eyebrow points were moved first """
+        mask = np.zeros(self.face.shape[0:2] + (1, ), dtype=np.float32)
+        # Work on a copy so that self.landmarks keeps the original eyebrow positions
+        landmarks = self.landmarks.copy()
+        # mid points between the side of face and eye point
+        ml_pnt = (landmarks[36] + landmarks[0]) // 2
+        mr_pnt = (landmarks[16] + landmarks[45]) // 2
+        # mid points between the mid points and eye
+        ql_pnt = (landmarks[36] + ml_pnt) // 2
+        qr_pnt = (landmarks[45] + mr_pnt) // 2
+        # Top of the eye arrays
+        bot_l = np.array((ql_pnt, landmarks[36], landmarks[37], landmarks[38], landmarks[39]))
+        bot_r = np.array((landmarks[42], landmarks[43], landmarks[44], landmarks[45], qr_pnt))
+        # Eyebrow arrays
+        top_l = landmarks[17:22]
+        top_r = landmarks[22:27]
+        # Adjust eyebrow arrays
+        # Each eyebrow point moves away from its eye point by half of their (floor-divided) offset
+        landmarks[17:22] = top_l + ((top_l - bot_l) // 2)
+        landmarks[22:27] = top_r + ((top_r - bot_r) // 2)
+        # From here on the groups are the same as in the components mask, using the adjusted landmarks
+        r_jaw = (landmarks[0:9], landmarks[17:18])
+        l_jaw = (landmarks[8:17], landmarks[26:27])
+        r_cheek = (landmarks[17:20], landmarks[8:9])
+        l_cheek = (landmarks[24:27], landmarks[8:9])
+        nose_ridge = (landmarks[19:25], landmarks[8:9],)
+        r_eye = (landmarks[17:22], landmarks[27:28], landmarks[31:36], landmarks[8:9])
+        l_eye = (landmarks[22:27], landmarks[27:28], landmarks[31:36], landmarks[8:9])
+        nose = (landmarks[27:31], landmarks[31:36])
+        parts = [r_jaw, l_jaw, r_cheek, l_cheek, nose_ridge, r_eye, l_eye, nose]
+        for item in parts:
+            # Each group is filled as the convex hull of all its points
+            merged = np.concatenate(item)
+            cv2.fillConvexPoly(mask, cv2.convexHull(merged), 255.)  # pylint: disable=no-member
+        return mask
+class facehull(Mask):  # pylint: disable=invalid-name
+    """ Basic face hull mask """
+    def build_mask(self):
+        mask = np.zeros(self.face.shape[0:2] + (1, ), dtype=np.float32)
+        hull = cv2.convexHull(  # pylint: disable=no-member
+            np.array(self.landmarks).reshape((-1, 2)))
+        cv2.fillConvexPoly(mask, hull, 255.0, lineType=cv2.LINE_AA)  # pylint: disable=no-member
+        return mask

training/dataset/library/LICENSE ADDED Viewed

	@@ -0,0 +1,674 @@

+                    GNU GENERAL PUBLIC LICENSE
+                       Version 3, 29 June 2007
+ Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+                            Preamble
+  The GNU General Public License is a free, copyleft license for
+software and other kinds of works.
+  The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works.  By contrast,
+the GNU General Public License is intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users.  We, the Free Software Foundation, use the
+GNU General Public License for most of our software; it applies also to
+any other work released this way by its authors.  You can apply it to
+your programs, too.
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+  To protect your rights, we need to prevent others from denying you
+these rights or asking you to surrender the rights.  Therefore, you have
+certain responsibilities if you distribute copies of the software, or if
+you modify it: responsibilities to respect the freedom of others.
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must pass on to the recipients the same
+freedoms that you received.  You must make sure that they, too, receive
+or can get the source code.  And you must show them these terms so they
+know their rights.
+  Developers that use the GNU GPL protect your rights with two steps:
+(1) assert copyright on the software, and (2) offer you this License
+giving you legal permission to copy, distribute and/or modify it.
+  For the developers' and authors' protection, the GPL clearly explains
+that there is no warranty for this free software.  For both users' and
+authors' sake, the GPL requires that modified versions be marked as
+changed, so that their problems will not be attributed erroneously to
+authors of previous versions.
+  Some devices are designed to deny users access to install or run
+modified versions of the software inside them, although the manufacturer
+can do so.  This is fundamentally incompatible with the aim of
+protecting users' freedom to change the software.  The systematic
+pattern of such abuse occurs in the area of products for individuals to
+use, which is precisely where it is most unacceptable.  Therefore, we
+have designed this version of the GPL to prohibit the practice for those
+products.  If such problems arise substantially in other domains, we
+stand ready to extend this provision to those domains in future versions
+of the GPL, as needed to protect the freedom of users.
+  Finally, every program is threatened constantly by software patents.
+States should not allow patents to restrict development and use of
+software on general-purpose computers, but in those that do, we wish to
+avoid the special danger that patents applied to a free program could
+make it effectively proprietary.  To prevent this, the GPL assures that
+patents cannot be used to render the program non-free.
+  The precise terms and conditions for copying, distribution and
+modification follow.
+                       TERMS AND CONDITIONS
+  0. Definitions.
+  "This License" refers to version 3 of the GNU General Public License.
+  "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+  "The Program" refers to any copyrightable work licensed under this
+License.  Each licensee is addressed as "you".  "Licensees" and
+"recipients" may be individuals or organizations.
+  To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy.  The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+  A "covered work" means either the unmodified Program or a work based
+on the Program.
+  To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy.  Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+  To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies.  Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+  An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License.  If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+  1. Source Code.
+  The "source code" for a work means the preferred form of the work
+for making modifications to it.  "Object code" means any non-source
+form of a work.
+  A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+  The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form.  A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+  The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities.  However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work.  For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+  The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+  The Corresponding Source for a work in source code form is that
+same work.
+  2. Basic Permissions.
+  All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met.  This License explicitly affirms your unlimited
+permission to run the unmodified Program.  The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work.  This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+  You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force.  You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright.  Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+  Conveying under any other circumstances is permitted solely under
+the conditions stated below.  Sublicensing is not allowed; section 10
+makes it unnecessary.
+  3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+  No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+  When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+  4. Conveying Verbatim Copies.
+  You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+  You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+  5. Conveying Modified Source Versions.
+  You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+    a) The work must carry prominent notices stating that you modified
+    it, and giving a relevant date.
+    b) The work must carry prominent notices stating that it is
+    released under this License and any conditions added under section
+    7.  This requirement modifies the requirement in section 4 to
+    "keep intact all notices".
+    c) You must license the entire work, as a whole, under this
+    License to anyone who comes into possession of a copy.  This
+    License will therefore apply, along with any applicable section 7
+    additional terms, to the whole of the work, and all its parts,
+    regardless of how they are packaged.  This License gives no
+    permission to license the work in any other way, but it does not
+    invalidate such permission if you have separately received it.
+    d) If the work has interactive user interfaces, each must display
+    Appropriate Legal Notices; however, if the Program has interactive
+    interfaces that do not display Appropriate Legal Notices, your
+    work need not make them do so.
+  A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit.  Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+  6. Conveying Non-Source Forms.
+  You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+    a) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by the
+    Corresponding Source fixed on a durable physical medium
+    customarily used for software interchange.
+    b) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by a
+    written offer, valid for at least three years and valid for as
+    long as you offer spare parts or customer support for that product
+    model, to give anyone who possesses the object code either (1) a
+    copy of the Corresponding Source for all the software in the
+    product that is covered by this License, on a durable physical
+    medium customarily used for software interchange, for a price no
+    more than your reasonable cost of physically performing this
+    conveying of source, or (2) access to copy the
+    Corresponding Source from a network server at no charge.
+    c) Convey individual copies of the object code with a copy of the
+    written offer to provide the Corresponding Source.  This
+    alternative is allowed only occasionally and noncommercially, and
+    only if you received the object code with such an offer, in accord
+    with subsection 6b.
+    d) Convey the object code by offering access from a designated
+    place (gratis or for a charge), and offer equivalent access to the
+    Corresponding Source in the same way through the same place at no
+    further charge.  You need not require recipients to copy the
+    Corresponding Source along with the object code.  If the place to
+    copy the object code is a network server, the Corresponding Source
+    may be on a different server (operated by you or a third party)
+    that supports equivalent copying facilities, provided you maintain
+    clear directions next to the object code saying where to find the
+    Corresponding Source.  Regardless of what server hosts the
+    Corresponding Source, you remain obligated to ensure that it is
+    available for as long as needed to satisfy these requirements.
+    e) Convey the object code using peer-to-peer transmission, provided
+    you inform other peers where the object code and Corresponding
+    Source of the work are being offered to the general public at no
+    charge under subsection 6d.
+  A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+  A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling.  In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage.  For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product.  A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+  "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source.  The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+  If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information.  But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+  The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed.  Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+  Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+  7. Additional Terms.
+  "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law.  If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+  When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it.  (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.)  You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+  Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+    a) Disclaiming warranty or limiting liability differently from the
+    terms of sections 15 and 16 of this License; or
+    b) Requiring preservation of specified reasonable legal notices or
+    author attributions in that material or in the Appropriate Legal
+    Notices displayed by works containing it; or
+    c) Prohibiting misrepresentation of the origin of that material, or
+    requiring that modified versions of such material be marked in
+    reasonable ways as different from the original version; or
+    d) Limiting the use for publicity purposes of names of licensors or
+    authors of the material; or
+    e) Declining to grant rights under trademark law for use of some
+    trade names, trademarks, or service marks; or
+    f) Requiring indemnification of licensors and authors of that
+    material by anyone who conveys the material (or modified versions of
+    it) with contractual assumptions of liability to the recipient, for
+    any liability that these contractual assumptions directly impose on
+    those licensors and authors.
+  All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10.  If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term.  If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+  If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+  Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+  8. Termination.
+  You may not propagate or modify a covered work except as expressly
+provided under this License.  Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+  However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+  Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+  Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License.  If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+  9. Acceptance Not Required for Having Copies.
+  You are not required to accept this License in order to receive or
+run a copy of the Program.  Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance.  However,
+nothing other than this License grants you permission to propagate or
+modify any covered work.  These actions infringe copyright if you do
+not accept this License.  Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+  10. Automatic Licensing of Downstream Recipients.
+  Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License.  You are not responsible
+for enforcing compliance by third parties with this License.
+  An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations.  If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+  You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License.  For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+  11. Patents.
+  A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based.  The
+work thus licensed is called the contributor's "contributor version".
+  A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version.  For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+  Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+  In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement).  To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+  If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients.  "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+  If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+  A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License.  You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+  Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+  12. No Surrender of Others' Freedom.
+  If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all.  For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+  13. Use with the GNU Affero General Public License.
+  Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU Affero General Public License into a single
+combined work, and to convey the resulting work.  The terms of this
+License will continue to apply to the part which is the covered work,
+but the special requirements of the GNU Affero General Public License,
+section 13, concerning interaction through a network will apply to the
+combination as such.
+  14. Revised Versions of this License.
+  The Free Software Foundation may publish revised and/or new versions of
+the GNU General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+  Each version is given a distinguishing version number.  If the
+Program specifies that a certain numbered version of the GNU General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation.  If the Program does not specify a version number of the
+GNU General Public License, you may choose any version ever published
+by the Free Software Foundation.
+  If the Program specifies that a proxy can decide which future
+versions of the GNU General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+  Later license versions may give you additional or different
+permissions.  However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+  15. Disclaimer of Warranty.
+  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+  16. Limitation of Liability.
+  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+  17. Interpretation of Sections 15 and 16.
+  If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+                     END OF TERMS AND CONDITIONS
+            How to Apply These Terms to Your New Programs
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+Also add information on how to contact you by electronic and paper mail.
+  If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+    <program>  Copyright (C) <year>  <name of author>
+    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+  You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<https://www.gnu.org/licenses/>.
+  The GNU General Public License does not permit incorporating your program
+into proprietary programs.  If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library.  If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.  But first, please read
+<https://www.gnu.org/licenses/why-not-lgpl.html>.

training/dataset/library/README.md ADDED Viewed

	@@ -0,0 +1,12 @@

+# Face-X-ray
+An unofficial PyTorch re-implementation of Face X-ray.
+This repo contains code for the BI (blended image) data generation pipeline from [Face X-ray for More General Face Forgery Detection](https://arxiv.org/abs/1912.13458) by Lingzhi Li, Jianmin Bao, Ting Zhang, Hao Yang, Dong Chen, Fang Wen, Baining Guo.
+# Usage
+Just run bi_online_generation.py and you will get the following result, which is described in Figure 5 of the paper.
+![demo](all_in_one.jpg)
+To get the whole BI dataset, you will need to crop all the faces and compute their landmarks as described in the code.

training/dataset/library/all_in_one.jpg ADDED Viewed

training/dataset/library/bi_online_generation.py ADDED Viewed

	@@ -0,0 +1,241 @@

+import dlib
+from skimage import io
+from skimage import transform as sktransform
+import numpy as np
+from matplotlib import pyplot as plt
+import json
+import os
+import random
+from PIL import Image
+from imgaug import augmenters as iaa
+from .DeepFakeMask import dfl_full,facehull,components,extended
+import cv2
+import tqdm
+def name_resolve(path):
+    name = os.path.splitext(os.path.basename(path))[0]
+    vid_id, frame_id = name.split('_')[0:2]
+    return vid_id, frame_id
+def total_euclidean_distance(a,b):
+    assert len(a.shape) == 2
+    return np.sum(np.linalg.norm(a-b,axis=1))
+def random_get_hull(landmark,img1,hull_type):
+    if hull_type == 0:
+        mask = dfl_full(landmarks=landmark.astype('int32'),face=img1, channels=3).mask
+        return mask/255
+    elif hull_type == 1:
+        mask = extended(landmarks=landmark.astype('int32'),face=img1, channels=3).mask
+        return mask/255
+    elif hull_type == 2:
+        mask = components(landmarks=landmark.astype('int32'),face=img1, channels=3).mask
+        return mask/255
+    elif hull_type == 3:
+        mask = facehull(landmarks=landmark.astype('int32'),face=img1, channels=3).mask
+        return mask/255
+def random_erode_dilate(mask, ksize=None):
+    if random.random()>0.5:
+        if ksize is  None:
+            ksize = random.randint(1,21)
+        if ksize % 2 == 0:
+            ksize += 1
+        mask = np.array(mask).astype(np.uint8)*255
+        kernel = np.ones((ksize,ksize),np.uint8)
+        mask = cv2.erode(mask,kernel,1)/255
+    else:
+        if ksize is  None:
+            ksize = random.randint(1,5)
+        if ksize % 2 == 0:
+            ksize += 1
+        mask = np.array(mask).astype(np.uint8)*255
+        kernel = np.ones((ksize,ksize),np.uint8)
+        mask = cv2.dilate(mask,kernel,1)/255
+    return mask
+# borrow from https://github.com/MarekKowalski/FaceSwap
+def blendImages(src, dst, mask, featherAmount=0.2):
+    maskIndices = np.where(mask != 0)
+    src_mask = np.ones_like(mask)
+    dst_mask = np.zeros_like(mask)
+    maskPts = np.hstack((maskIndices[1][:, np.newaxis], maskIndices[0][:, np.newaxis]))
+    faceSize = np.max(maskPts, axis=0) - np.min(maskPts, axis=0)
+    featherAmount = featherAmount * np.max(faceSize)
+    hull = cv2.convexHull(maskPts)
+    dists = np.zeros(maskPts.shape[0])
+    for i in range(maskPts.shape[0]):
+        dists[i] = cv2.pointPolygonTest(hull, (maskPts[i, 0], maskPts[i, 1]), True)
+    weights = np.clip(dists / featherAmount, 0, 1)
+    composedImg = np.copy(dst)
+    composedImg[maskIndices[0], maskIndices[1]] = weights[:, np.newaxis] * src[maskIndices[0], maskIndices[1]] + (1 - weights[:, np.newaxis]) * dst[maskIndices[0], maskIndices[1]]
+    composedMask = np.copy(dst_mask)
+    composedMask[maskIndices[0], maskIndices[1]] = weights[:, np.newaxis] * src_mask[maskIndices[0], maskIndices[1]] + (
+                1 - weights[:, np.newaxis]) * dst_mask[maskIndices[0], maskIndices[1]]
+    return composedImg, composedMask
+# borrow from https://github.com/MarekKowalski/FaceSwap
+def colorTransfer(src, dst, mask):
+    transferredDst = np.copy(dst)
+    maskIndices = np.where(mask != 0)
+    maskedSrc = src[maskIndices[0], maskIndices[1]].astype(np.int32)
+    maskedDst = dst[maskIndices[0], maskIndices[1]].astype(np.int32)
+    meanSrc = np.mean(maskedSrc, axis=0)
+    meanDst = np.mean(maskedDst, axis=0)
+    maskedDst = maskedDst - meanDst
+    maskedDst = maskedDst + meanSrc
+    maskedDst = np.clip(maskedDst, 0, 255)
+    transferredDst[maskIndices[0], maskIndices[1]] = maskedDst
+    return transferredDst
+class BIOnlineGeneration():
+    def __init__(self):
+        with open('precomuted_landmarks.json', 'r') as f:
+            self.landmarks_record =  json.load(f)
+            for k,v in self.landmarks_record.items():
+                self.landmarks_record[k] = np.array(v)
+        # extract all frame from all video in the name of {videoid}_{frameid}
+        self.data_list = [
+                    '000_0000.png',
+                    '001_0000.png'
+                    ] * 10000
+        # predefine mask distortion
+        self.distortion = iaa.Sequential([iaa.PiecewiseAffine(scale=(0.01, 0.15))])
+    def gen_one_datapoint(self):
+        background_face_path = random.choice(self.data_list)
+        data_type = 'real' if random.randint(0,1) else 'fake'
+        if data_type == 'fake' :
+            face_img,mask =  self.get_blended_face(background_face_path)
+            mask = ( 1 - mask ) * mask * 4
+        else:
+            face_img = io.imread(background_face_path)
+            mask = np.zeros((317, 317, 1))
+        # randomly downsample after BI pipeline
+        if random.randint(0,1):
+            aug_size = random.randint(64, 317)
+            face_img = Image.fromarray(face_img)
+            if random.randint(0,1):
+                face_img = face_img.resize((aug_size, aug_size), Image.BILINEAR)
+            else:
+                face_img = face_img.resize((aug_size, aug_size), Image.NEAREST)
+            face_img = face_img.resize((317, 317),Image.BILINEAR)
+            face_img = np.array(face_img)
+        # random jpeg compression after BI pipeline
+        if random.randint(0,1):
+            quality = random.randint(60, 100)
+            encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), quality]
+            face_img_encode = cv2.imencode('.jpg', face_img, encode_param)[1]
+            face_img = cv2.imdecode(face_img_encode, cv2.IMREAD_COLOR)
+        face_img = face_img[60:317,30:287,:]
+        mask = mask[60:317,30:287,:]
+        # random flip
+        if random.randint(0,1):
+            face_img = np.flip(face_img,1)
+            mask = np.flip(mask,1)
+        return face_img,mask,data_type
+    def get_blended_face(self,background_face_path):
+        background_face = io.imread(background_face_path)
+        background_landmark = self.landmarks_record[background_face_path]
+        foreground_face_path = self.search_similar_face(background_landmark,background_face_path)
+        foreground_face = io.imread(foreground_face_path)
+        # down sample before blending
+        aug_size = random.randint(128,317)
+        background_landmark = background_landmark * (aug_size/317)
+        foreground_face = sktransform.resize(foreground_face,(aug_size,aug_size),preserve_range=True).astype(np.uint8)
+        background_face = sktransform.resize(background_face,(aug_size,aug_size),preserve_range=True).astype(np.uint8)
+        # get random type of initial blending mask
+        mask = random_get_hull(background_landmark, background_face)
+        #  random deform mask
+        mask = self.distortion.augment_image(mask)
+        mask = random_erode_dilate(mask)
+        # filte empty mask after deformation
+        if np.sum(mask) == 0 :
+            raise NotImplementedError
+        # apply color transfer
+        foreground_face = colorTransfer(background_face, foreground_face, mask*255)
+        # blend two face
+        blended_face, mask = blendImages(foreground_face, background_face, mask*255)
+        blended_face = blended_face.astype(np.uint8)
+        # resize back to default resolution
+        blended_face = sktransform.resize(blended_face,(317,317),preserve_range=True).astype(np.uint8)
+        mask = sktransform.resize(mask,(317,317),preserve_range=True)
+        mask = mask[:,:,0:1]
+        return blended_face,mask
+    def search_similar_face(self,this_landmark,background_face_path):
+        vid_id, frame_id = name_resolve(background_face_path)
+        min_dist = 99999999
+        # random sample 5000 frame from all frams:
+        all_candidate_path = random.sample( self.data_list, k=5000)
+        # filter all frame that comes from the same video as background face
+        all_candidate_path = filter(lambda k:name_resolve(k)[0] != vid_id, all_candidate_path)
+        all_candidate_path = list(all_candidate_path)
+        # loop throungh all candidates frame to get best match
+        for candidate_path in all_candidate_path:
+            candidate_landmark = self.landmarks_record[candidate_path].astype(np.float32)
+            candidate_distance = total_euclidean_distance(candidate_landmark, this_landmark)
+            if candidate_distance < min_dist:
+                min_dist = candidate_distance
+                min_path = candidate_path
+        return min_path
+if __name__ == '__main__':
+    ds = BIOnlineGeneration()
+    from tqdm import tqdm
+    all_imgs = []
+    for _ in tqdm(range(50)):
+        img,mask,label = ds.gen_one_datapoint()
+        mask = np.repeat(mask,3,2)
+        mask = (mask*255).astype(np.uint8)
+        img_cat = np.concatenate([img,mask],1)
+        all_imgs.append(img_cat)
+    all_in_one = Image.new('RGB', (2570,2570))
+    for x in range(5):
+        for y in range(10):
+            idx = x*10+y
+            im = Image.fromarray(all_imgs[idx])
+            dx = x*514
+            dy = y*257
+            all_in_one.paste(im, (dx,dy))
+    all_in_one.save("all_in_one.jpg")

training/dataset/library/precomuted_landmarks.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"000_0000.png": [[56, 143], [57, 168], [61, 192], [67, 216], [76, 238], [93, 257], [112, 273], [133, 288], [156, 291], [178, 287], [198, 271], [219, 256], [236, 237], [246, 216], [250, 192], [252, 167], [252, 142], [69, 131], [84, 123], [102, 123], [119, 126], [137, 132], [178, 130], [195, 122], [213, 119], [230, 119], [244, 126], [158, 149], [158, 168], [158, 186], [159, 205], [140, 211], [148, 214], [158, 219], [168, 214], [176, 210], [91, 150], [102, 143], [116, 144], [127, 154], [115, 156], [101, 156], [188, 152], [199, 142], [213, 141], [224, 148], [214, 153], [201, 154], [117, 232], [134, 229], [148, 228], [158, 231], [168, 228], [181, 229], [195, 232], [182, 246], [169, 253], [158, 254], [147, 254], [132, 247], [125, 234], [147, 238], [158, 239], [168, 237], [188, 234], [168, 237], [158, 239], [147, 238]], "001_0000.png": [[56, 143], [57, 168], [61, 192], [67, 216], [76, 238], [93, 257], [112, 273], [133, 288], [156, 291], [178, 287], [198, 271], [219, 256], [236, 237], [246, 216], [250, 192], [252, 167], [252, 142], [69, 131], [84, 123], [102, 123], [119, 126], [137, 132], [178, 130], [195, 122], [213, 119], [230, 119], [244, 126], [158, 149], [158, 168], [158, 186], [159, 205], [140, 211], [148, 214], [158, 219], [168, 214], [176, 210], [91, 150], [102, 143], [116, 144], [127, 154], [115, 156], [101, 156], [188, 152], [199, 142], [213, 141], [224, 148], [214, 153], [201, 154], [117, 232], [134, 229], [148, 228], [158, 231], [168, 228], [181, 229], [195, 232], [182, 246], [169, 253], [158, 254], [147, 254], [132, 247], [125, 234], [147, 238], [158, 239], [168, 237], [188, 234], [168, 237], [158, 239], [147, 238]]}

training/dataset/lrl_dataset.py ADDED Viewed

	@@ -0,0 +1,139 @@

+import os
+import sys
+current_file_path = os.path.abspath(__file__)
+parent_dir = os.path.dirname(os.path.dirname(current_file_path))
+project_root_dir = os.path.dirname(parent_dir)
+sys.path.append(parent_dir)
+sys.path.append(project_root_dir)
+import cv2
+import random
+import yaml
+import torch
+import numpy as np
+from copy import deepcopy
+import albumentations as A
+from .abstract_dataset import DeepfakeAbstractBaseDataset
+from PIL import Image
+c=0
+class LRLDataset(DeepfakeAbstractBaseDataset):
+    def __init__(self, config=None, mode='train'):
+        super().__init__(config, mode)
+        global c
+        c=config
+    def multi_pass_filter(self, img, r1=0.33, r2=0.66):
+        rows, cols = img.shape
+        k = cols / rows
+        mask = np.zeros((rows, cols), np.uint8)
+        x, y = np.ogrid[:rows, :cols]
+        mask_area = (k * x + y < r1 * cols)
+        mask[mask_area] = 1
+        low_mask = mask
+        mask = np.ones((rows, cols), np.uint8)
+        x, y = np.ogrid[:rows, :cols]
+        mask_area = (k * x + y < r2 * cols)
+        mask[mask_area] = 0
+        high_mask = mask
+        mask1 = np.zeros((rows, cols), np.uint8)
+        mask1[low_mask == 0] = 1
+        mask2 = np.zeros((rows, cols), np.uint8)
+        mask2[high_mask == 0] = 1
+        mid_mask = mask1 * mask2
+        return low_mask, mid_mask, high_mask
+    def image2dct(self,img):
+        img_gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
+        img_gray = np.float32(img_gray)
+        img_dct = cv2.dct(img_gray)
+        # img_dct = np.log(np.abs(img_dct)+1e-6)
+        low_mask, mid_mask, high_mask = self.multi_pass_filter(img_dct, r1=0.33, r2=0.33)
+        img_dct_filterd = high_mask * img_dct
+        img_idct = cv2.idct(img_dct_filterd)
+        return img_idct
+    def __getitem__(self, index):
+        image_trans, label, landmark_tensors, mask_trans = super().__getitem__(index, no_norm=True)
+        img_idct = self.image2dct(image_trans)
+        # normalize idct
+        img_idct = (img_idct / 255 - 0.5) / 0.5
+        # img_idct = img_idct[np.newaxis, ...]
+        # To tensor and normalize for fake and real images
+        image_trans = self.normalize(self.to_tensor(image_trans))
+        img_idct_trans = self.to_tensor(img_idct)
+        mask_trans = torch.from_numpy(mask_trans)
+        mask_trans = mask_trans.squeeze(2).permute(2, 0, 1)
+        mask_trans = torch.mean(mask_trans, dim=0, keepdim=True)
+        return image_trans, label, img_idct_trans, mask_trans
+    def __len__(self):
+        return len(self.image_list)
+    @staticmethod
+    def collate_fn(batch):
+        """
+        Collate a batch of data points.
+        Args:
+            batch (list): A list of tuples containing the image tensor and label tensor.
+        Returns:
+            A tuple containing the image tensor, the label tensor, the landmark tensor,
+            and the mask tensor.
+        """
+        global c
+        images, labels, img_idct_trans, masks = zip(*batch)
+        # Stack the image, label, landmark, and mask tensors
+        images = torch.stack(images, dim=0)
+        labels = torch.LongTensor(labels)
+        masks = torch.stack(masks, dim=0)
+        img_idct_trans = torch.stack(img_idct_trans, dim=0)
+        data_dict = {
+            'image': images,
+            'label': labels,
+            'landmark': None,
+            'idct': img_idct_trans,
+            'mask': masks,
+        }
+        return data_dict
+if __name__ == '__main__':
+    with open(r'H:\code\DeepfakeBench\training\config\detector\lrl_effnb4.yaml', 'r') as f:
+        config = yaml.safe_load(f)
+    with open(r'H:\code\DeepfakeBench\training\config\train_config.yaml', 'r') as f:
+        config2 = yaml.safe_load(f)
+    random.seed(config['manualSeed'])
+    torch.manual_seed(config['manualSeed'])
+    if config['cuda']:
+        torch.cuda.manual_seed_all(config['manualSeed'])
+    config2['data_manner'] = 'lmdb'
+    config['dataset_json_folder'] = 'preprocessing/dataset_json_v3'
+    config.update(config2)
+    train_set = LRLDataset(config=config, mode='train')
+    train_data_loader = \
+        torch.utils.data.DataLoader(
+            dataset=train_set,
+            batch_size=4,
+            shuffle=True,
+            num_workers=0,
+            collate_fn=train_set.collate_fn,
+        )
+    from tqdm import tqdm
+    for iteration, batch in enumerate(tqdm(train_data_loader)):
+        print(iteration)
+        if iteration > 10:
+            break

training/dataset/lsda_dataset.py ADDED Viewed

	@@ -0,0 +1,382 @@

+import sys
+sys.path.append('.')
+import os
+import sys
+import json
+import math
+import yaml
+import numpy as np
+import cv2
+import random
+from PIL import Image
+import torch
+from torch.autograd import Variable
+from torch.utils import data
+from torchvision import transforms as T
+import skimage.draw
+import albumentations as alb
+from albumentations import Compose, RandomBrightnessContrast, \
+    HorizontalFlip, FancyPCA, HueSaturationValue, OneOf, ToGray, \
+    ShiftScaleRotate, ImageCompression, PadIfNeeded, GaussNoise, GaussianBlur, RandomResizedCrop
+from torch.utils.data.sampler import Sampler
+from .abstract_dataset import DeepfakeAbstractBaseDataset
+# Machine-specific training directory; not referenced by any other code in
+# this file.
+private_path_prefix = '/home/zhaokangran/cvpr24/training'
+# Maps a source name to its integer class label: 0 is real, 1-4 are the four
+# forgery methods. The commented-out entries are pairwise combinations that
+# are currently disabled.
+fake_dict = {
+    'real': 0,
+    'Deepfakes': 1,
+    'Face2Face': 2,
+    'FaceSwap': 3,
+    'NeuralTextures': 4,
+    # 'Deepfakes_Face2Face': 5,
+    # 'Deepfakes_FaceSwap': 6,
+    # 'Deepfakes_NeuralTextures': 7,
+    # 'Deepfakes_real': 8,
+    # 'Face2Face_FaceSwap': 9,
+    # 'Face2Face_NeuralTextures': 10,
+    # 'Face2Face_real': 11,
+    # 'FaceSwap_NeuralTextures': 12,
+    # 'FaceSwap_real': 13,
+    # 'NeuralTextures_real': 14,
+}
+class RandomDownScale(alb.core.transforms_interface.ImageOnlyTransform):
+	def apply(self,img,**params):
+		return self.randomdownscale(img)
+	def randomdownscale(self,img):
+		keep_ratio=True
+		keep_input_shape=True
+		H,W,C=img.shape
+		ratio_list=[2,4]
+		r=ratio_list[np.random.randint(len(ratio_list))]
+		img_ds=cv2.resize(img,(int(W/r),int(H/r)),interpolation=cv2.INTER_NEAREST)
+		if keep_input_shape:
+			img_ds=cv2.resize(img_ds,(W,H),interpolation=cv2.INTER_LINEAR)
+		return img_ds
+# Light pipeline: JPEG compression and Gaussian blur, each applied with
+# probability 0.5. Not referenced by the other code in this file.
+augmentation_methods = alb.Compose([
+    # alb.RandomBrightnessContrast(brightness_limit=(-0.1,0.1), contrast_limit=(-0.1,0.1), p=0.5),
+    # HorizontalFlip(p=0.5),
+    # RandomDownScale(p=0.5),
+    # alb.Sharpen(alpha=(0.2, 0.5), lightness=(0.5, 1.0), p=0.5),
+    alb.ImageCompression(quality_lower=40,quality_upper=100,p=0.5),
+    GaussianBlur(blur_limit=[3, 7], p=0.5)
+], p=1.)
+# Training pipeline used by LSDADataset.__getitem__. The additional targets
+# image1..image4 let the same transform be applied to up to five images in
+# one call.
+augmentation_methods2 = alb.Compose([
+    alb.RandomBrightnessContrast(brightness_limit=(-0.1,0.1), contrast_limit=(-0.1,0.1), p=0.5),
+    HorizontalFlip(p=0.5),
+    RandomDownScale(p=0.5),
+    alb.Sharpen(alpha=(0.2, 0.5), lightness=(0.5, 1.0), p=0.5),
+    alb.ImageCompression(quality_lower=40,quality_upper=100,p=0.5),
+],
+additional_targets={f'image1':'image', f'image2':'image', f'image3':'image', f'image4':'image'},
+p=1.)
+# Module-level tensor conversion with mean = std = 0.5 per channel.
+# LSDADataset builds its own equivalents from the config instead.
+normalize = T.Normalize(mean=[0.5, 0.5, 0.5],
+                                     std =[0.5, 0.5, 0.5])
+transforms1 = T.Compose([
+            T.ToTensor(),
+            normalize
+        ])
+#==========================================
+def load_rgb(file_path, size=256):
+    assert os.path.exists(file_path), f"{file_path} is not exists"
+    img = cv2.imread(file_path)
+    if img is None:
+        raise ValueError('Img is None: {}'.format(file_path))
+    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+    img = cv2.resize(img, (size, size), interpolation=cv2.INTER_CUBIC)
+    return Image.fromarray(np.array(img, dtype=np.uint8))
+def load_mask(file_path, size=256):
+    mask = cv2.imread(file_path, 0)
+    if mask is None:
+        mask = np.zeros((size, size))
+    mask = cv2.resize(mask, (size, size))/255
+    mask = np.expand_dims(mask, axis=2)
+    return np.float32(mask)
+def add_gaussian_noise(ins, mean=0, stddev=0.1):
+    noise = ins.data.new(ins.size()).normal_(mean, stddev)
+    return torch.clamp(ins + noise, -1, 1)
+# class RandomBlur(object):
+#     """ Randomly blur an image
+#     """
+#     def __init__(self, ratio,)
+# class RandomCompression(object):
+#     """ Randomly compress an image
+#     """
+class CustomSampler(Sampler):
+    def __init__(self, num_groups=2*360, n_frame_per_vid=32, videos_per_group=5, batch_size=10):
+        self.num_groups = num_groups
+        self.n_frame_per_vid = n_frame_per_vid
+        self.videos_per_group = videos_per_group
+        self.batch_size = batch_size
+        assert self.batch_size % self.videos_per_group == 0, "Batch size should be a multiple of videos_per_group."
+        self.groups_per_batch = self.batch_size // self.videos_per_group
+    def __iter__(self):
+        group_indices = list(range(self.num_groups))
+        random.shuffle(group_indices)
+        # For each batch
+        for i in range(0, len(group_indices), self.groups_per_batch):
+            selected_groups = group_indices[i:i+self.groups_per_batch]
+            # For each group
+            for group in selected_groups:
+                frame_idx = random.randint(0, self.n_frame_per_vid - 1)  # Random frame index for this group's videos
+                # Return the frame for each video in this group using the same frame_idx
+                for video_offset in range(self.videos_per_group):
+                    yield group * self.videos_per_group * self.n_frame_per_vid + video_offset * self.n_frame_per_vid + frame_idx
+    def __len__(self):
+        return self.num_groups * self.videos_per_group  # Total frames
+class LSDADataset(DeepfakeAbstractBaseDataset):
+    on_3060 = "3060" in torch.cuda.get_device_name()
+    transfer_dict = {
+        'youtube':'FF-real',
+        'Deepfakes':'FF-DF',
+        'Face2Face':'FF-F2F',
+        'FaceSwap':'FF-FS',
+        'NeuralTextures':'FF-NT'
+    }
+    if on_3060:
+        data_root = r'F:\Datasets\rgb\FaceForensics++'
+    else:
+        data_root = r'./datasets/FaceForensics++'
+    data_list = {
+        'test': r'./datasets/FaceForensics++/test.json',
+        'train': r'./datasets/FaceForensics++/train.json',
+        'eval': r'./datasets/FaceForensics++/val.json'
+    }
+    def __init__(self, config=None, mode='train', with_dataset=['Deepfakes', 'Face2Face', 'FaceSwap', 'NeuralTextures']):
+        super().__init__(config, mode)
+        self.mode = mode
+        self.res = config['resolution']
+        self.fake_dict = fake_dict
+        # transform
+        self.normalize = T.Normalize(mean=config['mean'],
+                                     std =config['std'])
+        # data aug and transform
+        self.transforms1 = T.Compose([
+            T.ToTensor(),
+            self.normalize
+        ])
+        self.img_lines = []
+        self.config=config
+        with open(self.config['dataset_json_folder']+'/FaceForensics++.json', 'r') as fd:
+            self.img_json = json.load(fd)
+        with open(self.data_list[mode], 'r') as fd:
+            data = json.load(fd)
+            img_lines = []
+            for pair in data:
+                r1, r2 = pair
+                step = 1
+                # collect a group with 1+len(fakes) videos, each video has self.frames[mode] frames。这里就是按同一个video这种顺序来存的，所以读的时候自然只要有了offset，就能对应的取了
+                #此外，这里面存的压根就不是路径，而是规范化的内容。
+                for i in range(0, config['frame_num'][mode], step):
+                    # collect real data here(r1)
+                    img_lines.append(('{}/{}'.format('youtube', r1), i, 0, mode))
+                for fake_d in with_dataset:
+                    # collect fake data here(r1_r2 * 4)
+                    for i in range(0, config['frame_num'][mode], step):
+                        img_lines.append(
+                            ('{}/{}_{}'.format(fake_d, r1, r2), i, self.fake_dict[fake_d], mode))
+                for i in range(0, config['frame_num'][mode], step):
+                    # collect real data here(r2)
+                    img_lines.append(('{}/{}'.format('youtube', r2), i, 0, mode))
+                for fake_d in with_dataset:
+                    # collect fake data here(r2_r1 * 4)
+                    for i in range(0, config['frame_num'][mode], step):
+                        img_lines.append(
+                            ('{}/{}_{}'.format(fake_d, r2, r1), i, self.fake_dict[fake_d], mode))
+        # 2*360 (groups) * 1+len(with_dataset) (videos in each group) * self.frames[mode] (frames in each video)
+        assert len(img_lines) == 2*len(data) * (1 + len(with_dataset)) * config['frame_num'][mode], "to match our custom sampler, the length should be 2*360*(1+len(with_dataset))*frames[mode]"
+        self.img_lines.extend(img_lines)
+    def get_ids_from_path(self, path):
+        parts = path.split('/')
+        try:
+            if 'youtube' in path:
+                return [int(parts[-1])]
+            else:
+                return list(map(int, parts[-1].split('_')))
+        except:
+            raise ValueError("wrong path: {}".format(path))
+    def load_image(self, name, idx):
+        instance_type, video_name = name.split('/')
+        #其实并没有完全对应，而只是保证在同一video的目标时间区间内的一帧
+        all_frames = self.img_json[self.data_root.split(os.path.sep)[-1]][self.transfer_dict[instance_type]]['train']['c23'][video_name]['frames']
+        img_path = all_frames[idx]
+        impath = img_path
+        img = self.load_rgb(impath)
+        return img
+    def __getitem__(self, index):
+        name, idx, label, mode = self.img_lines[index] #这个sampler的目的是不要取重复video的图。
+        label = int(label)  # specific fake label from 1-4
+        #取img没什么好说的。然后在这里把规范化的img_lines转为实际路径。
+        try:
+            img = self.load_image(name, idx)
+        except Exception as e:
+            # 下面处理不太合适，取的不是预期的video_id/fake_method，影响后面的lsda。
+            # random_idx = random.randint(0, len(self.img_lines)-1)
+            # print(f'Error loading image {name} at index {idx} due to the loading error. Try another one at index {random_idx}')
+            # return self.__getitem__(random_idx)
+            #边界条件判断，取同video的。
+            if idx==0:
+                new_index = index+1
+            elif idx==31:
+                new_index = index-1
+            else:
+                new_index = index + random.choice([-1,1]) # 通过随机防止死递归
+            print(f'Error loading image {name} at index {idx} due to the loading error. Try another one at index {new_index}')
+            return self.__getitem__(new_index)
+        if self.mode=='train':
+            # do augmentation
+            img = np.asarray(img) # convert PIL to numpy
+            img = augmentation_methods2(image=img)['image']
+            img = Image.fromarray(np.array(img, dtype=np.uint8)) # covnert numpy to PIL
+            # transform with PIL as input
+            img = self.transforms1(img)
+        else:
+            raise ValueError("Not implemented yet")
+        return (img, label)
+    def __len__(self):
+        return len(self.img_lines)
+    @staticmethod
+    def collate_fn(batch):
+        # Unzip the batch into images and labels
+        images, labels = zip(*batch)
+        # images, labels = zip(batch['image'], batch['label'])
+        # image_list = []
+        # for i in range(len(images)//5):
+        #     img = images[i*5:(i+1)*5]
+        #     # do augmentation
+        #     imgs_aug = augmentation_methods2(image=np.asarray(img[0]), image1=np.asarray(img[1]), image2=np.asarray(img[2]), image3=np.asarray(img[3]), image4=np.asarray(img[4]))
+        #     for k in imgs_aug:
+        #         img_aug = Image.fromarray(np.array(imgs_aug[k], dtype=np.uint8)) # covnert numpy to PIL
+        #     # transform with PIL as input
+        #         img_aug = transforms1(img_aug)
+        #         image_list.append(img_aug)
+        # Stack the images and labels
+        images = torch.stack(images, dim=0)  # Shape: (batch_size, c, h, w)
+        labels = torch.tensor(labels, dtype=torch.long)
+        bs, c, h, w = images.shape
+        # Assume videos_per_group is 5 in our case
+        videos_per_group = 5
+        num_groups = bs // videos_per_group
+        # Reshape to get the group dimension: (num_groups, videos_per_group, c, h, w)
+        images_grouped = images.view(num_groups, videos_per_group, c, h, w)
+        labels_grouped = labels.view(num_groups, videos_per_group)
+        valid_indices = []
+        for i, group in enumerate(labels_grouped):
+            if set(group.numpy().tolist()) == {0, 1, 2, 3, 4}:
+                valid_indices.append(i)
+            # elif set(group.numpy().tolist()) == {0, 1, 2, 3}:
+            #     valid_indices.append(i)
+            # elif set(group.numpy().tolist()) == {0, 1, 2, 3, 4, 5}:
+            #     valid_indices.append(i)
+        images_grouped = images_grouped[valid_indices]
+        labels_grouped = labels_grouped[valid_indices]
+        if not valid_indices:
+            raise ValueError("No valid groups found in this batch.")
+        # # Shuffle the video order within each group
+        # for i in range(num_groups):
+        #     perm = torch.randperm(videos_per_group)
+        #     images_grouped[i] = images_grouped[i, perm]
+        #     labels_grouped[i] = labels_grouped[i, perm]
+        # # Flatten back to original shape but with shuffled video order
+        # images_shuffled = images_grouped.view(num_groups, videos_per_group, c, h, w)
+        # labels_shuffled = labels_grouped.view(bs)
+        return {'image': images_grouped, 'label': labels_grouped, 'mask': None, 'landmark': None}
+if __name__ == '__main__':
+    with open('/data/home/zhiyuanyan/DeepfakeBench/training/config/detector/lsda.yaml', 'r') as f:
+        config = yaml.safe_load(f)
+    train_set = LSDADataset(config=config, mode='train')
+    custom_sampler = CustomSampler(num_groups=2*360, n_frame_per_vid=config['frame_num']['train'], batch_size=config['train_batchSize'], videos_per_group=5)
+    train_data_loader = \
+        torch.utils.data.DataLoader(
+            dataset=train_set,
+            batch_size=config['train_batchSize'],
+            num_workers=0,
+            sampler=custom_sampler,
+            collate_fn=train_set.collate_fn,
+        )
+    from tqdm import tqdm
+    for iteration, batch in enumerate(tqdm(train_data_loader)):
+        print(iteration)
+        if iteration > 10:
+            break

training/dataset/pair_dataset.py ADDED Viewed

	@@ -0,0 +1,150 @@

+'''
+# author: Zhiyuan Yan
+# email: [email protected]
+# date: 2023-03-30
+The code is designed for scenarios such as disentanglement-based methods where it is necessary to ensure an equal number of positive and negative samples.
+'''
+import torch
+import random
+import numpy as np
+from dataset.abstract_dataset import DeepfakeAbstractBaseDataset
+class pairDataset(DeepfakeAbstractBaseDataset):
+    def __init__(self, config=None, mode='train'):
+        super().__init__(config, mode)
+        # Get real and fake image lists
+        # Fix the label of real images to be 0 and fake images to be 1
+        self.fake_imglist = [(img, label, 1) for img, label in zip(self.image_list, self.label_list) if label != 0]
+        self.real_imglist = [(img, label, 0) for img, label in zip(self.image_list, self.label_list) if label == 0]
+    def __getitem__(self, index, norm=True):
+        # Get the fake and real image paths and labels
+        fake_image_path, fake_spe_label, fake_label = self.fake_imglist[index]
+        real_index = random.randint(0, len(self.real_imglist) - 1)  # Randomly select a real image
+        real_image_path, real_spe_label, real_label = self.real_imglist[real_index]
+        # Get the mask and landmark paths for fake and real images
+        fake_mask_path = fake_image_path.replace('frames', 'masks')
+        fake_landmark_path = fake_image_path.replace('frames', 'landmarks').replace('.png', '.npy')
+        real_mask_path = real_image_path.replace('frames', 'masks')
+        real_landmark_path = real_image_path.replace('frames', 'landmarks').replace('.png', '.npy')
+        # Load the fake and real images
+        fake_image = self.load_rgb(fake_image_path)
+        real_image = self.load_rgb(real_image_path)
+        fake_image = np.array(fake_image)  # Convert to numpy array for data augmentation
+        real_image = np.array(real_image)  # Convert to numpy array for data augmentation
+        # Load mask and landmark (if needed) for fake and real images
+        if self.config['with_mask']:
+            fake_mask = self.load_mask(fake_mask_path)
+            real_mask = self.load_mask(real_mask_path)
+        else:
+            fake_mask, real_mask = None, None
+        if self.config['with_landmark']:
+            fake_landmarks = self.load_landmark(fake_landmark_path)
+            real_landmarks = self.load_landmark(real_landmark_path)
+        else:
+            fake_landmarks, real_landmarks = None, None
+        # Do transforms for fake and real images
+        fake_image_trans, fake_landmarks_trans, fake_mask_trans = self.data_aug(fake_image, fake_landmarks, fake_mask)
+        real_image_trans, real_landmarks_trans, real_mask_trans = self.data_aug(real_image, real_landmarks, real_mask)
+        if not norm:
+            return {"fake": (fake_image_trans, fake_label),
+                    "real": (real_image_trans, real_label)}
+        # To tensor and normalize for fake and real images
+        fake_image_trans = self.normalize(self.to_tensor(fake_image_trans))
+        real_image_trans = self.normalize(self.to_tensor(real_image_trans))
+        # Convert landmarks and masks to tensors if they exist
+        if self.config['with_landmark']:
+            fake_landmarks_trans = torch.from_numpy(fake_landmarks_trans)
+            real_landmarks_trans = torch.from_numpy(real_landmarks_trans)
+        if self.config['with_mask']:
+            fake_mask_trans = torch.from_numpy(fake_mask_trans)
+            real_mask_trans = torch.from_numpy(real_mask_trans)
+        return {"fake": (fake_image_trans, fake_label, fake_spe_label, fake_landmarks_trans, fake_mask_trans),
+                "real": (real_image_trans, real_label, real_spe_label, real_landmarks_trans, real_mask_trans)}
+    def __len__(self):
+        return len(self.fake_imglist)
+    @staticmethod
+    def collate_fn(batch):
+        """
+        Collate a batch of data points.
+        Args:
+            batch (list): A list of tuples containing the image tensor, the label tensor,
+                        the landmark tensor, and the mask tensor.
+        Returns:
+            A tuple containing the image tensor, the label tensor, the landmark tensor,
+            and the mask tensor.
+        """
+        # Separate the image, label, landmark, and mask tensors for fake and real data
+        fake_images, fake_labels, fake_spe_labels, fake_landmarks, fake_masks = zip(*[data["fake"] for data in batch])
+        real_images, real_labels, real_spe_labels, real_landmarks, real_masks = zip(*[data["real"] for data in batch])
+        # Stack the image, label, landmark, and mask tensors for fake and real data
+        fake_images = torch.stack(fake_images, dim=0)
+        fake_labels = torch.LongTensor(fake_labels)
+        fake_spe_labels = torch.LongTensor(fake_spe_labels)
+        real_images = torch.stack(real_images, dim=0)
+        real_labels = torch.LongTensor(real_labels)
+        real_spe_labels = torch.LongTensor(real_spe_labels)
+        # Special case for landmarks and masks if they are None
+        if fake_landmarks[0] is not None:
+            fake_landmarks = torch.stack(fake_landmarks, dim=0)
+        else:
+            fake_landmarks = None
+        if real_landmarks[0] is not None:
+            real_landmarks = torch.stack(real_landmarks, dim=0)
+        else:
+            real_landmarks = None
+        if fake_masks[0] is not None:
+            fake_masks = torch.stack(fake_masks, dim=0)
+        else:
+            fake_masks = None
+        if real_masks[0] is not None:
+            real_masks = torch.stack(real_masks, dim=0)
+        else:
+            real_masks = None
+        # Combine the fake and real tensors and create a dictionary of the tensors
+        images = torch.cat([real_images, fake_images], dim=0)
+        labels = torch.cat([real_labels, fake_labels], dim=0)
+        spe_labels = torch.cat([real_spe_labels, fake_spe_labels], dim=0)
+        if fake_landmarks is not None and real_landmarks is not None:
+            landmarks = torch.cat([real_landmarks, fake_landmarks], dim=0)
+        else:
+            landmarks = None
+        if fake_masks is not None and real_masks is not None:
+            masks = torch.cat([real_masks, fake_masks], dim=0)
+        else:
+            masks = None
+        data_dict = {
+            'image': images,
+            'label': labels,
+            'label_spe': spe_labels,
+            'landmark': landmarks,
+            'mask': masks
+        }
+        return data_dict

training/dataset/sbi_api.py ADDED Viewed

	@@ -0,0 +1,371 @@

+# Created by: Kaede Shiohara
+# Yamasaki Lab at The University of Tokyo
+# [email protected]
+# Copyright (c) 2021
+# 3rd party softwares' licenses are noticed at https://github.com/mapooon/SelfBlendedImages/blob/master/LICENSE
+import torch
+from torchvision import datasets,transforms,utils
+from torch.utils.data import Dataset,IterableDataset
+from glob import glob
+import os
+import numpy as np
+from PIL import Image
+import random
+import cv2
+from torch import nn
+import sys
+import scipy as sp
+from skimage.measure import label, regionprops
+from training.dataset.library.bi_online_generation import random_get_hull
+import albumentations as alb
+import warnings
+warnings.filterwarnings('ignore')
+def alpha_blend(source,target,mask):
+	mask_blured = get_blend_mask(mask)
+	img_blended=(mask_blured * source + (1 - mask_blured) * target)
+	return img_blended,mask_blured
+def dynamic_blend(source,target,mask):
+	mask_blured = get_blend_mask(mask)
+	blend_list=[0.25,0.5,0.75,1,1,1]
+	blend_ratio = blend_list[np.random.randint(len(blend_list))]
+	mask_blured*=blend_ratio
+	img_blended=(mask_blured * source + (1 - mask_blured) * target)
+	return img_blended,mask_blured
+def get_blend_mask(mask):
+	H,W=mask.shape
+	size_h=np.random.randint(192,257)
+	size_w=np.random.randint(192,257)
+	mask=cv2.resize(mask,(size_w,size_h))
+	kernel_1=random.randrange(5,26,2)
+	kernel_1=(kernel_1,kernel_1)
+	kernel_2=random.randrange(5,26,2)
+	kernel_2=(kernel_2,kernel_2)
+	mask_blured = cv2.GaussianBlur(mask, kernel_1, 0)
+	mask_blured = mask_blured/(mask_blured.max())
+	mask_blured[mask_blured<1]=0
+	mask_blured = cv2.GaussianBlur(mask_blured, kernel_2, np.random.randint(5,46))
+	mask_blured = mask_blured/(mask_blured.max())
+	mask_blured = cv2.resize(mask_blured,(W,H))
+	return mask_blured.reshape((mask_blured.shape+(1,)))
+def get_alpha_blend_mask(mask):
+	kernel_list=[(11,11),(9,9),(7,7),(5,5),(3,3)]
+	blend_list=[0.25,0.5,0.75]
+	kernel_idxs=random.choices(range(len(kernel_list)), k=2)
+	blend_ratio = blend_list[random.sample(range(len(blend_list)), 1)[0]]
+	mask_blured = cv2.GaussianBlur(mask, kernel_list[0], 0)
+	# print(mask_blured.max())
+	mask_blured[mask_blured<mask_blured.max()]=0
+	mask_blured[mask_blured>0]=1
+	# mask_blured = mask
+	mask_blured = cv2.GaussianBlur(mask_blured, kernel_list[kernel_idxs[1]], 0)
+	mask_blured = mask_blured/(mask_blured.max())
+	return mask_blured.reshape((mask_blured.shape+(1,)))
+class RandomDownScale(alb.core.transforms_interface.ImageOnlyTransform):
+	def apply(self,img,**params):
+		return self.randomdownscale(img)
+	def randomdownscale(self,img):
+		keep_ratio=True
+		keep_input_shape=True
+		H,W,C=img.shape
+		ratio_list=[2,4]
+		r=ratio_list[np.random.randint(len(ratio_list))]
+		img_ds=cv2.resize(img,(int(W/r),int(H/r)),interpolation=cv2.INTER_NEAREST)
+		if keep_input_shape:
+			img_ds=cv2.resize(img_ds,(W,H),interpolation=cv2.INTER_LINEAR)
+		return img_ds
+def get_boundary(mask, apply_dilation=True, apply_motion_blur=True):
+    if len(mask.shape) == 3:
+        mask = mask[:, :, 0]
+    mask = cv2.GaussianBlur(mask, (3, 3), 0)
+    if mask.max() > 1:
+        boundary = mask / 255.
+    else:
+        boundary = mask
+    boundary = 4 * boundary * (1. - boundary)
+    boundary = boundary * 255
+    boundary = random_dilate(boundary)
+    if apply_motion_blur:
+        boundary = random_motion_blur(boundary)
+        boundary = boundary / 255.
+    return boundary
+def random_dilate(mask, max_kernel_size=5):
+    kernel_size = random.randint(1, max_kernel_size)
+    kernel = np.ones((kernel_size, kernel_size), np.uint8)
+    dilated_mask = cv2.dilate(mask, kernel, iterations=1)
+    return dilated_mask
+def random_motion_blur(mask, max_kernel_size=5):
+    kernel_size = random.randint(1, max_kernel_size)
+    kernel = np.zeros((kernel_size, kernel_size))
+    anchor = random.randint(0, kernel_size - 1)
+    kernel[:, anchor] = 1 / kernel_size
+    motion_blurred_mask = cv2.filter2D(mask, -1, kernel)
+    return motion_blurred_mask
+class SBI_API:
+	def __init__(self,phase='train',image_size=256):
+		assert phase == 'train', f"Current SBI API only support train phase, but got {phase}"
+		self.image_size=(image_size,image_size)
+		self.phase=phase
+		self.transforms=self.get_transforms()
+		self.source_transforms = self.get_source_transforms()
+		self.bob_transforms = self.get_source_transforms_for_bob()
+	def __call__(self,img,landmark=None):
+		try:
+			assert landmark is not None, "landmark of the facial image should not be None."
+			# img_r,img_f,mask_f=self.self_blending(img.copy(),landmark.copy())
+			if random.random() < 1.0:
+				# apply sbi
+				img_r,img_f,mask_f=self.self_blending(img.copy(),landmark.copy())
+			else:
+				# apply boundary motion blur (bob)
+				img_r,img_f,mask_f=self.bob(img.copy(),landmark.copy())
+			if self.phase=='train':
+				transformed=self.transforms(image=img_f.astype('uint8'),image1=img_r.astype('uint8'))
+				img_f=transformed['image']
+				img_r=transformed['image1']
+			return img_f,img_r
+		except Exception as e:
+			print(e)
+			return None,None
+	def get_source_transforms(self):
+		return alb.Compose([
+				alb.Compose([
+						alb.RGBShift((-20,20),(-20,20),(-20,20),p=0.3),
+						alb.HueSaturationValue(hue_shift_limit=(-0.3,0.3), sat_shift_limit=(-0.3,0.3), val_shift_limit=(-0.3,0.3), p=1),
+						alb.RandomBrightnessContrast(brightness_limit=(-0.1,0.1), contrast_limit=(-0.1,0.1), p=1),
+					],p=1),
+				alb.OneOf([
+					RandomDownScale(p=1),
+					alb.Sharpen(alpha=(0.2, 0.5), lightness=(0.5, 1.0), p=1),
+				],p=1),
+			], p=1.)
+	def get_transforms(self):
+		return alb.Compose([
+			alb.RGBShift((-20,20),(-20,20),(-20,20),p=0.3),
+			alb.HueSaturationValue(hue_shift_limit=(-0.3,0.3), sat_shift_limit=(-0.3,0.3), val_shift_limit=(-0.3,0.3), p=0.3),
+			alb.RandomBrightnessContrast(brightness_limit=(-0.3,0.3), contrast_limit=(-0.3,0.3), p=0.3),
+			alb.ImageCompression(quality_lower=40,quality_upper=100,p=0.5),
+		],
+		additional_targets={f'image1': 'image'},
+		p=1.)
+	def randaffine(self,img,mask):
+		f=alb.Affine(
+				translate_percent={'x':(-0.03,0.03),'y':(-0.015,0.015)},
+				scale=[0.95,1/0.95],
+				fit_output=False,
+				p=1)
+		g=alb.ElasticTransform(
+				alpha=50,
+				sigma=7,
+				alpha_affine=0,
+				p=1,
+			)
+		transformed=f(image=img,mask=mask)
+		img=transformed['image']
+		mask=transformed['mask']
+		transformed=g(image=img,mask=mask)
+		mask=transformed['mask']
+		return img,mask
+	def get_source_transforms_for_bob(self):
+		return alb.Compose([
+				alb.Compose([
+						alb.ImageCompression(quality_lower=40,quality_upper=100,p=1),
+					],p=1),
+				alb.OneOf([
+					RandomDownScale(p=1),
+					alb.Sharpen(alpha=(0.2, 0.5), lightness=(0.5, 1.0), p=1),
+				],p=1),
+			], p=1.)
+	def bob(self,img,landmark):
+		H,W=len(img),len(img[0])
+		if np.random.rand()<0.25:
+			landmark=landmark[:68]
+		# mask=np.zeros_like(img[:,:,0])
+		# cv2.fillConvexPoly(mask, cv2.convexHull(landmark), 1.)
+		hull_type = random.choice([0, 1, 2, 3])
+		mask=random_get_hull(landmark,img,hull_type)[:,:,0]
+		source = img.copy()
+		source = self.bob_transforms(image=source.astype(np.uint8))['image']
+		source, mask = self.randaffine(source,mask)
+		mask = get_blend_mask(mask)
+		# get boundary with motion blur
+		boundary = get_boundary(mask)
+		blend_list = [0.25,0.5,0.75,1,1,1]
+		blend_ratio = blend_list[np.random.randint(len(blend_list))]
+		boundary *= blend_ratio
+		boundary = np.repeat(boundary[:, :, np.newaxis], 3, axis=2)
+		img_blended = (boundary * source + (1 - boundary) * img)
+		img_blended = img_blended.astype(np.uint8)
+		img = img.astype(np.uint8)
+		return img,img_blended,boundary.squeeze()
+	def self_blending(self,img,landmark):
+		H,W=len(img),len(img[0])
+		if np.random.rand()<0.25:
+			landmark=landmark[:68]
+		# mask=np.zeros_like(img[:,:,0])
+		# cv2.fillConvexPoly(mask, cv2.convexHull(landmark), 1.)
+		hull_type = random.choice([0, 1, 2, 3])
+		mask=random_get_hull(landmark,img,hull_type)[:,:,0]
+		source = img.copy()
+		if np.random.rand()<0.5:
+			source = self.source_transforms(image=source.astype(np.uint8))['image']
+		else:
+			img = self.source_transforms(image=img.astype(np.uint8))['image']
+		source, mask = self.randaffine(source,mask)
+		img_blended,mask=dynamic_blend(source,img,mask)
+		img_blended = img_blended.astype(np.uint8)
+		img = img.astype(np.uint8)
+		return img,img_blended,mask
+	def reorder_landmark(self,landmark):
+		landmark_add=np.zeros((13,2))
+		for idx,idx_l in enumerate([77,75,76,68,69,70,71,80,72,73,79,74,78]):
+			landmark_add[idx]=landmark[idx_l]
+		landmark[68:]=landmark_add
+		return landmark
+	def hflip(self,img,mask=None,landmark=None,bbox=None):
+		H,W=img.shape[:2]
+		landmark=landmark.copy()
+		if bbox is not None:
+			bbox=bbox.copy()
+		if landmark is not None:
+			landmark_new=np.zeros_like(landmark)
+			landmark_new[:17]=landmark[:17][::-1]
+			landmark_new[17:27]=landmark[17:27][::-1]
+			landmark_new[27:31]=landmark[27:31]
+			landmark_new[31:36]=landmark[31:36][::-1]
+			landmark_new[36:40]=landmark[42:46][::-1]
+			landmark_new[40:42]=landmark[46:48][::-1]
+			landmark_new[42:46]=landmark[36:40][::-1]
+			landmark_new[46:48]=landmark[40:42][::-1]
+			landmark_new[48:55]=landmark[48:55][::-1]
+			landmark_new[55:60]=landmark[55:60][::-1]
+			landmark_new[60:65]=landmark[60:65][::-1]
+			landmark_new[65:68]=landmark[65:68][::-1]
+			if len(landmark)==68:
+				pass
+			elif len(landmark)==81:
+				landmark_new[68:81]=landmark[68:81][::-1]
+			else:
+				raise NotImplementedError
+			landmark_new[:,0]=W-landmark_new[:,0]
+		else:
+			landmark_new=None
+		if bbox is not None:
+			bbox_new=np.zeros_like(bbox)
+			bbox_new[0,0]=bbox[1,0]
+			bbox_new[1,0]=bbox[0,0]
+			bbox_new[:,0]=W-bbox_new[:,0]
+			bbox_new[:,1]=bbox[:,1].copy()
+			if len(bbox)>2:
+				bbox_new[2,0]=W-bbox[3,0]
+				bbox_new[2,1]=bbox[3,1]
+				bbox_new[3,0]=W-bbox[2,0]
+				bbox_new[3,1]=bbox[2,1]
+				bbox_new[4,0]=W-bbox[4,0]
+				bbox_new[4,1]=bbox[4,1]
+				bbox_new[5,0]=W-bbox[6,0]
+				bbox_new[5,1]=bbox[6,1]
+				bbox_new[6,0]=W-bbox[5,0]
+				bbox_new[6,1]=bbox[5,1]
+		else:
+			bbox_new=None
+		if mask is not None:
+			mask=mask[:,::-1]
+		else:
+			mask=None
+		img=img[:,::-1].copy()
+		return img,mask,landmark_new,bbox_new
+if __name__=='__main__':
+	seed=10
+	random.seed(seed)
+	torch.manual_seed(seed)
+	np.random.seed(seed)
+	torch.cuda.manual_seed(seed)
+	torch.backends.cudnn.deterministic = True
+	torch.backends.cudnn.benchmark = False
+	api=SBI_API(phase='train',image_size=256)
+	img_path = 'FaceForensics++/original_sequences/youtube/c23/frames/000/000.png'
+	img = cv2.imread(img_path)
+	landmark_path = img_path.replace('frames', 'landmarks').replace('png', 'npy')
+	landmark = np.load(landmark_path)
+	sbi_img, ori_img = api(img, landmark)

training/dataset/sbi_dataset.py ADDED Viewed

	@@ -0,0 +1,139 @@

+'''
+# author: Zhiyuan Yan
+# email: [email protected]
+# date: 2024-01-26
+The code is designed for the self-blended images method (SBI, CVPR 2022).
+'''
+import sys
+sys.path.append('.')
+import cv2
+import yaml
+import torch
+import numpy as np
+from copy import deepcopy
+import albumentations as A
+from training.dataset.albu import IsotropicResize
+from training.dataset.abstract_dataset import DeepfakeAbstractBaseDataset
+from training.dataset.sbi_api import SBI_API
+class SBIDataset(DeepfakeAbstractBaseDataset):
+    def __init__(self, config=None, mode='train'):
+        super().__init__(config, mode)
+        # Get real lists
+        # Fix the label of real images to be 0
+        self.real_imglist = [(img, label) for img, label in zip(self.image_list, self.label_list) if label == 0]
+        # Init SBI
+        self.sbi = SBI_API(phase=mode,image_size=config['resolution'])
+        # Init data augmentation method
+        self.transform = self.init_data_aug_method()
+    def __getitem__(self, index):
+        # Get the real image paths and labels
+        real_image_path, real_label = self.real_imglist[index]
+        # Get the landmark paths for real images
+        real_landmark_path = real_image_path.replace('frames', 'landmarks').replace('.png', '.npy')
+        landmark = self.load_landmark(real_landmark_path).astype(np.int32)
+        # Load the real images
+        real_image = self.load_rgb(real_image_path)
+        real_image = np.array(real_image)  # Convert to numpy array
+        # Generate the corresponding SBI sample
+        fake_image, real_image = self.sbi(real_image, landmark)
+        if fake_image is None:
+            fake_image = deepcopy(real_image)
+            fake_label = 0
+        else:
+            fake_label = 1
+        # To tensor and normalize for fake and real images
+        fake_image_trans = self.normalize(self.to_tensor(fake_image))
+        real_image_trans = self.normalize(self.to_tensor(real_image))
+        return {"fake": (fake_image_trans, fake_label),
+                "real": (real_image_trans, real_label)}
+    def __len__(self):
+        return len(self.real_imglist)
+    @staticmethod
+    def collate_fn(batch):
+        """
+        Collate a batch of data points.
+        Args:
+            batch (list): A list of tuples containing the image tensor and label tensor.
+        Returns:
+            A tuple containing the image tensor, the label tensor, the landmark tensor,
+            and the mask tensor.
+        """
+        # Separate the image, label, landmark, and mask tensors for fake and real data
+        fake_images, fake_labels = zip(*[data["fake"] for data in batch])
+        real_images, real_labels = zip(*[data["real"] for data in batch])
+        # Stack the image, label, landmark, and mask tensors for fake and real data
+        fake_images = torch.stack(fake_images, dim=0)
+        fake_labels = torch.LongTensor(fake_labels)
+        real_images = torch.stack(real_images, dim=0)
+        real_labels = torch.LongTensor(real_labels)
+        # Combine the fake and real tensors and create a dictionary of the tensors
+        images = torch.cat([real_images, fake_images], dim=0)
+        labels = torch.cat([real_labels, fake_labels], dim=0)
+        data_dict = {
+            'image': images,
+            'label': labels,
+            'landmark': None,
+            'mask': None,
+        }
+        return data_dict
+    def init_data_aug_method(self):
+        trans = A.Compose([
+            A.HorizontalFlip(p=self.config['data_aug']['flip_prob']),
+            A.Rotate(limit=self.config['data_aug']['rotate_limit'], p=self.config['data_aug']['rotate_prob']),
+            A.GaussianBlur(blur_limit=self.config['data_aug']['blur_limit'], p=self.config['data_aug']['blur_prob']),
+            A.OneOf([
+                IsotropicResize(max_side=self.config['resolution'], interpolation_down=cv2.INTER_AREA, interpolation_up=cv2.INTER_CUBIC),
+                IsotropicResize(max_side=self.config['resolution'], interpolation_down=cv2.INTER_AREA, interpolation_up=cv2.INTER_LINEAR),
+                IsotropicResize(max_side=self.config['resolution'], interpolation_down=cv2.INTER_LINEAR, interpolation_up=cv2.INTER_LINEAR),
+            ], p = 0 if self.config['with_landmark'] else 1),
+            A.OneOf([
+                A.RandomBrightnessContrast(brightness_limit=self.config['data_aug']['brightness_limit'], contrast_limit=self.config['data_aug']['contrast_limit']),
+                A.FancyPCA(),
+                A.HueSaturationValue()
+            ], p=0.5),
+            A.ImageCompression(quality_lower=self.config['data_aug']['quality_lower'], quality_upper=self.config['data_aug']['quality_upper'], p=0.5)
+        ],
+            additional_targets={'real': 'sbi'},
+        )
+        return trans
+if __name__ == '__main__':
+    with open('/data/home/zhiyuanyan/DeepfakeBench/training/config/detector/sbi.yaml', 'r') as f:
+        config = yaml.safe_load(f)
+    train_set = SBIDataset(config=config, mode='train')
+    train_data_loader = \
+        torch.utils.data.DataLoader(
+            dataset=train_set,
+            batch_size=config['train_batchSize'],
+            shuffle=True,
+            num_workers=0,
+            collate_fn=train_set.collate_fn,
+        )
+    from tqdm import tqdm
+    for iteration, batch in enumerate(tqdm(train_data_loader)):
+        print(iteration)
+        if iteration > 10:
+            break

training/dataset/tall_dataset.py ADDED Viewed

	@@ -0,0 +1,183 @@

+# author: Zhiyuan Yan
+# email: [email protected]
+# date: 2023-03-30
+# description: Video-level dataset for the TALL detector (TALLDataset).
+import sys
+from torch import nn
+sys.path.append('.')
+import yaml
+import numpy as np
+from copy import deepcopy
+import random
+import torch
+from torch.utils import data
+from torchvision.utils import save_image
+from training.dataset import DeepfakeAbstractBaseDataset
+from einops import rearrange
+# Dataset names grouped as the FaceForensics++ pool (presumably FF++ with its per-method
+# subsets and companion sets -- TODO confirm). Not used by the rest of this file's code.
+FFpp_pool = ['FaceForensics++', 'FaceShifter', 'DeepFakeDetection', 'FF-DF', 'FF-F2F', 'FF-FS', 'FF-NT']  #
+def all_in_pool(inputs, pool):
+    for each in inputs:
+        if each not in pool:
+            return False
+    return True
+class TALLDataset(DeepfakeAbstractBaseDataset):
+    def __init__(self, config=None, mode='train'):
+        """Initializes the dataset object.
+        Args:
+            config (dict): A dictionary containing configuration parameters.
+            mode (str): A string indicating the mode (train or test).
+        Raises:
+            NotImplementedError: If mode is not train or test.
+        """
+        super().__init__(config, mode)
+        assert self.video_level, "TALL is a videl-based method"
+        assert int(self.clip_size ** 0.5) ** 2 == self.clip_size, 'clip_size must be square of an integer, e.g., 4'
+    def __getitem__(self, index, no_norm=False):
+        """
+        Returns the data point at the given index.
+        Args:
+            index (int): The index of the data point.
+        Returns:
+            A tuple containing the image tensor, the label tensor, the landmark tensor,
+            and the mask tensor.
+        """
+        # Get the image paths and label
+        image_paths = self.data_dict['image'][index]
+        label = self.data_dict['label'][index]
+        if not isinstance(image_paths, list):
+            image_paths = [image_paths]  # for the image-level IO, only one frame is used
+        image_tensors = []
+        landmark_tensors = []
+        mask_tensors = []
+        augmentation_seed = None
+        for image_path in image_paths:
+            # Initialize a new seed for data augmentation at the start of each video
+            if self.video_level and image_path == image_paths[0]:
+                augmentation_seed = random.randint(0, 2 ** 32 - 1)
+            # Get the mask and landmark paths
+            mask_path = image_path.replace('frames', 'masks')  # Use .png for mask
+            landmark_path = image_path.replace('frames', 'landmarks').replace('.png', '.npy')  # Use .npy for landmark
+            # Load the image
+            try:
+                image = self.load_rgb(image_path)
+            except Exception as e:
+                # Skip this image and return the first one
+                print(f"Error loading image at index {index}: {e}")
+                return self.__getitem__(0)
+            image = np.array(image)  # Convert to numpy array for data augmentation
+            # Load mask and landmark (if needed)
+            if self.config['with_mask']:
+                mask = self.load_mask(mask_path)
+            else:
+                mask = None
+            if self.config['with_landmark']:
+                landmarks = self.load_landmark(landmark_path)
+            else:
+                landmarks = None
+            # Do Data Augmentation
+            if self.mode == 'train' and self.config['use_data_augmentation']:
+                image_trans, landmarks_trans, mask_trans = self.data_aug(image, landmarks, mask, augmentation_seed)
+            else:
+                image_trans, landmarks_trans, mask_trans = deepcopy(image), deepcopy(landmarks), deepcopy(mask)
+            # To tensor and normalize
+            if not no_norm:
+                image_trans = self.normalize(self.to_tensor(image_trans))
+                if self.config['with_landmark']:
+                    landmarks_trans = torch.from_numpy(landmarks)
+                if self.config['with_mask']:
+                    mask_trans = torch.from_numpy(mask_trans)
+            image_tensors.append(image_trans)
+            landmark_tensors.append(landmarks_trans)
+            mask_tensors.append(mask_trans)
+        if self.video_level:
+            # Stack image tensors along a new dimension (time)
+            image_tensors = torch.stack(image_tensors, dim=0)
+            # cut out 16x16 patch
+            F, C, H, W = image_tensors.shape
+            x, y = np.random.randint(W), np.random.randint(H)
+            x1 = np.clip(x - self.config['mask_grid_size'] // 2, 0, W)
+            x2 = np.clip(x + self.config['mask_grid_size'] // 2, 0, W)
+            y1 = np.clip(y - self.config['mask_grid_size'] // 2, 0, H)
+            y2 = np.clip(y + self.config['mask_grid_size'] // 2, 0, H)
+            image_tensors[:, :, y1:y2, x1:x2] = -1
+            # # concatenate sub-image and reszie to 224x224
+            # image_tensors = image_tensors.reshape(-1, H, W)
+            # image_tensors = rearrange(image_tensors, '(rh rw c) h w -> c (rh h) (rw w)', rh=2, c=C)
+            # image_tensors = nn.functional.interpolate(image_tensors.unsqueeze(0),
+            #                                           size=(self.config['resolution'], self.config['resolution']),
+            #                                           mode='bilinear', align_corners=False).squeeze(0)
+            # Stack landmark and mask tensors along a new dimension (time)
+            if not any(landmark is None or (isinstance(landmark, list) and None in landmark) for landmark in
+                       landmark_tensors):
+                landmark_tensors = torch.stack(landmark_tensors, dim=0)
+            if not any(m is None or (isinstance(m, list) and None in m) for m in mask_tensors):
+                mask_tensors = torch.stack(mask_tensors, dim=0)
+        else:
+            # Get the first image tensor
+            image_tensors = image_tensors[0]
+            # Get the first landmark and mask tensors
+            if not any(landmark is None or (isinstance(landmark, list) and None in landmark) for landmark in
+                       landmark_tensors):
+                landmark_tensors = landmark_tensors[0]
+            if not any(m is None or (isinstance(m, list) and None in m) for m in mask_tensors):
+                mask_tensors = mask_tensors[0]
+        return image_tensors, label, landmark_tensors, mask_tensors
+if __name__ == "__main__":
+    with open('training/config/detector/tall.yaml', 'r') as f:
+        config = yaml.safe_load(f)
+    train_set = TALLDataset(
+        config=config,
+        mode='train',
+    )
+    train_data_loader = \
+        torch.utils.data.DataLoader(
+            dataset=train_set,
+            batch_size=config['train_batchSize'],
+            shuffle=True,
+            num_workers=0,
+            collate_fn=train_set.collate_fn,
+        )
+    from tqdm import tqdm
+    for iteration, batch in enumerate(tqdm(train_data_loader)):
+        print(batch['image'].shape)
+        print(batch['label'])
+        b, f, c, h, w = batch['image'].shape
+        for i in range(f):
+            img_tensor = batch['image'][0][i]
+            img_tensor = img_tensor * torch.tensor([0.5, 0.5, 0.5]).reshape(-1, 1, 1) + torch.tensor(
+                [0.5, 0.5, 0.5]).reshape(-1, 1, 1)
+            save_image(img_tensor, f'{i}.png')
+        break

training/dataset/utils/DeepFakeMask.py ADDED Viewed

	@@ -0,0 +1,402 @@

+#!/usr/bin/python
+# -*- coding: UTF-8 -*-
+# Created by: algohunt
+# Microsoft Research & Peking University
+# [email protected]
+# Copyright (c) 2019
+#!/usr/bin/env python3
+""" Masks functions for faceswap.py """
+import inspect
+import logging
+import sys
+import cv2
+import numpy as np
+import random
+from math import ceil, floor
+logger = logging.getLogger(__name__)  # pylint: disable=invalid-name
+def landmarks_to_bbox(landmarks: np.ndarray) -> np.ndarray:
+    if not isinstance(landmarks, np.ndarray):
+        landmarks = np.array(landmarks)
+    assert landmarks.shape[1] == 2
+    x0, y0 = np.min(landmarks, axis=0) # x和y轴上分别的最小值, [264,97]
+    x1, y1 = np.max(landmarks, axis=0) # x和y轴上分别的最小值, [370,236]
+    bbox = np.array([x0, y0, x1, y1])
+    return bbox
+def mask_from_points(image: np.ndarray, points: np.ndarray) -> np.ndarray:
+    """8 (or omitted) - 8-connected line.
+          4 - 4-connected line.
+    LINE_AA - antialiased line."""
+    h, w = image.shape[:2]
+    points = points.astype(int)
+    assert points.shape[1] == 2, f"points.shape: {points.shape}"
+    out = np.zeros((h, w), dtype=np.uint8)
+    hull = cv2.convexHull(points.astype(int))
+    cv2.fillConvexPoly(out, hull, 255, lineType=4)  # cv2.LINE_AA
+    return out
+def get_available_masks():
+    """ Return a list of the available masks for cli """
+    masks = sorted([name for name, obj in inspect.getmembers(sys.modules[__name__])
+                    if inspect.isclass(obj) and name != "Mask"])
+    masks.append("none")
+    # logger.debug(masks)
+    return masks
+def landmarks_68_symmetries():
+    """Return the left/right mirror table for the 68-point landmark layout.
+
+    Returns:
+        ``(sym, sym_ids)`` where ``sym`` maps a landmark id to the id of its
+        mirrored counterpart and ``sym_ids`` lists the ids that map to
+        themselves.  Ids appear to be 1-based (68 is a key and 0 never occurs).
+        Only one direction of each pair is stored (e.g. ``1 -> 17`` exists but
+        there is no ``17`` key), so lookups must start from the listed side.
+    """
+    # 68 landmarks symmetry
+    # ids that are their own mirror image
+    sym_ids = [9, 58, 67, 63, 52, 34, 31, 30, 29, 28]
+    sym = {
+        # ids 1-8 <-> 17-10 (presumably the jawline -- TODO confirm)
+        1: 17,
+        2: 16,
+        3: 15,
+        4: 14,
+        5: 13,
+        6: 12,
+        7: 11,
+        8: 10,
+        # ids 49-51, 59-60 (presumably the outer mouth contour -- TODO confirm)
+        51: 53,
+        50: 54,
+        49: 55,
+        60: 56,
+        59: 57,
+        # ids 61-62, 68 (presumably the inner mouth contour -- TODO confirm)
+        62: 64,
+        61: 65,
+        68: 66,
+        # ids 32-33 (presumably the nose base -- TODO confirm)
+        33: 35,
+        32: 36,
+        # ids 37-42 <-> 43-48 (presumably the eyes -- TODO confirm)
+        37: 46,
+        38: 45,
+        39: 44,
+        40: 43,
+        41: 48,
+        42: 47,
+        # ids 18-22 <-> 27-23 (presumably the eyebrows -- TODO confirm)
+        18: 27,
+        19: 26,
+        20: 25,
+        21: 24,
+        22: 23,
+        #
+        # identity entries: the same ids as sym_ids, each mapped to itself
+        9: 9,
+        58: 58,
+        67: 67,
+        63: 63,
+        52: 52,
+        34: 34,
+        31: 31,
+        30: 30,
+        29: 29,
+        28: 28,
+    }
+    return sym, sym_ids
+def get_default_mask():
+    """ Set the default mask for cli """
+    masks = get_available_masks()
+    default = "dfl_full"
+    default = default if default in masks else masks[0]
+    # logger.debug(default)
+    return default
+class Mask():
+    """ Parent class for masks
+        the output mask will be <mask_type>.mask
+        channels: 1, 3 or 4:
+                    1 - Returns a single channel mask
+                    3 - Returns a 3 channel mask
+                    4 - Returns the original image with the mask in the alpha channel """
+    def __init__(self, landmarks, face, channels=4, idx = 0):
+        # logger.info("Initializing %s: (face_shape: %s, channels: %s, landmarks: %s)",
+        #              self.__class__.__name__, face.shape, channels, landmarks)
+        self.landmarks = landmarks
+        self.face = face
+        self.channels = channels
+        self.cols = 4 # grid mask
+        self.rows = 4 # grid mask
+        self.idx = idx # grid mask
+        mask = self.build_mask()
+        self.mask = self.merge_mask(mask)
+        # logger.info("Initialized %s", self.__class__.__name__)
+    def build_mask(self):
+        """ Override to build the mask """
+        raise NotImplementedError
+    def merge_mask(self, mask):
+        """ Return the mask in requested shape """
+        # logger.info("mask_shape: %s", mask.shape)
+        assert self.channels in (1, 3, 4), "Channels should be 1, 3 or 4"
+        assert mask.shape[2] == 1 and mask.ndim == 3, "Input mask be 3 dimensions with 1 channel"
+        if self.channels == 3:
+            retval = np.tile(mask, 3)
+        elif self.channels == 4:
+            retval = np.concatenate((self.face, mask), -1)
+        else:
+            retval = mask
+        # logger.info("Final mask shape: %s", retval.shape)
+        return retval
+class dfl_full(Mask):  # pylint: disable=invalid-name
+    """ DFL facial mask """
+    def build_mask(self):
+        mask = np.zeros(self.face.shape[0:2] + (1, ), dtype=np.float32)
+        nose_ridge = (self.landmarks[27:31], self.landmarks[33:34])
+        jaw = (self.landmarks[0:17],
+               self.landmarks[48:68],
+               self.landmarks[0:1],
+               self.landmarks[8:9],
+               self.landmarks[16:17])
+        eyes = (self.landmarks[17:27],
+                self.landmarks[0:1],
+                self.landmarks[27:28],
+                self.landmarks[16:17],
+                self.landmarks[33:34])
+        parts = [jaw, nose_ridge, eyes]
+        for item in parts:
+            merged = np.concatenate(item)
+            cv2.fillConvexPoly(mask, cv2.convexHull(merged), 255.)  # pylint: disable=no-member
+        return mask
+class components(Mask):  # pylint: disable=invalid-name
+    """ Component model mask """
+    def build_mask(self):
+        mask = np.zeros(self.face.shape[0:2] + (1, ), dtype=np.float32)
+        r_jaw = (self.landmarks[0:9], self.landmarks[17:18])
+        l_jaw = (self.landmarks[8:17], self.landmarks[26:27])
+        r_cheek = (self.landmarks[17:20], self.landmarks[8:9])
+        l_cheek = (self.landmarks[24:27], self.landmarks[8:9])
+        nose_ridge = (self.landmarks[19:25], self.landmarks[8:9],)
+        r_eye = (self.landmarks[17:22],
+                 self.landmarks[27:28],
+                 self.landmarks[31:36],
+                 self.landmarks[8:9])
+        l_eye = (self.landmarks[22:27],
+                 self.landmarks[27:28],
+                 self.landmarks[31:36],
+                 self.landmarks[8:9])
+        nose = (self.landmarks[27:31], self.landmarks[31:36])
+        parts = [r_jaw, l_jaw, r_cheek, l_cheek, nose_ridge, r_eye, l_eye, nose]
+        # ---change 0531 random select parts ---
+        # r_face = (self.landmarks[0:9], self.landmarks[17:18],self.landmarks[17:20], self.landmarks[8:9])
+        # l_face = (self.landmarks[8:17], self.landmarks[26:27],self.landmarks[24:27], self.landmarks[8:9])
+        # nose_final = (self.landmarks[19:25], self.landmarks[8:9],self.landmarks[27:31], self.landmarks[31:36])
+        # parts = [r_face,l_face,nose_final,r_eye,l_eye]
+        # num_to_select = random.randint(1, len(parts))
+        # parts = random.sample(parts, num_to_select)
+        # print(len(parts), parts[0])
+        # ---change 0531 random select parts ---
+        for item in parts:
+            merged = np.concatenate(item)
+            cv2.fillConvexPoly(mask, cv2.convexHull(merged), 255.)  # pylint: disable=no-member
+        return mask
+class extended(Mask):  # pylint: disable=invalid-name
+    """ Extended mask
+        Based on components mask. Attempts to extend the eyebrow points up the forehead
+    """
+    def build_mask(self):
+        """Build the components-style mask after pushing landmarks 17-26 away from the eyes.
+
+        Returns a float32 array of shape (height, width, 1) with filled pixels set to 255.
+        """
+        mask = np.zeros(self.face.shape[0:2] + (1, ), dtype=np.float32)
+        # Work on a copy so self.landmarks is left untouched by the adjustment below.
+        landmarks = self.landmarks.copy()
+        # mid points between the side of face and eye point
+        ml_pnt = (landmarks[36] + landmarks[0]) // 2
+        mr_pnt = (landmarks[16] + landmarks[45]) // 2
+        # mid points between the mid points and eye
+        ql_pnt = (landmarks[36] + ml_pnt) // 2
+        qr_pnt = (landmarks[45] + mr_pnt) // 2
+        # Top of the eye arrays
+        bot_l = np.array((ql_pnt, landmarks[36], landmarks[37], landmarks[38], landmarks[39]))
+        bot_r = np.array((landmarks[42], landmarks[43], landmarks[44], landmarks[45], qr_pnt))
+        # Eyebrow arrays
+        top_l = landmarks[17:22]
+        top_r = landmarks[22:27]
+        # Adjust eyebrow arrays: move each eyebrow point further from its paired
+        # eye point by half of their (floor-divided) offset.  The right-hand
+        # side is fully evaluated before the slice assignment overwrites it.
+        landmarks[17:22] = top_l + ((top_l - bot_l) // 2)
+        landmarks[22:27] = top_r + ((top_r - bot_r) // 2)
+        # Same part layout as the components mask, using the adjusted landmarks.
+        r_jaw = (landmarks[0:9], landmarks[17:18])
+        l_jaw = (landmarks[8:17], landmarks[26:27])
+        r_cheek = (landmarks[17:20], landmarks[8:9])
+        l_cheek = (landmarks[24:27], landmarks[8:9])
+        nose_ridge = (landmarks[19:25], landmarks[8:9],)
+        r_eye = (landmarks[17:22], landmarks[27:28], landmarks[31:36], landmarks[8:9])
+        l_eye = (landmarks[22:27], landmarks[27:28], landmarks[31:36], landmarks[8:9])
+        nose = (landmarks[27:31], landmarks[31:36])
+        parts = [r_jaw, l_jaw, r_cheek, l_cheek, nose_ridge, r_eye, l_eye, nose]
+        # Fill the convex hull of every part; overlapping parts simply re-write 255.
+        for item in parts:
+            merged = np.concatenate(item)
+            cv2.fillConvexPoly(mask, cv2.convexHull(merged), 255.)  # pylint: disable=no-member
+        return mask
+class facehull(Mask):  # pylint: disable=invalid-name
+    """ Basic face hull mask """
+    def build_mask(self):
+        mask = np.zeros(self.face.shape[0:2] + (1, ), dtype=np.float32)
+        hull = cv2.convexHull(  # pylint: disable=no-member
+            np.array(self.landmarks).reshape((-1, 2)))
+        cv2.fillConvexPoly(mask, hull, 255.0, lineType=cv2.LINE_AA)  # pylint: disable=no-member
+        return mask
+        # mask = np.zeros(img.shape[0:2] + (1, ), dtype=np.float32)
+        # hull = cv2.convexHull(np.array(landmark).reshape((-1, 2)))
+class facehull2(Mask):  # pylint: disable=invalid-name
+    """ Basic face hull mask """
+    def build_mask(self):
+        mask = np.zeros(self.face.shape[0:2] + (1, ), dtype=np.uint8)
+        hull = cv2.convexHull(  # pylint: disable=no-member
+            np.array(self.landmarks).reshape((-1, 2)))
+        cv2.fillConvexPoly(mask, hull, 1.0, lineType=cv2.LINE_AA)
+        return mask
+class gridMasking(Mask):
+    def build_mask(self):
+        h, w = self.face.shape[:2]
+        landmarks = self.landmarks[:68]
+        # if idx is None:
+        #    idx = np.random.randint(0, self.total)
+        r, c = divmod(self.idx, self.cols) # 获得除数和余数，即这个idx对应第r行第c列
+        # pixel related
+        xmin, ymin, xmax, ymax = landmarks_to_bbox(landmarks)
+        dx = ceil((xmax - xmin) / self.cols)
+        dy = ceil((ymax - ymin) / self.rows)
+        mask = np.zeros((h, w), dtype=np.uint8)
+        # fill the cell mask
+        x0, y0 = floor(xmin + dx * c), floor(ymin + dy * r)
+        x1, y1 = floor(x0 + dx), floor(y0 + dy)
+        cv2.rectangle(mask, (x0, y0), (x1, y1), 255, -1)
+        # merge the cell mask with the convex hull
+        ch = mask_from_points(self.face, landmarks)
+        # ch = cv2.cvtColor(ch, cv2.COLOR_BGR2GRAY)
+        # mask = (mask & ch) / 255.0
+        mask = cv2.bitwise_and(mask, mask, mask=ch)
+        mask = mask.reshape([mask.shape[0],mask.shape[1], 1])
+        # cv2.bitwise_or(img, d_3c_i)
+        return mask
+class MeshgridMasking(Mask):
+    """Mask a single polygon chosen from a fixed list of landmark-defined face areas.
+
+    The polygon list is assembled in ``init`` as: the asymmetrical areas, the
+    areas listed in ``areas``, and the mirror images of those areas.  ``idx``
+    selects one entry of that list.  Area entries are landmark ids that are
+    converted to array indices with ``id - 1`` in ``build_mask``.
+    """
+    # Areas defined on one side of the face; each is mirrored through
+    # landmarks_68_symmetries() to obtain the opposite side.
+    areas = [
+        [1, 2, 3, 4, 5, 6, 7, 49, 32, 40, 41, 42, 37, 18],
+        [37, 38, 39, 40, 41, 42],  # left eye
+        [18, 19, 20, 21, 22, 28, 40, 39, 38, 37],
+        [28, 29, 30, 31, 32, 40],
+    ]
+    # Areas used as-is, without a mirrored counterpart.
+    areas_asym = [
+        [20, 21, 22, 28, 23, 24, 25],  # old [22, 23, 28],
+        [31, 32, 33, 34, 35, 36],
+        [32, 33, 34, 35, 36, 55, 54, 53, 52, 51, 50, 49],
+        [49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60],
+        [7, 8, 9, 10, 11, 55, 56, 57, 58, 59, 60, 49],
+    ]
+    def init(self, **kwargs):
+        """Populate ``self.paths``; called from ``build_mask`` (this is not ``__init__``)."""
+        # super().__init__(**kwargs)
+        sym, _ = landmarks_68_symmetries()
+        # construct list of points paths
+        paths = []
+        paths += self.areas_asym  # asymmetrical areas
+        paths += self.areas  # left
+        paths += [[sym[ld68_id] for ld68_id in area] for area in self.areas]  # right
+        assert len(paths) == self.total
+        self.paths = paths
+    @property
+    def total(self) -> int:
+        """Number of selectable polygons: asymmetrical areas plus both sides of ``areas``."""
+        total = len(self.areas_asym) + len(self.areas) * 2
+        return total
+    def transform_landmarks(self, landmarks):
+        """Transform landmarks to extend the eyebrow points up the forehead"""
+        # Copy first so the caller's array is not modified.
+        new_landmarks = landmarks.copy()
+        # mid points between the side of face and eye point
+        ml_pnt = (new_landmarks[36] + new_landmarks[0]) // 2
+        mr_pnt = (new_landmarks[16] + new_landmarks[45]) // 2
+        # mid points between the mid points and eye
+        ql_pnt = (new_landmarks[36] + ml_pnt) // 2
+        qr_pnt = (new_landmarks[45] + mr_pnt) // 2
+        # Top of the eye arrays
+        bot_l = np.array(
+            (
+                ql_pnt,
+                new_landmarks[36],
+                new_landmarks[37],
+                new_landmarks[38],
+                new_landmarks[39],
+            )
+        )
+        bot_r = np.array(
+            (
+                new_landmarks[42],
+                new_landmarks[43],
+                new_landmarks[44],
+                new_landmarks[45],
+                qr_pnt,
+            )
+        )
+        # Eyebrow arrays
+        top_l = new_landmarks[17:22]
+        top_r = new_landmarks[22:27]
+        # Adjust eyebrow arrays: move each point further from its paired eye
+        # point by half of their (floor-divided) offset.
+        new_landmarks[17:22] = top_l + ((top_l - bot_l) // 2)
+        new_landmarks[22:27] = top_r + ((top_r - bot_r) // 2)
+        return new_landmarks
+    def build_mask(self) -> np.ndarray:
+        """Fill the polygon selected by ``self.idx`` and return a (height, width, 1) uint8 mask."""
+        self.init()
+        h, w = self.face.shape[:2]
+        path = self.paths[self.idx]
+        new_landmarks = self.transform_landmarks(self.landmarks)
+        # Path entries are landmark ids; subtract 1 to index the landmark array.
+        points = [new_landmarks[ld68_id - 1] for ld68_id in path]
+        points = np.array(points, dtype=np.int32)
+        # cv2.fillConvexPoly(out, points, 255, lineType=4)
+        mask = np.zeros((h, w), dtype=np.uint8)
+        # fillPoly (not fillConvexPoly) is used, so the path is drawn as given
+        # rather than being replaced by its convex hull.
+        cv2.fillPoly(mask, [points], 255)
+        mask = mask.reshape([mask.shape[0],mask.shape[1], 1])
+        return mask

training/dataset/utils/SLADD.py ADDED Viewed

	@@ -0,0 +1,163 @@

+from enum import Enum
+from functools import reduce
+import cv2
+import numpy as np
+from scipy.ndimage import binary_dilation
+from .DeepFakeMask import Mask
+def dist(a, b):
+    x1, y1 = a
+    x2, y2 = b
+    return np.sqrt((x1 - x2) ** 2 + (y1 - y2) ** 2)
+    # return np.linalg.norm(a-b)
+def get_five_key(landmarks_68):
+    # get the five key points by using the landmarks
+    leye_center = (landmarks_68[36] + landmarks_68[39]) * 0.5
+    reye_center = (landmarks_68[42] + landmarks_68[45]) * 0.5
+    nose = landmarks_68[33]
+    lmouth = landmarks_68[48]
+    rmouth = landmarks_68[54]
+    leye_left = landmarks_68[36]
+    leye_right = landmarks_68[39]
+    reye_left = landmarks_68[42]
+    reye_right = landmarks_68[45]
+    out = [
+        tuple(x.astype("int32"))
+        for x in [
+            leye_center,
+            reye_center,
+            nose,
+            lmouth,
+            rmouth,
+            leye_left,
+            leye_right,
+            reye_left,
+            reye_right,
+        ]
+    ]
+    return out
+def remove_eyes(image, landmarks, opt):
+    ##l: left eye; r: right eye, b: both eye
+    if opt == "l":
+        (x1, y1), (x2, y2) = landmarks[5:7]
+    elif opt == "r":
+        (x1, y1), (x2, y2) = landmarks[7:9]
+    elif opt == "b":
+        (x1, y1), (x2, y2) = landmarks[:2]
+    else:
+        print("wrong region")
+    mask = np.zeros_like(image[..., 0])
+    line = cv2.line(mask, (x1, y1), (x2, y2), color=(1), thickness=2)
+    w = dist((x1, y1), (x2, y2))
+    dilation = int(w // 4)
+    if opt != "b":
+        dilation *= 4
+    line = binary_dilation(line, iterations=dilation)
+    return line
+def remove_nose(image, landmarks):
+    (x1, y1), (x2, y2) = landmarks[:2]
+    x3, y3 = landmarks[2]
+    mask = np.zeros_like(image[..., 0])
+    x4 = int((x1 + x2) / 2)
+    y4 = int((y1 + y2) / 2)
+    line = cv2.line(mask, (x3, y3), (x4, y4), color=(1), thickness=2)
+    w = dist((x1, y1), (x2, y2))
+    dilation = int(w // 4)
+    line = binary_dilation(line, iterations=dilation)
+    return line
+def remove_mouth(image, landmarks):
+    (x1, y1), (x2, y2) = landmarks[3:5]
+    mask = np.zeros_like(image[..., 0])
+    line = cv2.line(mask, (x1, y1), (x2, y2), color=(1), thickness=2)
+    w = dist((x1, y1), (x2, y2))
+    dilation = int(w // 3)
+    line = binary_dilation(line, iterations=dilation)
+    return line
+class SladdRegion(Enum):
+    left_eye = 0
+    right_eye = 1
+    nose = 2
+    mouth = 3
+    # composition
+    both_eyes = left_eye + right_eye  # 4
+class SladdMasking(Mask):
+    """Mask built from dilated line segments over a selected face region.
+
+    ``build_mask`` calls ``init()`` with its defaults, so ``self.regions`` is
+    ``[ALL_REGIONS]`` and ``self.idx`` selects one of the four single regions
+    (0 left eye, 1 right eye, 2 nose, 3 mouth).
+    """
+    # [0, 1, 2, 3, (0, 1), (0, 2), (1, 2), (2, 3), (0, 1, 2), (0, 1, 2, 3)]
+    # left-eye, right-eye, nose, mouth, ...
+    ALL_REGIONS = [
+        SladdRegion.left_eye,
+        SladdRegion.right_eye,
+        SladdRegion.nose,
+        SladdRegion.mouth,
+    ]
+    # Candidate region combinations; single-region entries come first.
+    REGIONS = [
+        [SladdRegion.left_eye],
+        [SladdRegion.right_eye],
+        [SladdRegion.nose],
+        [SladdRegion.mouth],
+        [SladdRegion.left_eye, SladdRegion.right_eye],
+        [SladdRegion.left_eye, SladdRegion.nose],
+        [SladdRegion.right_eye, SladdRegion.nose],
+        [SladdRegion.nose, SladdRegion.mouth],
+        [SladdRegion.left_eye, SladdRegion.right_eye, SladdRegion.nose],
+        ALL_REGIONS,
+    ]
+    def init(self, compose: bool = False, single: bool = True, **kwargs):
+        """Choose the region lists to draw from (this is not ``__init__``).
+
+        Args:
+            compose: when true use every entry of ``REGIONS``; otherwise only
+                the single-region entries.
+            single: when true (the default) the choice above is overridden and
+                ``self.regions`` becomes ``[ALL_REGIONS]``.
+        """
+        # super().__init__(**kwargs)
+        self.compose = compose
+        if compose:
+            self.regions = SladdMasking.REGIONS
+        else:
+            self.regions = [reg for reg in SladdMasking.REGIONS if len(reg) == 1]
+        # NOTE(review): with single=True the compose selection above has no effect.
+        if single:
+            self.regions = [self.ALL_REGIONS]
+    @property
+    def total(self) -> int:
+        """Number of entries in ``self.regions`` (requires ``init`` to have run)."""
+        return len(self.regions)
+    @staticmethod
+    def parse(img, reg, landmarks) -> np.ndarray:
+        """Return the mask for one region, dispatching to the matching ``remove_*`` helper.
+
+        Raises:
+            ValueError: if ``reg`` is not the left eye, right eye, nose or mouth member.
+        """
+        five_key = get_five_key(landmarks)
+        if reg is SladdRegion.left_eye:
+            mask = remove_eyes(img, five_key, "l")
+        elif reg is SladdRegion.right_eye:
+            mask = remove_eyes(img, five_key, "r")
+        elif reg is SladdRegion.nose:
+            mask = remove_nose(img, five_key)
+        elif reg is SladdRegion.mouth:
+            mask = remove_mouth(img, five_key)
+        else:
+            raise ValueError("Invalid region")
+        # elif reg == SladdRegion4:
+        #    mask = remove_eyes(img, five_key, "b")
+        return mask
+    def build_mask(self) -> np.ndarray:
+        """Build the mask for the region selected by ``self.idx``; shape is (height, width, 1)."""
+        self.init()
+        h, w = self.face.shape[:2]
+        # print(len(self.regions))
+        # Only the first region list is consulted, and idx picks one region from it.
+        regs = [self.regions[0][self.idx]]
+        # if isinstance(reg, int):
+        #    mask = parse(img, reg, landmarks)
+        masks = [SladdMasking.parse(self.face, reg, self.landmarks) for reg in regs]
+        # Element-wise maximum merges the per-region masks (a single one here).
+        mask = reduce(np.maximum, masks)
+        mask = mask.reshape([mask.shape[0],mask.shape[1], 1])
+        return mask

training/dataset/utils/attribution_mask.py ADDED Viewed

	@@ -0,0 +1,55 @@

+import cv2
+import math
+import numpy as np
+from scipy.ndimage import binary_erosion, binary_dilation
+def dist(p1, p2):
+    return math.sqrt((p1[0] - p2[0]) ** 2 + (p1[1] - p2[1]) ** 2)
+def remove_mouth(image, landmarks):
+    (x1, y1), (x2, y2) = landmarks[3:5]
+    mask = np.zeros_like(image[..., 0])
+    line = cv2.line(mask, (x1, y1), (x2, y2), color=(1), thickness=2)
+    w = dist((x1, y1), (x2, y2))
+    dilation = int(w // 3)
+    line = binary_dilation(line, iterations=dilation)
+    return line
+def remove_eyes(image, landmarks, opt='b'):
+    ##l: left eye; r: right eye, b: both eye
+    if opt == 'l':
+        (x1, y1), (x2, y2) = landmarks[36],landmarks[39]
+    elif opt == 'r':
+        (x1, y1), (x2, y2) = landmarks[42],landmarks[46]
+    elif opt == 'b':
+        (x1, y1), (x2, y2) = landmarks[36],landmarks[46]
+    else:
+        print('wrong region')
+    mask = np.zeros_like(image[..., 0])
+    line = cv2.line(np.array(mask, dtype=np.uint8), (int(x1), int(y1)), (int(x2), int(y2)), color=(1), thickness=2)
+    w = dist((x1, y1), (x2, y2))
+    dilation = int(w // 4)
+    if opt != 'b':
+        dilation *= 4
+    line = binary_dilation(line, iterations=dilation)
+    return line
+def remove_nose(image, landmarks):
+    ##l: left eye; r: right eye, b: both eye
+    (x1, y1), (x2, y2) = landmarks[27], landmarks[30]
+    mask = np.zeros_like(image[..., 0])
+    line = cv2.line(np.array(mask, dtype=np.uint8), (int(x1), int(y1)), (int(x2), int(y2)), color=(1), thickness=2)
+    w = dist((x1, y1), (x2, y2))
+    dilation = int(w // 3)
+    line1 = binary_dilation(line, iterations=dilation)
+    (x1, y1), (x2, y2) = landmarks[31], landmarks[35]
+    mask = np.zeros_like(image[..., 0])
+    line = cv2.line(np.array(mask, dtype=np.uint8), (int(x1), int(y1)), (int(x2), int(y2)), color=(1), thickness=2)
+    w = dist((x1, y1), (x2, y2))
+    dilation = int(w //4 )
+    line2 = binary_dilation(line, iterations=dilation)
+    return line1+line2

training/dataset/utils/bi_online_generation.py ADDED Viewed

	@@ -0,0 +1,289 @@

+import dlib
+from skimage import io
+from skimage import transform as sktransform
+import numpy as np
+from matplotlib import pyplot as plt
+import json
+import os
+import random
+from PIL import Image
+from imgaug import augmenters as iaa
+from dataset.library.DeepFakeMask import dfl_full,facehull,components,extended
+from dataset.utils.attribution_mask import *
+import cv2
+import tqdm
+'''
+from PIL import ImageDraw
+# 创建一个可以在图像上绘制的对象
+img_pil=Image.fromarray(img)
+draw = ImageDraw.Draw(img_pil)
+# 在图像上绘制点
+for i, point in enumerate(landmark):
+    x, y = point
+    radius = 1  # 点的半径
+    draw.ellipse((x-radius, y-radius, x+radius, y+radius), fill="red")
+    draw.text((x+radius+2, y-radius), str(i), fill="black")  # 在点旁边添加标签
+img_pil.show()
+'''
+def name_resolve(path):
+    name = os.path.splitext(os.path.basename(path))[0]
+    vid_id, frame_id = name.split('_')[0:2]
+    return vid_id, frame_id
+def total_euclidean_distance(a,b):
+    assert len(a.shape) == 2
+    return np.sum(np.linalg.norm(a-b,axis=1))
+def get_five_key(landmarks_68):
+    # get the five key points by using the landmarks
+    leye_center = (landmarks_68[36] + landmarks_68[39])*0.5
+    reye_center = (landmarks_68[42] + landmarks_68[45])*0.5
+    nose = landmarks_68[33]
+    lmouth = landmarks_68[48]
+    rmouth = landmarks_68[54]
+    leye_left = landmarks_68[36]
+    leye_right = landmarks_68[39]
+    reye_left = landmarks_68[42]
+    reye_right = landmarks_68[45]
+    out = [ tuple(x.astype('int32')) for x in [
+        leye_center,reye_center,nose,lmouth,rmouth,leye_left,leye_right,reye_left,reye_right
+    ]]
+    return out
+def random_get_hull(landmark,img1,hull_type=None):
+    if hull_type==None:
+        hull_type = random.choice([0,1,2,3])
+    if hull_type == 0:
+        mask = dfl_full(landmarks=landmark.astype('int32'),face=img1, channels=3).mask
+        return mask[:,:,0]/255
+    elif hull_type == 1:
+        mask = extended(landmarks=landmark.astype('int32'),face=img1, channels=3).mask
+        return mask[:,:,0]/255
+    elif hull_type == 2:
+        mask = components(landmarks=landmark.astype('int32'),face=img1, channels=3).mask
+        return mask[:,:,0]/255
+    elif hull_type == 3:
+        mask = facehull(landmarks=landmark.astype('int32'),face=img1, channels=3).mask
+        return mask[:,:,0]/255
+    elif hull_type == 4:
+        mask = remove_mouth(img1,get_five_key(landmark))
+        return mask.astype(np.float32)
+    elif hull_type == 5:
+        mask = remove_eyes(img1,landmark)
+        return mask.astype(np.float32)
+    elif hull_type == 6:
+        mask = remove_nose(img1,landmark)
+        return mask.astype(np.float32)
+    elif hull_type == 7:
+        mask = remove_nose(img1,landmark) + remove_eyes(img1,landmark) + remove_mouth(img1,get_five_key(landmark))
+        return mask.astype(np.float32)
+def random_erode_dilate(mask, ksize=None):
+    if random.random()>0.5:
+        if ksize is  None:
+            ksize = random.randint(1,21)
+        if ksize % 2 == 0:
+            ksize += 1
+        mask = np.array(mask).astype(np.uint8)*255
+        kernel = np.ones((ksize,ksize),np.uint8)
+        mask = cv2.erode(mask,kernel,1)/255
+    else:
+        if ksize is  None:
+            ksize = random.randint(1,5)
+        if ksize % 2 == 0:
+            ksize += 1
+        mask = np.array(mask).astype(np.uint8)*255
+        kernel = np.ones((ksize,ksize),np.uint8)
+        mask = cv2.dilate(mask,kernel,1)/255
+    return mask
+# borrow from https://github.com/MarekKowalski/FaceSwap
+def blendImages(src, dst, mask, featherAmount=0.2):
+    """Feather-blend ``src`` into ``dst`` over the non-zero region of ``mask``.
+
+    Each masked pixel gets a weight in [0, 1] that grows with its distance
+    from the edge of the mask's convex hull, reaching 1 at
+    ``featherAmount`` times the larger side of the mask's bounding box.
+
+    Returns:
+        ``(composedImg, composedMask)``: the blended image and the matching
+        soft mask holding the per-pixel weights.
+    """
+    # assumes mask has the same (H, W, C) layout as src/dst so the indexing
+    # and broadcasting below line up -- TODO confirm against callers
+    maskIndices = np.where(mask != 0)
+    src_mask = np.ones_like(mask)
+    dst_mask = np.zeros_like(mask)
+    # (x, y) coordinates of every non-zero mask entry
+    maskPts = np.hstack((maskIndices[1][:, np.newaxis], maskIndices[0][:, np.newaxis]))
+    # Feather width is relative to the larger side of the mask's bounding box.
+    faceSize = np.max(maskPts, axis=0) - np.min(maskPts, axis=0)
+    featherAmount = featherAmount * np.max(faceSize)
+    hull = cv2.convexHull(maskPts)
+    dists = np.zeros(maskPts.shape[0])
+    # Signed distance of each masked point to the hull boundary
+    # (measureDist=True; positive inside the hull).
+    for i in range(maskPts.shape[0]):
+        dists[i] = cv2.pointPolygonTest(hull, (maskPts[i, 0], maskPts[i, 1]), True)
+    weights = np.clip(dists / featherAmount, 0, 1)
+    composedImg = np.copy(dst)
+    composedImg[maskIndices[0], maskIndices[1]] = weights[:, np.newaxis] * src[maskIndices[0], maskIndices[1]] + (1 - weights[:, np.newaxis]) * dst[maskIndices[0], maskIndices[1]]
+    # The same weights blended between an all-ones and an all-zeros mask give the soft mask.
+    composedMask = np.copy(dst_mask)
+    composedMask[maskIndices[0], maskIndices[1]] = weights[:, np.newaxis] * src_mask[maskIndices[0], maskIndices[1]] + (
+                1 - weights[:, np.newaxis]) * dst_mask[maskIndices[0], maskIndices[1]]
+    return composedImg, composedMask
+# borrow from https://github.com/MarekKowalski/FaceSwap
+def colorTransfer(src, dst, mask):
+    transferredDst = np.copy(dst)
+    maskIndices = np.where(mask != 0)
+    maskedSrc = src[maskIndices[0], maskIndices[1]].astype(np.int32)
+    maskedDst = dst[maskIndices[0], maskIndices[1]].astype(np.int32)
+    meanSrc = np.mean(maskedSrc, axis=0)
+    meanDst = np.mean(maskedDst, axis=0)
+    maskedDst = maskedDst - meanDst
+    maskedDst = maskedDst + meanSrc
+    maskedDst = np.clip(maskedDst, 0, 255)
+    transferredDst[maskIndices[0], maskIndices[1]] = maskedDst
+    return transferredDst
+class BIOnlineGeneration():
+    """Generate real / blended-face samples with their blending-boundary masks on the fly.
+
+    Landmarks are read from ``precomuted_landmarks.json`` in the current
+    working directory; frames are named ``{videoid}_{frameid}``.
+    """
+    def __init__(self):
+        """Load the landmark table, the frame list and the mask distortion augmenter."""
+        with open('precomuted_landmarks.json', 'r') as f:
+            self.landmarks_record =  json.load(f)
+            # Convert every landmark list to an array once, up front.
+            for k,v in self.landmarks_record.items():
+                self.landmarks_record[k] = np.array(v)
+        # extract all frame from all video in the name of {videoid}_{frameid}
+        # (here: the two bundled sample frames repeated 10000 times each)
+        self.data_list = [
+                    '000_0000.png',
+                    '001_0000.png'
+                    ] * 10000
+        # predefine mask distortion
+        self.distortion = iaa.Sequential([iaa.PiecewiseAffine(scale=(0.01, 0.15))])
+    def gen_one_datapoint(self):
+        """Produce one ``(face_img, mask, data_type)`` sample.
+
+        ``data_type`` is ``'real'`` or ``'fake'`` with equal probability.  Real
+        samples get an all-zero mask; fake samples get the blended face and
+        the boundary map ``4 * mask * (1 - mask)``.  The sample is then
+        optionally down/up-sampled, optionally JPEG re-encoded, cropped to
+        rows 60-316 and columns 30-286, and optionally flipped horizontally.
+        """
+        background_face_path = random.choice(self.data_list)
+        data_type = 'real' if random.randint(0,1) else 'fake'
+        if data_type == 'fake' :
+            face_img,mask =  self.get_blended_face(background_face_path)
+            # Boundary map: 0 where the soft mask is 0 or 1, peaking at 1 where it is 0.5.
+            mask = ( 1 - mask ) * mask * 4
+        else:
+            face_img = io.imread(background_face_path)
+            mask = np.zeros((317, 317, 1))
+        # randomly downsample after BI pipeline
+        if random.randint(0,1):
+            aug_size = random.randint(64, 317)
+            face_img = Image.fromarray(face_img)
+            if random.randint(0,1):
+                face_img = face_img.resize((aug_size, aug_size), Image.BILINEAR)
+            else:
+                face_img = face_img.resize((aug_size, aug_size), Image.NEAREST)
+            face_img = face_img.resize((317, 317),Image.BILINEAR)
+            face_img = np.array(face_img)
+        # random jpeg compression after BI pipeline
+        if random.randint(0,1):
+            quality = random.randint(60, 100)
+            encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), quality]
+            face_img_encode = cv2.imencode('.jpg', face_img, encode_param)[1]
+            face_img = cv2.imdecode(face_img_encode, cv2.IMREAD_COLOR)
+        # Fixed crop to 257 x 257, applied identically to image and mask.
+        face_img = face_img[60:317,30:287,:]
+        mask = mask[60:317,30:287,:]
+        # random flip
+        if random.randint(0,1):
+            face_img = np.flip(face_img,1)
+            mask = np.flip(mask,1)
+        return face_img,mask,data_type
+    def get_blended_face(self,background_face_path):
+        """Blend the most similar foreign face onto the given background face.
+
+        Returns ``(blended_face, mask)`` at 317 x 317, where ``mask`` keeps only
+        the first channel of the soft blending mask.
+
+        Raises:
+            NotImplementedError: if the mask is empty after deformation.
+        """
+        background_face = io.imread(background_face_path)
+        background_landmark = self.landmarks_record[background_face_path]
+        foreground_face_path = self.search_similar_face(background_landmark,background_face_path)
+        foreground_face = io.imread(foreground_face_path)
+        # down sample before blending
+        aug_size = random.randint(128,317)
+        # Landmarks are rescaled by the same factor as the images (reference size 317).
+        background_landmark = background_landmark * (aug_size/317)
+        foreground_face = sktransform.resize(foreground_face,(aug_size,aug_size),preserve_range=True).astype(np.uint8)
+        background_face = sktransform.resize(background_face,(aug_size,aug_size),preserve_range=True).astype(np.uint8)
+        # get random type of initial blending mask
+        mask = random_get_hull(background_landmark, background_face)
+        #  random deform mask
+        mask = self.distortion.augment_image(mask)
+        mask = random_erode_dilate(mask)
+        # filter empty mask after deformation
+        if np.sum(mask) == 0 :
+            raise NotImplementedError
+        # apply color transfer
+        foreground_face = colorTransfer(background_face, foreground_face, mask*255)
+        # blend two face
+        blended_face, mask = blendImages(foreground_face, background_face, mask*255)
+        blended_face = blended_face.astype(np.uint8)
+        # resize back to default resolution
+        blended_face = sktransform.resize(blended_face,(317,317),preserve_range=True).astype(np.uint8)
+        mask = sktransform.resize(mask,(317,317),preserve_range=True)
+        # NOTE(review): this slice needs a 3-D mask, while random_get_hull in
+        # this module returns a 2-D one -- confirm the expected mask rank.
+        mask = mask[:,:,0:1]
+        return blended_face,mask
+    def search_similar_face(self,this_landmark,background_face_path):
+        """Return the path of the candidate frame whose landmarks are closest to ``this_landmark``.
+
+        Candidates are a random sample of 5000 entries of ``data_list`` with
+        frames from the background's own video removed; closeness is the
+        summed per-point Euclidean distance.
+        """
+        vid_id, frame_id = name_resolve(background_face_path)
+        min_dist = 99999999
+        # random sample 5000 frames from all frames:
+        all_candidate_path = random.sample( self.data_list, k=5000)
+        # filter all frame that comes from the same video as background face
+        all_candidate_path = filter(lambda k:name_resolve(k)[0] != vid_id, all_candidate_path)
+        all_candidate_path = list(all_candidate_path)
+        # loop through all candidate frames to get best match
+        # NOTE(review): min_path is never assigned when no candidate survives
+        # the filter or beats the initial min_dist, so the return would fail.
+        for candidate_path in all_candidate_path:
+            candidate_landmark = self.landmarks_record[candidate_path].astype(np.float32)
+            candidate_distance = total_euclidean_distance(candidate_landmark, this_landmark)
+            if candidate_distance < min_dist:
+                min_dist = candidate_distance
+                min_path = candidate_path
+        return min_path
+if __name__ == '__main__':
+    ds = BIOnlineGeneration()
+    from tqdm import tqdm
+    all_imgs = []
+    for _ in tqdm(range(50)):
+        img,mask,label = ds.gen_one_datapoint()
+        mask = np.repeat(mask,3,2)
+        mask = (mask*255).astype(np.uint8)
+        img_cat = np.concatenate([img,mask],1)
+        all_imgs.append(img_cat)
+    all_in_one = Image.new('RGB', (2570,2570))
+    for x in range(5):
+        for y in range(10):
+            idx = x*10+y
+            im = Image.fromarray(all_imgs[idx])
+            dx = x*514
+            dy = y*257
+            all_in_one.paste(im, (dx,dy))
+    all_in_one.save("all_in_one.jpg")

training/dataset/utils/bi_online_generation_yzy.py ADDED Viewed

	@@ -0,0 +1,268 @@

+import dlib
+from skimage import io
+from skimage import transform as sktransform
+import numpy as np
+from matplotlib import pyplot as plt
+import json
+import os
+import random
+from PIL import Image
+from imgaug import augmenters as iaa
+from .DeepFakeMask import dfl_full,facehull,components,extended,gridMasking,MeshgridMasking, facehull2
+from .SLADD import SladdMasking
+import cv2
+import torch
+import torch.nn as nn
+import tqdm
+import pdb
+def name_resolve(path):
+    name = os.path.splitext(os.path.basename(path))[0]
+    vid_id, frame_id = name.split('_')[0:2]
+    return vid_id, frame_id
+def total_euclidean_distance(a,b):
+    assert len(a.shape) == 2
+    return np.sum(np.linalg.norm(a-b,axis=1))
def random_get_hull(landmark, img1, hull_type0=-1, idx=0):
    """Build a blending mask of the requested (or a randomly chosen) type.

    Args:
        landmark: landmark array; it is cast to ``int32`` before being passed
            to the mask generator.
        img1: face image handed to the mask generator as ``face``.
        hull_type0: mask selector. ``-1`` (the default) draws one of the
            types 0-3 at random. Supported values:
            0 = ``dfl_full``, 1 = ``extended``, 2 = ``components``,
            3 = ``facehull``, 6 = ``SladdMasking`` with ``idx=3``
            (labelled "SLADD/mouth" by the original author).
        idx: returned unchanged as the second element of the result.

    Returns:
        tuple: ``(mask, idx)``. Masks of types 0-3 are divided by 255; the
        type 6 mask is divided by 1.0 (i.e. only converted to float).

    Raises:
        ValueError: if the resolved hull type is not one of 0, 1, 2, 3, 6.
            Previously such values silently returned ``None``, which made
            the caller's tuple unpacking fail with an unrelated error.
    """
    hull_type = random.choice([0, 1, 2, 3]) if hull_type0 == -1 else hull_type0
    if hull_type not in (0, 1, 2, 3, 6):
        raise ValueError("unsupported hull type: {!r}".format(hull_type))

    points = landmark.astype('int32')
    if hull_type == 6:
        # SLADD/mouth mask
        mask = SladdMasking(landmarks=points, face=img1, channels=3, idx=3).mask
        return mask/1., idx

    if hull_type == 0:
        mask = dfl_full(landmarks=points, face=img1, channels=3).mask
    elif hull_type == 1:
        mask = extended(landmarks=points, face=img1, channels=3).mask
    elif hull_type == 2:
        mask = components(landmarks=points, face=img1, channels=3).mask
    else:
        mask = facehull(landmarks=points, face=img1, channels=3).mask
    return mask/255, idx
def random_erode_dilate(mask, ksize=None):
    """Randomly erode or dilate ``mask`` with a square kernel of ones.

    A coin flip selects erosion or dilation. When ``ksize`` is not supplied
    it is drawn from [1, 21] for erosion and from [1, 5] for dilation; an
    even size is bumped to the next odd number. The mask is cast to
    ``uint8`` and multiplied by 255 before the morphological operation, and
    the result is divided by 255 again.

    Args:
        mask: array-like mask.
        ksize: optional kernel size; ``None`` means "draw at random".

    Returns:
        The eroded or dilated mask divided by 255.
    """
    erode = random.random() > 0.5
    if ksize is None:
        ksize = random.randint(1, 21) if erode else random.randint(1, 5)
    if ksize % 2 == 0:
        ksize += 1
    scaled = np.array(mask).astype(np.uint8) * 255
    kernel = np.ones((ksize, ksize), np.uint8)
    morph = cv2.erode if erode else cv2.dilate
    return morph(scaled, kernel, 1) / 255
# Borrowed from https://github.com/MarekKowalski/FaceSwap
def blendImages(src, dst, mask, featherAmount=0.2):
    """Feather-blend ``src`` into ``dst`` over the non-zero area of ``mask``.

    Each masked pixel gets a weight equal to its distance to the convex hull
    of the masked points, divided by ``featherAmount`` times the larger side
    of the mask's bounding box, clipped to [0, 1]. The same weights are
    written into an all-zero array shaped like ``mask`` to produce the soft
    mask that is returned alongside the image.

    Args:
        src: image providing the pasted content.
        dst: image providing the background; it is copied, not modified.
        mask: array whose non-zero entries mark the blending area.
        featherAmount: feather width as a fraction of the mask extent.

    Returns:
        tuple: ``(composedImg, composedMask)``.

    Raises:
        ValueError: if ``mask`` has no non-zero entry.
    """
    maskIndices = np.where(mask != 0)
    if maskIndices[0].size == 0:
        raise ValueError("blendImages: mask has no non-zero pixel")
    rows, cols = maskIndices[0], maskIndices[1]

    src_mask = np.ones_like(mask)
    dst_mask = np.zeros_like(mask)

    # points are stored as (x, y), i.e. (column, row)
    maskPts = np.hstack((cols[:, np.newaxis], rows[:, np.newaxis]))
    faceSize = np.max(maskPts, axis=0) - np.min(maskPts, axis=0)
    featherAmount = featherAmount * np.max(faceSize)

    hull = cv2.convexHull(maskPts)
    # Plain Python floats: recent OpenCV bindings refuse NumPy integer
    # scalars for the ``pt`` argument.
    dists = np.array([
        cv2.pointPolygonTest(hull, (float(x), float(y)), True)
        for x, y in maskPts
    ], dtype=np.float64)

    if featherAmount > 0:
        weights = np.clip(dists / featherAmount, 0, 1)
    else:
        # Zero feather width: take the limit of the clipped ratio instead of
        # dividing by zero (which produced NaN weights).
        weights = (dists > 0).astype(np.float64)
    weights = weights[:, np.newaxis]

    composedImg = np.copy(dst)
    composedImg[rows, cols] = weights * src[rows, cols] + (1 - weights) * dst[rows, cols]

    composedMask = np.copy(dst_mask)
    composedMask[rows, cols] = weights * src_mask[rows, cols] + (1 - weights) * dst_mask[rows, cols]

    return composedImg, composedMask
# Borrowed from https://github.com/MarekKowalski/FaceSwap
def colorTransfer(src, dst, mask):
    """Shift the masked pixels of ``dst`` so their mean matches ``src``.

    Inside the non-zero area of ``mask`` the per-channel mean of ``dst`` is
    replaced by the per-channel mean of ``src``; the shifted values are
    clipped to [0, 255]. Pixels outside the mask are left untouched.

    Args:
        src: image providing the reference colour mean.
        dst: image to be adjusted; it is copied, not modified.
        mask: array whose non-zero entries select the pixels to adjust.

    Returns:
        A copy of ``dst`` with the masked pixels colour-shifted.
    """
    selected = np.where(mask != 0)
    rows, cols = selected[0], selected[1]

    reference = src[rows, cols].astype(np.int32)
    shifted = dst[rows, cols].astype(np.int32)
    reference_mean = np.mean(reference, axis=0)
    own_mean = np.mean(shifted, axis=0)

    shifted = shifted - own_mean
    shifted = shifted + reference_mean

    adjusted = np.copy(dst)
    adjusted[rows, cols] = np.clip(shifted, 0, 255)
    return adjusted
class BIOnlineGeneration():
    """Generate real or blended ("fake") face samples with blending masks.

    Landmarks are read from ``precomuted_landmarks.json`` in the current
    working directory. A blended sample pastes the most similar face from a
    different video onto the background face and returns the blending
    boundary as its mask.
    """

    def __init__(self):
        with open('precomuted_landmarks.json', 'r') as f:
            self.landmarks_record = json.load(f)
            # JSON stores plain lists; convert once to arrays.
            for k, v in self.landmarks_record.items():
                self.landmarks_record[k] = np.array(v)
        # all frames of all videos, named {videoid}_{frameid}
        self.data_list = [
            '000_0000.png',
            '001_0000.png'
        ] * 10000
        # predefined mask distortion
        self.distortion = iaa.Sequential([iaa.PiecewiseAffine(scale=(0.01, 0.15))])

    def gen_one_datapoint(self):
        """Produce one augmented sample.

        Returns:
            tuple: ``(face_img, mask, data_type)`` where ``data_type`` is
            ``'real'`` or ``'fake'``. Image and mask are cropped to rows
            60:317 and columns 30:287.
        """
        background_face_path = random.choice(self.data_list)
        data_type = 'real' if random.randint(0, 1) else 'fake'
        if data_type == 'fake':
            face_img, mask = self.get_blended_face(background_face_path)
            # 4*m*(1-m): zero where the blend weight is 0 or 1, maximal at 0.5
            mask = (1 - mask) * mask * 4
        else:
            face_img = io.imread(background_face_path)
            mask = np.zeros((317, 317, 1))

        # randomly downsample after BI pipeline
        if random.randint(0, 1):
            aug_size = random.randint(64, 317)
            face_img = Image.fromarray(face_img)
            if random.randint(0, 1):
                face_img = face_img.resize((aug_size, aug_size), Image.BILINEAR)
            else:
                face_img = face_img.resize((aug_size, aug_size), Image.NEAREST)
            face_img = face_img.resize((317, 317), Image.BILINEAR)
            face_img = np.array(face_img)

        # random jpeg compression after BI pipeline
        if random.randint(0, 1):
            quality = random.randint(60, 100)
            encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), quality]
            face_img_encode = cv2.imencode('.jpg', face_img, encode_param)[1]
            face_img = cv2.imdecode(face_img_encode, cv2.IMREAD_COLOR)

        face_img = face_img[60:317, 30:287, :]
        mask = mask[60:317, 30:287, :]

        # random flip
        if random.randint(0, 1):
            face_img = np.flip(face_img, 1)
            mask = np.flip(mask, 1)

        return face_img, mask, data_type

    def get_blended_face(self, background_face_path):
        """Blend the most similar foreign face onto the given background face.

        Returns:
            tuple: ``(blended_face, mask)``; the face is ``uint8`` at
            317x317 and the mask keeps a single channel.

        Raises:
            NotImplementedError: if the mask is empty after deformation.
            ValueError: if no foreground candidate can be found.
        """
        background_face = io.imread(background_face_path)
        background_landmark = self.landmarks_record[background_face_path]

        foreground_face_path = self.search_similar_face(background_landmark, background_face_path)
        foreground_face = io.imread(foreground_face_path)

        # down sample before blending
        aug_size = random.randint(128, 317)
        background_landmark = background_landmark * (aug_size/317)
        foreground_face = sktransform.resize(foreground_face, (aug_size, aug_size), preserve_range=True).astype(np.uint8)
        background_face = sktransform.resize(background_face, (aug_size, aug_size), preserve_range=True).astype(np.uint8)

        # get random type of initial blending mask; -1 asks random_get_hull
        # to pick the type itself (the selector argument is required there)
        mask, idx = random_get_hull(background_landmark, background_face, -1)

        # random deform mask
        mask = self.distortion.augment_image(mask)
        mask = random_erode_dilate(mask)

        # filter empty mask after deformation
        if np.sum(mask) == 0:
            raise NotImplementedError

        # apply color transfer
        foreground_face = colorTransfer(background_face, foreground_face, mask*255)

        # blend two faces
        blended_face, mask = blendImages(foreground_face, background_face, mask*255)
        blended_face = blended_face.astype(np.uint8)

        # resize back to default resolution
        blended_face = sktransform.resize(blended_face, (317, 317), preserve_range=True).astype(np.uint8)
        mask = sktransform.resize(mask, (317, 317), preserve_range=True)
        mask = mask[:, :, 0:1]
        return blended_face, mask

    def search_similar_face(self, this_landmark, background_face_path):
        """Return the sampled frame from another video with the closest landmarks.

        Up to 5000 frames are sampled from ``self.data_list``; frames from
        the same video as ``background_face_path`` are discarded and the
        remaining frame with the smallest total landmark distance wins.

        Raises:
            ValueError: if no sampled frame belongs to a different video.
        """
        vid_id, frame_id = name_resolve(background_face_path)

        # never ask for more samples than there are frames
        sample_size = min(5000, len(self.data_list))
        sampled = random.sample(self.data_list, k=sample_size)
        # drop every frame that comes from the same video as the background
        candidates = [p for p in sampled if name_resolve(p)[0] != vid_id]
        if not candidates:
            raise ValueError(
                "no candidate frame from a video other than {!r}".format(vid_id))

        min_dist = float('inf')
        min_path = candidates[0]
        for candidate_path in candidates:
            candidate_landmark = self.landmarks_record[candidate_path].astype(np.float32)
            candidate_distance = total_euclidean_distance(candidate_landmark, this_landmark)
            if candidate_distance < min_dist:
                min_dist = candidate_distance
                min_path = candidate_path

        return min_path
if __name__ == '__main__':
    # Demo: generate 50 samples and tile "image | mask" pairs onto one sheet.
    ds = BIOnlineGeneration()
    from tqdm import tqdm

    tiles = []
    for _ in tqdm(range(50)):
        face, blend_mask, _label = ds.gen_one_datapoint()
        # replicate the single mask channel so it can sit next to the image
        mask_rgb = (np.repeat(blend_mask, 3, 2) * 255).astype(np.uint8)
        tiles.append(np.concatenate([face, mask_rgb], 1))

    sheet = Image.new('RGB', (2570, 2570))
    for position, tile in enumerate(tiles):
        # 5 columns of 514 px (image + mask), 10 rows of 257 px
        column, row = divmod(position, 10)
        sheet.paste(Image.fromarray(tile), (column * 514, row * 257))
    sheet.save("all_in_one.jpg")

training/dataset/utils/color_transfer.py ADDED Viewed

	@@ -0,0 +1,516 @@

+import cv2
+import numpy as np
+from numpy import linalg as npla
+import scipy as sp
+import scipy.sparse
+from scipy.sparse.linalg import spsolve
def color_transfer_sot(src, trg, steps=10, batch_size=5, reg_sigmaXY=16.0, reg_sigmaV=5.0):
    """
    Color Transform via Sliced Optimal Transfer
    ported by @iperov from https://github.com/dcoeurjo/OTColorTransfer

    src         - any float range any channel image
    trg         - any float range any channel image, same shape as src
    steps       - number of solver steps
    batch_size  - solver batch size
    reg_sigmaXY - apply regularization and sigmaXY of filter, otherwise set to 0.0
    reg_sigmaV  - sigmaV of filter

    return value - clip it manually

    Raises ValueError when an input is not floating point, when src is not
    rank 3, or when the shapes differ.
    """
    if not np.issubdtype(src.dtype, np.floating):
        raise ValueError("src value must be float")
    if not np.issubdtype(trg.dtype, np.floating):
        raise ValueError("trg value must be float")
    if len(src.shape) != 3:
        raise ValueError("src shape must have rank 3 (h,w,c)")
    if src.shape != trg.shape:
        raise ValueError("src and trg shapes must be equal")

    work_dtype = src.dtype
    height, width, channels = src.shape
    n_pixels = height * width
    moved = src.copy()

    for _step in range(steps):
        advect = np.zeros((n_pixels, channels), dtype=work_dtype)
        for _batch in range(batch_size):
            # random unit direction in colour space
            direction = np.random.normal(size=channels).astype(work_dtype)
            direction /= npla.norm(direction)

            proj_src = np.sum(moved * direction, axis=-1).reshape((n_pixels))
            proj_trg = np.sum(trg * direction, axis=-1).reshape((n_pixels))

            order_src = np.argsort(proj_src)
            order_trg = np.argsort(proj_trg)

            # rank-matched displacement along the direction
            shift = proj_trg[order_trg] - proj_src[order_src]
            advect[order_src] += shift[:, None] * direction
        moved += advect.reshape((height, width, channels)) / batch_size

    if reg_sigmaXY != 0.0:
        delta = moved - src
        delta_filtered = cv2.bilateralFilter(
            delta, 0, reg_sigmaV, reg_sigmaXY)
        if len(delta_filtered.shape) == 2:
            # the filter drops the channel axis of single-channel input
            delta_filtered = delta_filtered[..., None]
        moved = src + delta_filtered
    return moved
def color_transfer_mkl(x0, x1):
    """Linear colour transfer from ``x1`` onto ``x0`` via covariance matching.

    Both images are flattened to pixel lists; a linear map is built from the
    eigen-decompositions of the two channel covariance matrices, applied to
    the centred pixels of ``x0``, and the mean of ``x1`` is added.

    Args:
        x0: (h, w, c) image to recolour.
        x1: (h1, w1, c1) image supplying the colour statistics.

    Returns:
        (h, w, c) array in the dtype of ``x0``, clipped to [0, 1].
    """
    def chain(*matrices):
        # left-to-right product
        product = matrices[0]
        for matrix in matrices[1:]:
            product = np.dot(product, matrix)
        return product

    tiny = np.finfo(float).eps

    h, w, c = x0.shape
    h1, w1, c1 = x1.shape

    pixels0 = x0.reshape((h*w, c))
    pixels1 = x1.reshape((h1*w1, c1))

    cov0 = np.cov(pixels0.T)
    cov1 = np.cov(pixels1.T)

    eigvals0, U0 = np.linalg.eig(cov0)
    D0 = np.diag(np.sqrt(eigvals0.clip(tiny, None)))

    C = chain(D0, U0.T, cov1, U0, D0)

    eigvalsC, UC = np.linalg.eig(C)
    DC = np.diag(np.sqrt(eigvalsC.clip(tiny, None)))

    D0_inv = np.diag(1./(np.diag(D0)))

    transform = chain(U0, D0_inv, UC, DC, UC.T, D0_inv, U0.T)

    mean0 = np.mean(pixels0, axis=0)
    mean1 = np.mean(pixels1, axis=0)

    mapped = np.dot(pixels0-mean0, transform) + mean1
    return np.clip(mapped.reshape((h, w, c)).astype(pixels0.dtype), 0, 1)
def color_transfer_idt(i0, i1, bins=256, n_rot=20):
    """Iterative distribution transfer of the colours of ``i1`` onto ``i0``.

    For ``n_rot`` random rotations of colour space, each rotated axis of
    ``i0`` is remapped through a histogram-based transfer function towards
    the corresponding marginal of ``i1``; the update is applied with a
    relaxation factor of ``1 / n_rot``.

    Args:
        i0: (h, w, c) image to recolour.
        i1: (h1, w1, c1) image supplying the colour distribution.
        bins: number of histogram bins per axis.
        n_rot: number of random rotations.

    Returns:
        (h, w, c) array in the dtype of ``i0``, clipped to [0, 1].
    """
    # Imported explicitly: ``scipy.stats`` is not guaranteed to be reachable
    # as an attribute of the top-level ``scipy`` package unless something
    # has imported the submodule.
    from scipy.stats import special_ortho_group

    relaxation = 1 / n_rot
    h, w, c = i0.shape
    h1, w1, c1 = i1.shape

    i0 = i0.reshape((h*w, c))
    i1 = i1.reshape((h1*w1, c1))

    n_dims = c

    d0 = i0.T
    d1 = i1.T

    for _ in range(n_rot):
        r = special_ortho_group.rvs(n_dims).astype(np.float32)

        d0r = np.dot(r, d0)
        d1r = np.dot(r, d1)
        d_r = np.empty_like(d0)

        for j in range(n_dims):
            # common histogram range for both marginals
            lo = min(d0r[j].min(), d1r[j].min())
            hi = max(d0r[j].max(), d1r[j].max())

            p0r, edges = np.histogram(d0r[j], bins=bins, range=[lo, hi])
            p1r, _ = np.histogram(d1r[j], bins=bins, range=[lo, hi])

            # normalised cumulative histograms
            cp0r = p0r.cumsum().astype(np.float32)
            cp0r /= cp0r[-1]

            cp1r = p1r.cumsum().astype(np.float32)
            cp1r /= cp1r[-1]

            f = np.interp(cp0r, cp1r, edges[1:])

            d_r[j] = np.interp(d0r[j], edges[1:], f, left=0, right=bins)

        # rotate the update back and apply it partially
        d0 = relaxation * np.linalg.solve(r, (d_r - d0r)) + d0

    return np.clip(d0.T.reshape((h, w, c)).astype(i0.dtype), 0, 1)
def laplacian_matrix(n, m):
    """Build the sparse (n*m) x (n*m) Laplacian-style matrix of an n-by-m grid.

    The matrix has 4 on the main diagonal and -1 on the diagonals at offsets
    +-1 (inside each m-by-m block) and +-m (across the whole matrix).

    Returns:
        The matrix in LIL format.
    """
    block = scipy.sparse.lil_matrix((m, m))
    block.setdiag(4)
    for offset in (-1, 1):
        block.setdiag(-1, offset)

    grid = scipy.sparse.block_diag([block] * n).tolil()
    for offset in (m, -m):
        grid.setdiag(-1, offset)

    return grid
def seamless_clone(source, target, mask):
    """Blend ``source`` into ``target`` by solving a sparse linear system.

    Where ``mask`` is non-zero the solution follows the (0.75-scaled)
    Laplacian of ``source``; where ``mask`` is zero the system rows are
    replaced by identity rows so the result equals ``target`` there. The
    border of the mask is forced to 1 before the zero set is computed.

    Args:
        source: (h, w, c) image supplying the gradients.
        target: (h, w, c) image supplying values where the mask is zero.
        mask: array indexable as ``mask[row, col]``; it is no longer
            modified - the border is set on a private copy.

    Returns:
        (h, w, c) array clipped to [0, 1].
    """
    h, w, c = target.shape
    result = []

    mat_A = laplacian_matrix(h, w)
    laplacian = mat_A.tocsc()

    # Work on a copy so the caller's mask keeps its border values.
    mask = np.array(mask)
    mask[0, :] = 1
    mask[-1, :] = 1
    mask[:, 0] = 1
    mask[:, -1] = 1

    # flat indices of the pixels where the mask is zero
    q = np.argwhere(mask == 0)
    k = q[:, 1]+q[:, 0]*w

    # turn the rows of those pixels into identity rows
    mat_A[k, k] = 1
    mat_A[k, k + 1] = 0
    mat_A[k, k - 1] = 0
    mat_A[k, k + w] = 0
    mat_A[k, k - w] = 0

    mat_A = mat_A.tocsc()
    mask_flat = mask.flatten()
    for channel in range(c):
        source_flat = source[:, :, channel].flatten()
        target_flat = target[:, :, channel].flatten()

        mat_b = laplacian.dot(source_flat)*0.75
        mat_b[mask_flat == 0] = target_flat[mask_flat == 0]

        x = spsolve(mat_A, mat_b).reshape((h, w))
        result.append(x)

    return np.clip(np.dstack(result), 0, 1)
def reinhard_color_transfer(target, source, clip=False, preserve_paper=False, source_mask=None, target_mask=None):
    """
    Transfers the color distribution from the source to the target
    image using the mean and standard deviations of the L*a*b*
    color space.

    This implementation is (loosely) based on the "Color Transfer
    between Images" paper by Reinhard et al., 2001.

    Parameters:
    -------
    target: NumPy array
            OpenCV image in BGR color space (the target image)
    source: NumPy array
            OpenCV image in BGR color space (the source image)
    clip: Should components of L*a*b* image be scaled by np.clip before
            converting back to BGR color space?
            If False then components will be min-max scaled appropriately.
            Clipping will keep target image brightness truer to the input.
            Scaling will adjust image brightness to avoid washed out portions
            in the resulting color transfer that can be caused by clipping.
    preserve_paper: Should color transfer strictly follow methodology
            laid out in original paper? The method does not always produce
            aesthetically pleasing results.
            If False then L*a*b* components will be scaled using the reciprocal of
            the scaling factor proposed in the paper.  This method seems to produce
            more consistently aesthetically pleasing results
    source_mask, target_mask: optional arrays; when given, the L*a*b*
            image is multiplied by the mask before its statistics are taken.

    Returns:
    -------
    transfer: NumPy array
            OpenCV image (w, h, 3) NumPy array (uint8)
    """
    # Convert from BGR to L*a*b*, as 32-bit floats (OpenCV expects floats
    # to be 32-bit).
    source = cv2.cvtColor(source, cv2.COLOR_BGR2LAB).astype(np.float32)
    target = cv2.cvtColor(target, cv2.COLOR_BGR2LAB).astype(np.float32)

    # Colour statistics, laid out as (lMean, lStd, aMean, aStd, bMean, bStd).
    src_input = source if source_mask is None else source*source_mask
    tgt_input = target if target_mask is None else target*target_mask
    src_stats = lab_image_stats(src_input)
    tar_stats = lab_image_stats(tgt_input)

    adjusted = []
    for position, channel in enumerate(cv2.split(target)):
        mean_src, std_src = src_stats[2 * position], src_stats[2 * position + 1]
        mean_tar, std_tar = tar_stats[2 * position], tar_stats[2 * position + 1]

        # remove the target mean
        channel -= mean_tar

        if preserve_paper:
            # scale by the standard deviations using paper proposed factor
            channel = (std_tar / std_src) * channel
        else:
            # scale by the standard deviations using reciprocal of paper proposed factor
            channel = (std_src / std_tar) * channel

        # add in the source mean
        channel += mean_src

        # clip/scale the intensities to [0, 255] if they fall outside
        adjusted.append(_scale_array(channel, clip=clip))

    # Merge the channels and convert back to BGR as 8-bit unsigned integers.
    transfer = cv2.merge(adjusted)
    transfer = cv2.cvtColor(transfer.astype(np.uint8), cv2.COLOR_LAB2BGR)

    return transfer
def linear_color_transfer(target_img, source_img, mode='pca', eps=1e-5):
    '''
    Matches the colour distribution of the target image to that of the source image
    using a linear transform.
    Images are expected to be of form (w,h,c) and float in [0,1].
    Modes are chol, pca or sym for different choices of basis.

    Returns the recoloured target, cast to the dtype of source_img and
    clipped to [0,1].

    Raises ValueError for any other mode (previously this surfaced as an
    unbound-variable error).
    '''
    if mode not in ('chol', 'pca', 'sym'):
        raise ValueError(f"unknown mode {mode!r}; expected 'chol', 'pca' or 'sym'")

    def centred_stats(img):
        # channel mean, centred pixels as a (c, n) matrix, regularised covariance
        mu = img.mean(0).mean(0)
        flat = img - mu
        flat = flat.transpose(2, 0, 1).reshape(flat.shape[-1], -1)
        cov = flat.dot(flat.T) / flat.shape[1] + eps * np.eye(flat.shape[0])
        return mu, flat, cov

    def sym_sqrt(mat):
        # square root of a symmetric matrix through its eigen-decomposition
        vals, vecs = np.linalg.eigh(mat)
        return vecs.dot(np.sqrt(np.diag(vals))).dot(vecs.T)

    mu_t, t, Ct = centred_stats(target_img)
    mu_s, _s, Cs = centred_stats(source_img)

    if mode == 'chol':
        chol_t = np.linalg.cholesky(Ct)
        chol_s = np.linalg.cholesky(Cs)
        ts = chol_s.dot(np.linalg.inv(chol_t)).dot(t)
    elif mode == 'pca':
        Qt = sym_sqrt(Ct)
        Qs = sym_sqrt(Cs)
        ts = Qs.dot(np.linalg.inv(Qt)).dot(t)
    else:  # 'sym'
        Qt = sym_sqrt(Ct)
        QtCsQt = sym_sqrt(Qt.dot(Cs).dot(Qt))
        Qt_inv = np.linalg.inv(Qt)
        ts = Qt_inv.dot(QtCsQt).dot(Qt_inv).dot(t)

    # back to (h, w, c) and onto the source mean
    matched_img = ts.reshape(
        *target_img.transpose(2, 0, 1).shape).transpose(1, 2, 0)
    matched_img += mu_s
    # clip before the cast as well, so the cast never sees out-of-range values
    matched_img = np.clip(matched_img, 0, 1)
    return np.clip(matched_img.astype(source_img.dtype), 0, 1)
def lab_image_stats(image):
    """Return per-channel mean and standard deviation of a 3-channel image.

    Returns:
        tuple: ``(lMean, lStd, aMean, aStd, bMean, bStd)``.
    """
    l, a, b = cv2.split(image)
    stats = []
    for channel in (l, a, b):
        stats.append(channel.mean())
        stats.append(channel.std())
    return tuple(stats)
def _scale_array(arr, clip=True):
    """Bring ``arr`` into [0, 255] by clipping or by min-max rescaling.

    Args:
        arr: numeric array.
        clip: if True, values are clipped to [0, 255]. If False and some
            value lies outside [0, 255], the array is rescaled linearly so
            that its minimum and maximum land on its own extremes clamped
            to [0, 255]; an array already inside the range is returned
            unchanged.

    Returns:
        The clipped / rescaled array (or ``arr`` itself when nothing had to
        be done).
    """
    if clip:
        return np.clip(arr, 0, 255)

    mn = arr.min()
    mx = arr.max()
    if mn >= 0 and mx <= 255:
        return arr

    if mx == mn:
        # Constant array outside the range: rescaling would divide by zero.
        return np.clip(arr, 0, 255)

    # Clamp both ends, so an array lying entirely above 255 or below 0 does
    # not get an inverted or out-of-range target interval.
    lo = min(max(mn, 0), 255)
    hi = max(min(mx, 255), 0)
    return (hi - lo) * (arr - mn) / (mx - mn) + lo
def channel_hist_match(source, template, hist_match_threshold=255, mask=None):
    """Remap the values of ``source`` so its histogram follows ``template``.

    Code borrowed from:
    https://stackoverflow.com/questions/32655686/histogram-matching-of-two-images-in-python-2-x

    Args:
        source: single-channel array to remap.
        template: single-channel array supplying the reference histogram.
        hist_match_threshold: upper end of the source quantile scale (the
            template scale is fixed at 255).
        mask: optional array multiplied onto both inputs.

    Returns:
        Array with the shape of ``source`` holding the interpolated values.
    """
    original_shape = source.shape

    if mask is not None:
        # NOTE(review): the masked copies are computed but not consulted by
        # the matching below, which works on the unmasked inputs - confirm
        # whether the mask was meant to restrict the histograms.
        _masked_source = (source * mask).ravel()
        _masked_template = (template * mask).ravel()

    flat_source = source.ravel()
    flat_template = template.ravel()

    src_values, inverse, src_counts = np.unique(
        flat_source, return_inverse=True, return_counts=True)
    tpl_values, tpl_counts = np.unique(flat_template, return_counts=True)

    # cumulative distributions on a common scale
    src_quantiles = np.cumsum(src_counts).astype(np.float64)
    src_quantiles = hist_match_threshold * src_quantiles / src_quantiles[-1]
    tpl_quantiles = np.cumsum(tpl_counts).astype(np.float64)
    tpl_quantiles = 255 * tpl_quantiles / tpl_quantiles[-1]

    mapped_values = np.interp(src_quantiles, tpl_quantiles, tpl_values)
    return mapped_values[inverse].reshape(original_shape)
def color_hist_match(src_im, tar_im, hist_match_threshold=255, mask=None):
    """Histogram-match the first three channels of ``src_im`` to ``tar_im``.

    Channels beyond the third are copied from ``src_im`` unchanged.

    Returns:
        Array stacked along the last axis, cast to the dtype of ``src_im``.
    """
    h, w, c = src_im.shape
    planes = [
        channel_hist_match(
            src_im[:, :, index], tar_im[:, :, index], hist_match_threshold, mask)
        for index in range(3)
    ]
    # pass any extra channels (e.g. alpha) through untouched
    planes.extend(src_im[:, :, index] for index in range(3, c))
    return np.stack(planes, axis=-1).astype(src_im.dtype)
def color_transfer_mix(img_src, img_trg):
    """Combine a linear lightness transfer with a sliced-OT colour transfer.

    Both inputs are multiplied by 255, clipped and cast to ``uint8``. The
    lightness channel is matched with ``linear_color_transfer``; the colour
    is matched with ``color_transfer_sot`` on copies whose lightness channel
    has been set to the constant 100; the matched lightness is then written
    back into the result.

    Returns:
        ``float32`` image, i.e. the 8-bit result divided by 255.
    """
    src_u8 = np.clip(img_src*255.0, 0, 255).astype(np.uint8)
    trg_u8 = np.clip(img_trg*255.0, 0, 255).astype(np.uint8)

    src_lab = cv2.cvtColor(src_u8, cv2.COLOR_BGR2LAB)
    trg_lab = cv2.cvtColor(trg_u8, cv2.COLOR_BGR2LAB)

    # lightness: linear transfer on the first channel only
    light_src = src_lab[..., 0:1].astype(np.float32)/255.0
    light_trg = trg_lab[..., 0:1].astype(np.float32)/255.0
    light = linear_color_transfer(light_src, light_trg)[..., 0]*255.0
    rct_light = np.clip(light, 0, 255).astype(np.uint8)

    # colour: neutralise lightness on both images before the OT transfer
    flat_light = (np.ones_like(rct_light)*100).astype(np.uint8)
    src_lab[..., 0] = flat_light
    src_flat = cv2.cvtColor(src_lab, cv2.COLOR_LAB2BGR)
    trg_lab[..., 0] = flat_light
    trg_flat = cv2.cvtColor(trg_lab, cv2.COLOR_LAB2BGR)

    transferred = color_transfer_sot(
        src_flat.astype(np.float32), trg_flat.astype(np.float32))
    transferred = np.clip(transferred, 0, 255).astype(np.uint8)

    # put the matched lightness back
    transferred = cv2.cvtColor(transferred, cv2.COLOR_BGR2LAB)
    transferred[..., 0] = rct_light
    transferred = cv2.cvtColor(transferred, cv2.COLOR_LAB2BGR)

    return (transferred / 255.0).astype(np.float32)
def colorTransfer_fs(src_, dst_, mask):
    """Shift the masked pixels of ``src_`` so their mean matches ``dst_``.

    The arguments are swapped relative to the FaceSwap-style helper this is
    derived from: ``src_`` is the image that gets adjusted (and returned as
    a copy), ``dst_`` supplies the reference colour mean.

    Args:
        src_: image to adjust; it is copied, not modified.
        dst_: image providing the reference mean.
        mask: array whose non-zero entries select the pixels to adjust.

    Returns:
        A copy of ``src_`` with the masked pixels shifted and clipped to
        [0, 255].
    """
    reference_img = dst_
    adjusted_img = src_
    result = np.copy(adjusted_img)

    # indices of the non-black mask pixels
    selected = np.where(mask != 0)
    rows, cols = selected[0], selected[1]

    # pixels inside the non-black area of the mask
    reference = reference_img[rows, cols].astype(np.int32)
    shifted = adjusted_img[rows, cols].astype(np.int32)
    reference_mean = np.mean(reference, axis=0)
    own_mean = np.mean(shifted, axis=0)

    shifted = shifted - own_mean
    shifted = shifted + reference_mean

    result[rows, cols] = np.clip(shifted, 0, 255)
    return result
def colorTransfer_avg(img_src, img_tgt, mask=None):
    """Shift each channel of ``img_src`` so its mean matches ``img_tgt``.

    When ``mask`` is given both images are multiplied by it first, so the
    means are taken over the masked images and the returned image is the
    masked source. Shifted values are limited to [0, 1].

    Args:
        img_src: (h, w, c) image to adjust; it is not modified.
        img_tgt: (h, w, c) image supplying the per-channel means.
        mask: optional array multiplied onto both images.

    Returns:
        The shifted copy of (masked) ``img_src``.
    """
    img_new = img_src.copy()
    img_old = img_tgt
    if mask is not None:
        img_new = img_new * mask
        img_old = img_old * mask

    for i in range(img_new.shape[2]):
        diff_int = img_old[:, :, i].mean() - img_new[:, :, i].mean()
        shifted = img_new[:, :, i] + diff_int
        # Whole-channel clamp replacing the former per-pixel Python loop.
        # fmax maps NaN to 0, as the scalar max(0., x) did.
        img_new[:, :, i] = np.minimum(np.fmax(shifted, 0.), 1.)
    return img_new
def color_transfer(ct_mode, img_src, img_trg, mask):
    """
    Apply the colour-transfer method named by ct_mode.

    Both images are cast to float32 and divided by 255 before dispatch; the
    result is multiplied by 255, clipped and returned as uint8.

    ct_mode: one of 'lct', 'rct', 'rct-m', 'rct-fs', 'mkl', 'mkl-m', 'idt',
             'idt-m', 'sot', 'sot-m', 'mix-m', 'seamless-hist-match',
             'seamless-hist-match-m', 'avg-align'.
    mask:    used by the masked modes ('*-m', 'rct-fs', 'avg-align').

    Raises ValueError for an unknown ct_mode.
    """
    img_src = img_src.astype(dtype=np.float32) / 255.0
    img_trg = img_trg.astype(dtype=np.float32) / 255.0

    def to_u8(img):
        # [0,1] float -> clipped 8-bit
        return np.clip(img*255, 0, 255).astype(np.uint8)

    def to_unit(img):
        # 8-bit -> clipped [0,1] float32
        return np.clip(img.astype(np.float32) / 255.0, 0.0, 1.0)

    if ct_mode == 'lct':
        out = linear_color_transfer(img_src, img_trg)
    elif ct_mode == 'rct':
        # the two flags are drawn at random on every call
        out = to_unit(reinhard_color_transfer(to_u8(img_src),
                                              to_u8(img_trg),
                                              preserve_paper=np.random.rand() < 0.5,
                                              clip=np.random.rand() < 0.5))
    elif ct_mode == 'rct-m':
        out = to_unit(reinhard_color_transfer(to_u8(img_src),
                                              to_u8(img_trg),
                                              source_mask=mask, target_mask=mask))
    elif ct_mode == 'rct-fs':
        out = to_unit(colorTransfer_fs(to_u8(img_src), to_u8(img_trg), mask))
    elif ct_mode == 'mkl':
        out = color_transfer_mkl(img_src, img_trg)
    elif ct_mode == 'mkl-m':
        out = color_transfer_mkl(img_src*mask, img_trg*mask)
    elif ct_mode == 'idt':
        out = color_transfer_idt(img_src, img_trg)
    elif ct_mode == 'idt-m':
        out = color_transfer_idt(img_src*mask, img_trg*mask)
    elif ct_mode == 'sot':
        out = np.clip(color_transfer_sot(img_src, img_trg), 0.0, 1.0)
    elif ct_mode == 'sot-m':
        out = np.clip(
            color_transfer_sot((img_src*mask).astype(np.float32),
                               (img_trg*mask).astype(np.float32)),
            0.0, 1.0)
    elif ct_mode == 'mix-m':
        out = color_transfer_mix(img_src*mask, img_trg*mask)
    elif ct_mode == 'seamless-hist-match':
        out = color_hist_match(img_src, img_trg)
    elif ct_mode == 'seamless-hist-match-m':
        out = color_hist_match(img_src, img_trg, mask=mask)
    elif ct_mode == 'avg-align':
        out = np.clip(colorTransfer_avg(img_src, img_trg, mask=mask), 0.0, 1.0)
    else:
        raise ValueError(f"unknown ct_mode {ct_mode}")

    return to_u8(out)