anyantudre committed
Commit caa56d6 · 0 Parent(s):

moved from training repo to inference

This view is limited to 50 files because the commit contains too many changes.
Files changed (50)
  1. .gitattributes +35 -0
  2. .github/workflows/main.yml +28 -0
  3. .gitignore +12 -0
  4. app.py +132 -0
  5. inference.py +117 -0
  6. requirements.txt +33 -0
  7. training/config/__init__.py +7 -0
  8. training/config/config/__init__.py +7 -0
  9. training/config/config/backbone/cls_hrnet_w48.yaml +103 -0
  10. training/config/config/detector/efficientnetb4.yaml +88 -0
  11. training/config/config/detector/resnet34.yaml +87 -0
  12. training/config/config/detector/ucf.yaml +130 -0
  13. training/config/config/detector/xception.yaml +86 -0
  14. training/config/config/test_config.yaml +38 -0
  15. training/config/config/train_config.yaml +43 -0
  16. training/config/detector/efficientnetb4.yaml +88 -0
  17. training/config/detector/ucf.yaml +131 -0
  18. training/config/detector/xception.yaml +86 -0
  19. training/config/test_config.yaml +38 -0
  20. training/config/train_config.yaml +43 -0
  21. training/dataset/I2G_dataset.py +389 -0
  22. training/dataset/__init__.py +19 -0
  23. training/dataset/abstract_dataset.py +621 -0
  24. training/dataset/albu.py +99 -0
  25. training/dataset/face_utils.py +238 -0
  26. training/dataset/ff_blend.py +572 -0
  27. training/dataset/fwa_blend.py +548 -0
  28. training/dataset/generate_parsing_mask.py +129 -0
  29. training/dataset/generate_xray_nearest.py +136 -0
  30. training/dataset/iid_dataset.py +116 -0
  31. training/dataset/library/000_0000.png +0 -0
  32. training/dataset/library/001_0000.png +0 -0
  33. training/dataset/library/DeepFakeMask.py +181 -0
  34. training/dataset/library/LICENSE +674 -0
  35. training/dataset/library/README.md +12 -0
  36. training/dataset/library/all_in_one.jpg +0 -0
  37. training/dataset/library/bi_online_generation.py +241 -0
  38. training/dataset/library/precomuted_landmarks.json +1 -0
  39. training/dataset/lrl_dataset.py +139 -0
  40. training/dataset/lsda_dataset.py +382 -0
  41. training/dataset/pair_dataset.py +150 -0
  42. training/dataset/sbi_api.py +371 -0
  43. training/dataset/sbi_dataset.py +139 -0
  44. training/dataset/tall_dataset.py +183 -0
  45. training/dataset/utils/DeepFakeMask.py +402 -0
  46. training/dataset/utils/SLADD.py +163 -0
  47. training/dataset/utils/attribution_mask.py +55 -0
  48. training/dataset/utils/bi_online_generation.py +289 -0
  49. training/dataset/utils/bi_online_generation_yzy.py +268 -0
  50. training/dataset/utils/color_transfer.py +516 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.github/workflows/main.yml ADDED
@@ -0,0 +1,28 @@
+ name: CI
+
+ # Controls when the workflow will run
+ on:
+   # Triggers the workflow on push events but only for the "main" branch
+   push:
+     branches: [ "main" ]
+
+   # Allows you to run this workflow manually from the Actions tab
+   workflow_dispatch:
+
+ jobs:
+   sync-to-hub:
+     runs-on: ubuntu-latest
+     steps:
+       - uses: actions/checkout@v4
+         with:
+           fetch-depth: 0
+
+       - name: Add remote
+         env:
+           HF_TOKEN: ${{ secrets.HF_TOKEN }}
+         run: git remote add space https://ArissBandoss:[email protected]/spaces/ArissBandoss/DeepFake-Videos-Detection
+
+       - name: Push to hub
+         env:
+           HF_TOKEN: ${{ secrets.HF_TOKEN }}
+         run: git push --force https://ArissBandoss:[email protected]/spaces/ArissBandoss/DeepFake-Videos-Detection main
.gitignore ADDED
@@ -0,0 +1,12 @@
+ .idea
+ *__pycache__*
+ *.vscode*
+ *.pyc
+ *.pth
+ *.pt
+ *.dat
+ audios-testing
+ temp_video_frames
+ .gradio
+ *.zip
+ *.npy
app.py ADDED
@@ -0,0 +1,132 @@
+ import os
+ import cv2
+ import torch
+ import numpy as np
+ from torchvision import transforms
+ from PIL import Image
+ from tqdm import tqdm
+ from training.detectors import DETECTOR
+ import yaml
+ import gradio as gr
+
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+ # available models in the repository
+ AVAILABLE_MODELS = [
+     "xception",
+     "ucf",
+ ]
+
+ # load the model
+ def load_model(model_name, config_path, weights_path):
+     with open(config_path, 'r') as f:
+         config = yaml.safe_load(f)
+
+     config['model_name'] = model_name
+
+     model_class = DETECTOR[model_name]
+     model = model_class(config).to(device)
+
+     checkpoint = torch.load(weights_path, map_location=device)
+     model.load_state_dict(checkpoint, strict=True)
+     model.eval()
+     return model
+
+ # preprocess a single video
+ def preprocess_video(video_path, output_dir, frame_num=32):
+     os.makedirs(output_dir, exist_ok=True)
+     frames_dir = os.path.join(output_dir, "frames")
+     os.makedirs(frames_dir, exist_ok=True)
+
+     cap = cv2.VideoCapture(video_path)
+     total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+     frame_indices = np.linspace(0, total_frames - 1, frame_num, dtype=int)
+
+     # extract frames
+     frames = []
+     for idx in frame_indices:
+         cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
+         ret, frame = cap.read()
+         if ret:
+             frame_path = os.path.join(frames_dir, f"frame_{idx:04d}.png")
+             cv2.imwrite(frame_path, frame)
+             frames.append(frame_path)
+
+     cap.release()
+     return frames
+
+ # inference on a single video
+ def infer_video(video_path, model, device):
+     # Preprocess the video
+     output_dir = "temp_video_frames"
+     frames = preprocess_video(video_path, output_dir)
+
+     transform = transforms.Compose([
+         transforms.Resize((256, 256)),
+         transforms.ToTensor(),
+         transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
+     ])
+
+     probs = []
+     for frame_path in frames:
+         frame = Image.open(frame_path).convert("RGB")
+         frame = transform(frame).unsqueeze(0).to(device)
+
+         data_dict = {
+             "image": frame,
+             "label": torch.tensor([0]).to(device),  # Dummy label
+             "label_spe": torch.tensor([0]).to(device),  # Dummy specific label
+         }
+
+         with torch.no_grad():
+             pred_dict = model(data_dict, inference=True)
+
+         logits = pred_dict["cls"]  # Shape: [batch_size, num_classes]
+         prob = torch.softmax(logits, dim=1)[:, 1].item()  # Probability of being "fake"
+         probs.append(prob)
+
+     # aggregate predictions (e.g., average probability)
+     avg_prob = np.mean(probs)
+     prediction = "Fake" if avg_prob > 0.5 else "Real"
+     return prediction, avg_prob
+
+ # gradio inference function
+ def gradio_inference(video, model_name):
+     config_path = f"/teamspace/studios/this_studio/DeepfakeBench/training/config/detector/{model_name}.yaml"
+     weights_path = f"/teamspace/studios/this_studio/DeepfakeBench/training/weights/{model_name}_best.pth"
+
+     if not os.path.exists(config_path):
+         return f"Error: Config file for model '{model_name}' not found at {config_path}."
+     if not os.path.exists(weights_path):
+         return f"Error: Weights file for model '{model_name}' not found at {weights_path}."
+
+     model = load_model(model_name, config_path, weights_path)
+
+     prediction, confidence = infer_video(video, model, device)
+     return f"Model: {model_name}\nPrediction: {prediction} (Confidence: {confidence:.4f})"
+
+ # Gradio App
+ def create_gradio_app():
+     with gr.Blocks() as demo:
+         gr.Markdown("# Deepfake Detection Demo")
+         gr.Markdown("Upload a video and select a model to detect if it's real or fake.")
+
+         with gr.Row():
+             video_input = gr.Video(label="Upload Video")
+             model_dropdown = gr.Dropdown(choices=AVAILABLE_MODELS, label="Select Model", value="xception")
+
+         output_text = gr.Textbox(label="Prediction Result")
+
+         submit_button = gr.Button("Run Inference")
+         submit_button.click(
+             fn=gradio_inference,
+             inputs=[video_input, model_dropdown],
+             outputs=output_text,
+         )
+
+     return demo
+
+
+ if __name__ == "__main__":
+     demo = create_gradio_app()
+     demo.launch(share=True)
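The app can also be exercised without opening the UI. A minimal sketch, assuming a local sample.mp4 (placeholder filename) and that the hard-coded config/weights paths used by gradio_inference() exist on the machine:

# hypothetical smoke test for the app above
from app import gradio_inference, create_gradio_app

print(gradio_inference("sample.mp4", "xception"))  # e.g. "Model: xception\nPrediction: Real (Confidence: 0.1234)"
create_gradio_app().launch(share=False)            # serve the demo locally instead of creating a public share link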
inference.py ADDED
@@ -0,0 +1,117 @@
+ import os
+ import cv2
+ import torch
+ import numpy as np
+ from torchvision import transforms
+ from PIL import Image
+ from tqdm import tqdm
+ from training.detectors import DETECTOR
+ import yaml
+
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+ # load the model
+ def load_model(model_name, config_path, weights_path):
+     with open(config_path, 'r') as f:
+         config = yaml.safe_load(f)
+
+     config['model_name'] = model_name
+
+     model_class = DETECTOR[model_name]
+     model = model_class(config).to(device)
+
+     checkpoint = torch.load(weights_path, map_location=device)
+     model.load_state_dict(checkpoint, strict=True)
+     model.eval()
+     return model
+
+ # preprocess a single video
+ def preprocess_video(video_path, output_dir, frame_num=32):
+     os.makedirs(output_dir, exist_ok=True)
+     frames_dir = os.path.join(output_dir, "frames")
+     os.makedirs(frames_dir, exist_ok=True)
+
+     cap = cv2.VideoCapture(video_path)
+     total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+     frame_indices = np.linspace(0, total_frames - 1, frame_num, dtype=int)
+
+     # extract frames
+     frames = []
+     for idx in frame_indices:
+         cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
+         ret, frame = cap.read()
+         if ret:
+             frame_path = os.path.join(frames_dir, f"frame_{idx:04d}.png")
+             cv2.imwrite(frame_path, frame)
+             frames.append(frame_path)
+
+     cap.release()
+     return frames
+
+ # inference on a single video
+ def infer_video(video_path, model, device):
+     output_dir = "temp_video_frames"
+     frames = preprocess_video(video_path, output_dir)
+
+     transform = transforms.Compose([
+         transforms.Resize((256, 256)),
+         transforms.ToTensor(),
+         transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
+     ])
+
+     probs = []
+     for frame_path in frames:
+         frame = Image.open(frame_path).convert("RGB")
+         frame = transform(frame).unsqueeze(0).to(device)
+
+         data_dict = {
+             "image": frame,
+             "label": torch.tensor([0]).to(device),  # Dummy label
+             "label_spe": torch.tensor([0]).to(device),  # Dummy specific label
+         }
+
+         with torch.no_grad():
+             pred_dict = model(data_dict, inference=True)
+
+         logits = pred_dict["cls"]  # Shape: [batch_size, num_classes]
+         prob = torch.softmax(logits, dim=1)[:, 1].item()  # Probability of being "fake"
+         probs.append(prob)
+
+     avg_prob = np.mean(probs)
+     prediction = "Fake" if avg_prob > 0.5 else "Real"
+     return prediction, avg_prob
+
+ # main function for terminal-based inference
+ def main(video_filename, model_name):
+     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+     config_path = f"/teamspace/studios/this_studio/DeepfakeBench/training/config/detector/{model_name}.yaml"
+     weights_path = f"/teamspace/studios/this_studio/DeepfakeBench/training/weights/{model_name}_best.pth"
+
+     if not os.path.exists(config_path):
+         print(f"Error: Config file for model '{model_name}' not found at {config_path}.")
+         return
+     if not os.path.exists(weights_path):
+         print(f"Error: Weights file for model '{model_name}' not found at {weights_path}.")
+         return
+
+     model = load_model(model_name, config_path, weights_path)
+
+     video_path = os.path.join(os.getcwd(), video_filename)
+     if not os.path.exists(video_path):
+         print(f"Error: Video file '{video_filename}' not found in the current directory.")
+         return
+
+     prediction, confidence = infer_video(video_path, model, device)
+     print(f"Model: {model_name}")
+     print(f"Prediction: {prediction} (Confidence: {confidence:.4f})")
+
+
+ if __name__ == "__main__":
+     import sys
+     if len(sys.argv) != 3:
+         print("Usage: python inference.py <video_filename> <model_name>")
+         print("Available models: xception, meso4, meso4Inception, efficientnetb4, ucf, etc.")
+     else:
+         video_filename = sys.argv[1]
+         model_name = sys.argv[2]
+         main(video_filename, model_name)
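For reference, a minimal sketch of driving these helpers programmatically rather than via the CLI, assuming the detector YAML and checkpoint paths below (placeholders) point at real files and a sample.mp4 exists in the working directory:

# hypothetical programmatic use of inference.py
import torch
from inference import load_model, infer_video

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = load_model(
    "xception",
    "training/config/detector/xception.yaml",  # placeholder config path
    "training/weights/xception_best.pth",      # placeholder checkpoint path
)
prediction, confidence = infer_video("sample.mp4", model, device)
print(prediction, f"{confidence:.4f}")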
requirements.txt ADDED
@@ -0,0 +1,33 @@
+ numpy==1.21.5
+ pandas==1.4.2
+ Pillow==9.0.1
+ dlib==19.24.0
+ imageio==2.9.0
+ imgaug==0.4.0
+ tqdm==4.61.0
+ scipy==1.7.3
+ seaborn==0.11.2
+ pyyaml==6.0
+ imutils==0.5.4
+ opencv-python==4.6.0.66
+ scikit-image==0.19.2
+ scikit-learn==1.0.2
+ albumentations==1.1.0
+ torch==1.12.0
+ torchvision==0.13.0
+ torchaudio==0.12.0
+ efficientnet-pytorch==0.7.1
+ timm==0.6.12
+ segmentation-models-pytorch==0.3.2
+ torchtoolbox==0.1.8.2
+ tensorboard==2.10.1
+ setuptools==59.5.0
+ loralib
+ einops
+ transformers
+ filterpy
+ simplejson
+ kornia
+ fvcore
+ imgaug==0.4.0
+ git+https://github.com/openai/CLIP.git
training/config/__init__.py ADDED
@@ -0,0 +1,7 @@
+ import os
+ import sys
+ current_file_path = os.path.abspath(__file__)
+ parent_dir = os.path.dirname(os.path.dirname(current_file_path))
+ project_root_dir = os.path.dirname(parent_dir)
+ sys.path.append(parent_dir)
+ sys.path.append(project_root_dir)
training/config/config/__init__.py ADDED
@@ -0,0 +1,7 @@
+ import os
+ import sys
+ current_file_path = os.path.abspath(__file__)
+ parent_dir = os.path.dirname(os.path.dirname(current_file_path))
+ project_root_dir = os.path.dirname(parent_dir)
+ sys.path.append(parent_dir)
+ sys.path.append(project_root_dir)
training/config/config/backbone/cls_hrnet_w48.yaml ADDED
@@ -0,0 +1,103 @@
+ CUDNN:
+   BENCHMARK: true
+   DETERMINISTIC: false
+   ENABLED: true
+ GPUS: (0,1,2,3)
+ OUTPUT_DIR: 'output'
+ LOG_DIR: 'log'
+ WORKERS: 4
+ PRINT_FREQ: 100
+
+ DATASET:
+   DATASET: lip
+   ROOT: 'data/'
+   TEST_SET: 'list/lip/valList.txt'
+   TRAIN_SET: 'list/lip/trainList.txt'
+   NUM_CLASSES: 20
+ MODEL:
+   NAME: cls_hrnet
+   #IMAGE_SIZE:
+   #  - 224
+   #  - 224
+   EXTRA:
+     STAGE1:
+       NUM_MODULES: 1
+       NUM_RANCHES: 1
+       BLOCK: BOTTLENECK
+       NUM_BLOCKS:
+       - 4
+       NUM_CHANNELS:
+       - 64
+       FUSE_METHOD: SUM
+     STAGE2:
+       NUM_MODULES: 1
+       NUM_BRANCHES: 2
+       BLOCK: BASIC
+       NUM_BLOCKS:
+       - 4
+       - 4
+       NUM_CHANNELS:
+       - 48
+       - 96
+       FUSE_METHOD: SUM
+     STAGE3:
+       NUM_MODULES: 4
+       NUM_BRANCHES: 3
+       BLOCK: BASIC
+       NUM_BLOCKS:
+       - 4
+       - 4
+       - 4
+       NUM_CHANNELS:
+       - 48
+       - 96
+       - 192
+       FUSE_METHOD: SUM
+     STAGE4:
+       NUM_MODULES: 3
+       NUM_BRANCHES: 4
+       BLOCK: BASIC
+       NUM_BLOCKS:
+       - 4
+       - 4
+       - 4
+       - 4
+       NUM_CHANNELS:
+       - 48
+       - 96
+       - 192
+       - 384
+       FUSE_METHOD: SUM
+ LOSS:
+   USE_OHEM: false
+   OHEMTHRES: 0.9
+   OHEMKEEP: 131072
+ TRAIN:
+   IMAGE_SIZE:
+   - 473
+   - 473
+   BASE_SIZE: 473
+   BATCH_SIZE_PER_GPU: 10
+   SHUFFLE: true
+   BEGIN_EPOCH: 0
+   END_EPOCH: 150
+   RESUME: true
+   OPTIMIZER: sgd
+   LR: 0.007
+   WD: 0.0005
+   MOMENTUM: 0.9
+   NESTEROV: false
+   FLIP: true
+   MULTI_SCALE: true
+   DOWNSAMPLERATE: 1
+   IGNORE_LABEL: 255
+   SCALE_FACTOR: 11
+ TEST:
+   IMAGE_SIZE:
+   - 473
+   - 473
+   BASE_SIZE: 473
+   BATCH_SIZE_PER_GPU: 16
+   NUM_SAMPLES: 2000
+   FLIP_TEST: false
+   MULTI_SCALE: false
training/config/config/detector/efficientnetb4.yaml ADDED
@@ -0,0 +1,88 @@
+ # log dir
+ log_dir: logs/evaluations/effnb4
+
+ # model setting
+ # pretrained: /home/zhiyuanyan/disfin/deepfake_benchmark/training/pretrained/xception-b5690688.pth # path to a pre-trained model, if using one
+ pretrained: ./training/pretrained/efficientnet-b4-6ed6700e.pth # path to a pre-trained model, if using one
+ model_name: efficientnetb4 # model name
+ backbone_name: efficientnetb4 # backbone name
+
+ #backbone setting
+ backbone_config:
+   num_classes: 2
+   inc: 3
+   dropout: false
+   mode: Original
+
+ # dataset
+ all_dataset: [FaceForensics++, FF-F2F, FF-DF, FF-FS, FF-NT, FaceShifter, DeepFakeDetection, Celeb-DF-v1, Celeb-DF-v2, DFDCP, DFDC, DeeperForensics-1.0, UADFV]
+ train_dataset: [FF-NT]
+ test_dataset: [FaceForensics++, FF-F2F, FF-DF, FF-FS, FF-NT]
+
+ compression: c23 # compression-level for videos
+ train_batchSize: 32 # training batch size
+ test_batchSize: 32 # test batch size
+ workers: 8 # number of data loading workers
+ frame_num: {'train': 32, 'test': 32} # number of frames to use per video in training and testing
+ resolution: 256 # resolution of output image to network
+ with_mask: false # whether to include mask information in the input
+ with_landmark: false # whether to include facial landmark information in the input
+ save_ckpt: true # whether to save checkpoint
+ save_feat: true # whether to save features
+
+
+ # data augmentation
+ use_data_augmentation: true # Add this flag to enable/disable data augmentation
+ data_aug:
+   flip_prob: 0.5
+   rotate_prob: 0.5
+   rotate_limit: [-10, 10]
+   blur_prob: 0.5
+   blur_limit: [3, 7]
+   brightness_prob: 0.5
+   brightness_limit: [-0.1, 0.1]
+   contrast_limit: [-0.1, 0.1]
+   quality_lower: 40
+   quality_upper: 100
+
+ # mean and std for normalization
+ mean: [0.5, 0.5, 0.5]
+ std: [0.5, 0.5, 0.5]
+
+ # optimizer config
+ optimizer:
+   # choose between 'adam' and 'sgd'
+   type: adam
+   adam:
+     lr: 0.0002 # learning rate
+     beta1: 0.9 # beta1 for Adam optimizer
+     beta2: 0.999 # beta2 for Adam optimizer
+     eps: 0.00000001 # epsilon for Adam optimizer
+     weight_decay: 0.0005 # weight decay for regularization
+     amsgrad: false
+   sgd:
+     lr: 0.0002 # learning rate
+     momentum: 0.9 # momentum for SGD optimizer
+     weight_decay: 0.0005 # weight decay for regularization
+
+ # training config
+ lr_scheduler: null # learning rate scheduler
+ nEpochs: 10 # number of epochs to train for
+ start_epoch: 0 # manual epoch number (useful for restarts)
+ save_epoch: 1 # interval epochs for saving models
+ rec_iter: 100 # interval iterations for recording
+ logdir: ./logs # folder to output images and logs
+ manualSeed: 1024 # manual seed for random number generation
+ save_ckpt: false # whether to save checkpoint
+
+ # loss function
+ loss_func: cross_entropy # loss function to use
+ losstype: null
+
+ # metric
+ metric_scoring: auc # metric for evaluation (auc, acc, eer, ap)
+
+ # cuda
+
+ cuda: true # whether to use CUDA acceleration
+ cudnn: true # whether to use CuDNN for convolution operations
training/config/config/detector/resnet34.yaml ADDED
@@ -0,0 +1,87 @@
+ # log dir
+ log_dir: /mntcephfs/lab_data/zhiyuanyan/benchmark_results/logs_final/resnet18
+
+ # model setting
+ pretrained: /home/zhiyuanyan/disfin/deepfake_benchmark/training/pretrained/resnet34-b627a593.pth # path to a pre-trained model, if using one
+ model_name: resnet34 # model name
+ backbone_name: resnet34 # backbone name
+
+ #backbone setting
+ backbone_config:
+   num_classes: 2
+   inc: 3
+   dropout: false
+   mode: Original
+
+ # dataset
+ all_dataset: [FaceForensics++, FF-F2F, FF-DF, FF-FS, FF-NT, FaceShifter, DeepFakeDetection, Celeb-DF-v1, Celeb-DF-v2, DFDCP, DFDC, DeeperForensics-1.0, UADFV]
+ train_dataset: [FF-NT]
+ test_dataset: [FaceForensics++, FF-F2F, FF-DF, FF-FS, FF-NT]
+
+ compression: c23 # compression-level for videos
+ train_batchSize: 32 # training batch size
+ test_batchSize: 32 # test batch size
+ workers: 8 # number of data loading workers
+ frame_num: {'train': 32, 'test': 32} # number of frames to use per video in training and testing
+ resolution: 256 # resolution of output image to network
+ with_mask: false # whether to include mask information in the input
+ with_landmark: false # whether to include facial landmark information in the input
+ save_ckpt: true # whether to save checkpoint
+ save_feat: true # whether to save features
+
+
+ # data augmentation
+ use_data_augmentation: true # Add this flag to enable/disable data augmentation
+ data_aug:
+   flip_prob: 0.5
+   rotate_prob: 0.5
+   rotate_limit: [-10, 10]
+   blur_prob: 0.5
+   blur_limit: [3, 7]
+   brightness_prob: 0.5
+   brightness_limit: [-0.1, 0.1]
+   contrast_limit: [-0.1, 0.1]
+   quality_lower: 40
+   quality_upper: 100
+
+ # mean and std for normalization
+ mean: [0.5, 0.5, 0.5]
+ std: [0.5, 0.5, 0.5]
+
+ # optimizer config
+ optimizer:
+   # choose between 'adam' and 'sgd'
+   type: adam
+   adam:
+     lr: 0.0002 # learning rate
+     beta1: 0.9 # beta1 for Adam optimizer
+     beta2: 0.999 # beta2 for Adam optimizer
+     eps: 0.00000001 # epsilon for Adam optimizer
+     weight_decay: 0.0005 # weight decay for regularization
+     amsgrad: false
+   sgd:
+     lr: 0.0002 # learning rate
+     momentum: 0.9 # momentum for SGD optimizer
+     weight_decay: 0.0005 # weight decay for regularization
+
+ # training config
+ lr_scheduler: null # learning rate scheduler
+ nEpochs: 10 # number of epochs to train for
+ start_epoch: 0 # manual epoch number (useful for restarts)
+ save_epoch: 1 # interval epochs for saving models
+ rec_iter: 100 # interval iterations for recording
+ logdir: ./logs # folder to output images and logs
+ manualSeed: 1024 # manual seed for random number generation
+ save_ckpt: false # whether to save checkpoint
+
+ # loss function
+ loss_func: cross_entropy # loss function to use
+ losstype: null
+
+ # metric
+ metric_scoring: auc # metric for evaluation (auc, acc, eer, ap)
+
+ # cuda
+
+ cuda: true # whether to use CUDA acceleration
+ cudnn: true # whether to use CuDNN for convolution operations
training/config/config/detector/ucf.yaml ADDED
@@ -0,0 +1,130 @@
+ # log dir
+ log_dir: /data/home/zhiyuanyan/DeepfakeBench/debug_logs/ucf
+
+ # model setting
+ pretrained: ./training/pretrained/xception-b5690688.pth # path to a pre-trained model, if using one
+ # pretrained: '/home/zhiyuanyan/.cache/torch/hub/checkpoints/resnet34-b627a593.pth' # path to a pre-trained model, if using one
+ model_name: ucf # model name
+ backbone_name: xception # backbone name
+ encoder_feat_dim: 512 # feature dimension of the backbone
+
+ #backbone setting
+ backbone_config:
+   mode: adjust_channel
+   num_classes: 2
+   inc: 3
+   dropout: false
+
+ # dataset
+ all_dataset: [FaceForensics++, FF-F2F, FF-DF, FF-FS, FF-NT, FaceShifter, DeepFakeDetection, Celeb-DF-v1, Celeb-DF-v2, DFDCP, DFDC, DeeperForensics-1.0, UADFV]
+ train_dataset: [FF-F2F, FF-DF, FF-FS, FF-NT,]
+ test_dataset: [Celeb-DF-v2]
+ dataset_type: pair
+
+ compression: c23 # compression-level for videos
+ train_batchSize: 16 # training batch size
+ test_batchSize: 32 # test batch size
+ workers: 8 # number of data loading workers
+ frame_num: {'train': 32, 'test': 32} # number of frames to use per video in training and testing
+ resolution: 256 # resolution of output image to network
+ with_mask: false # whether to include mask information in the input
+ with_landmark: false # whether to include facial landmark information in the input
+ save_feat: true # whether to save features
+
+ # label settings
+ label_dict:
+   # DFD
+   DFD_fake: 1
+   DFD_real: 0
+   FaceShifter: 1
+   FF-FH: 1
+   # FF++ + FaceShifter(FF-real+FF-FH)
+   # ucf specific label setting
+   FF-DF: 1
+   FF-F2F: 2
+   FF-FS: 3
+   FF-NT: 4
+   FF-real: 0
+   # CelebDF
+   CelebDFv1_real: 0
+   CelebDFv1_fake: 1
+   CelebDFv2_real: 0
+   CelebDFv2_fake: 1
+   # DFDCP
+   DFDCP_Real: 0
+   DFDCP_FakeA: 1
+   DFDCP_FakeB: 1
+   # DFDC
+   DFDC_Fake: 1
+   DFDC_Real: 0
+   # DeeperForensics-1.0
+   DF_fake: 1
+   DF_real: 0
+   # UADFV
+   UADFV_Fake: 1
+   UADFV_Real: 0
+   # roop
+   roop_Fake: 1
+   roop_Real: 0
+
+
+
+ # data augmentation
+ use_data_augmentation: true # Add this flag to enable/disable data augmentation
+ data_aug:
+   flip_prob: 0.5
+   rotate_prob: 0.5
+   rotate_limit: [-10, 10]
+   blur_prob: 0.5
+   blur_limit: [3, 7]
+   brightness_prob: 0.5
+   brightness_limit: [-0.1, 0.1]
+   contrast_limit: [-0.1, 0.1]
+   quality_lower: 40
+   quality_upper: 100
+
+ # mean and std for normalization
+ mean: [0.5, 0.5, 0.5]
+ std: [0.5, 0.5, 0.5]
+
+ # optimizer config
+ optimizer:
+   # choose between 'adam' and 'sgd'
+   type: adam
+   adam:
+     lr: 0.0002 # learning rate
+     beta1: 0.9 # beta1 for Adam optimizer
+     beta2: 0.999 # beta2 for Adam optimizer
+     eps: 0.00000001 # epsilon for Adam optimizer
+     weight_decay: 0.0005 # weight decay for regularization
+     amsgrad: false
+   sgd:
+     lr: 0.0002 # learning rate
+     momentum: 0.9 # momentum for SGD optimizer
+     weight_decay: 0.0005 # weight decay for regularization
+
+ # training config
+ lr_scheduler: null # learning rate scheduler
+ nEpochs: 5 # number of epochs to train for
+ start_epoch: 0 # manual epoch number (useful for restarts)
+ save_epoch: 1 # interval epochs for saving models
+ rec_iter: 100 # interval iterations for recording
+ logdir: ./logs # folder to output images and logs
+ manualSeed: 1024 # manual seed for random number generation
+ save_ckpt: false # whether to save checkpoint
+
+ # loss function
+ loss_func:
+   cls_loss: cross_entropy # loss function to use
+   spe_loss: cross_entropy
+   con_loss: contrastive_regularization
+   rec_loss: l1loss
+ losstype: null
+
+ # metric
+ metric_scoring: auc # metric for evaluation (auc, acc, eer, ap)
+
+ # cuda
+
+ cuda: true # whether to use CUDA acceleration
+ cudnn: true # whether to use CuDNN for convolution operations
training/config/config/detector/xception.yaml ADDED
@@ -0,0 +1,86 @@
+ # log dir
+ log_dir: /data/home/zhiyuanyan/DeepfakeBench/logs/testing_bench
+
+ # model setting
+ pretrained: /teamspace/studios/this_studio/DeepfakeBench/training/pretrained/xception-b5690688.pth # path to a pre-trained model, if using one
+ model_name: xception # model name
+ backbone_name: xception # backbone name
+
+ #backbone setting
+ backbone_config:
+   mode: original
+   num_classes: 2
+   inc: 3
+   dropout: false
+
+ # dataset
+ all_dataset: [FaceForensics++, FF-F2F, FF-DF, FF-FS, FF-NT, FaceShifter, DeepFakeDetection, Celeb-DF-v1, Celeb-DF-v2, DFDCP, DFDC, DeeperForensics-1.0, UADFV]
+ train_dataset: [Celeb-DF-v1, DFDCP, UADFV]
+ test_dataset: [Celeb-DF-v1, DFDCP, UADFV]
+
+ compression: c23 # compression-level for videos
+ train_batchSize: 32 # training batch size
+ test_batchSize: 32 # test batch size
+ workers: 8 # number of data loading workers
+ frame_num: {'train': 32, 'test': 32} # number of frames to use per video in training and testing
+ resolution: 256 # resolution of output image to network
+ with_mask: false # whether to include mask information in the input
+ with_landmark: false # whether to include facial landmark information in the input
+
+
+ # data augmentation
+ use_data_augmentation: true # Add this flag to enable/disable data augmentation
+ data_aug:
+   flip_prob: 0.5
+   rotate_prob: 0.0
+   rotate_limit: [-10, 10]
+   blur_prob: 0.5
+   blur_limit: [3, 7]
+   brightness_prob: 0.5
+   brightness_limit: [-0.1, 0.1]
+   contrast_limit: [-0.1, 0.1]
+   quality_lower: 40
+   quality_upper: 100
+
+ # mean and std for normalization
+ mean: [0.5, 0.5, 0.5]
+ std: [0.5, 0.5, 0.5]
+
+ # optimizer config
+ optimizer:
+   # choose between 'adam' and 'sgd'
+   type: adam
+   adam:
+     lr: 0.0002 # learning rate
+     beta1: 0.9 # beta1 for Adam optimizer
+     beta2: 0.999 # beta2 for Adam optimizer
+     eps: 0.00000001 # epsilon for Adam optimizer
+     weight_decay: 0.0005 # weight decay for regularization
+     amsgrad: false
+   sgd:
+     lr: 0.0002 # learning rate
+     momentum: 0.9 # momentum for SGD optimizer
+     weight_decay: 0.0005 # weight decay for regularization
+
+ # training config
+ lr_scheduler: null # learning rate scheduler
+ nEpochs: 10 # number of epochs to train for
+ start_epoch: 0 # manual epoch number (useful for restarts)
+ save_epoch: 1 # interval epochs for saving models
+ rec_iter: 100 # interval iterations for recording
+ logdir: ./logs # folder to output images and logs
+ manualSeed: 1024 # manual seed for random number generation
+ save_ckpt: true # whether to save checkpoint
+ save_feat: true # whether to save features
+
+ # loss function
+ loss_func: cross_entropy # loss function to use
+ losstype: null
+
+ # metric
+ metric_scoring: auc # metric for evaluation (auc, acc, eer, ap)
+
+ # cuda
+
+ cuda: true # whether to use CUDA acceleration
+ cudnn: true # whether to use CuDNN for convolution operations
training/config/config/test_config.yaml ADDED
@@ -0,0 +1,38 @@
+ mode: test
+ lmdb: False
+ dataset_root_rgb: './datasets'
+ lmdb_dir: 'I:\transform_2_lmdb'
+ dataset_json_folder: '/teamspace/studios/this_studio/DeepfakeBench/preprocessing/dataset_json'
+ label_dict:
+   # DFD
+   DFD_fake: 1
+   DFD_real: 0
+   # FF++ + FaceShifter(FF-real+FF-FH)
+   FF-SH: 1
+   FF-F2F: 1
+   FF-DF: 1
+   FF-FS: 1
+   FF-NT: 1
+   FF-FH: 1
+   FF-real: 0
+   # CelebDF
+   CelebDFv1_real: 0
+   CelebDFv1_fake: 1
+   CelebDFv2_real: 0
+   CelebDFv2_fake: 1
+   # DFDCP
+   DFDCP_Real: 0
+   DFDCP_FakeA: 1
+   DFDCP_FakeB: 1
+   # DFDC
+   DFDC_Fake: 1
+   DFDC_Real: 0
+   # DeeperForensics-1.0
+   DF_fake: 1
+   DF_real: 0
+   # UADFV
+   UADFV_Fake: 1
+   UADFV_Real: 0
+   # Roop
+   roop_Real: 0
+   roop_Fake: 1
training/config/config/train_config.yaml ADDED
@@ -0,0 +1,43 @@
+ mode: train
+ lmdb: False
+ dry_run: false
+ dataset_root_rgb: './datasets'
+ lmdb_dir: 'I:\transform_2_lmdb'
+ dataset_json_folder: '/teamspace/studios/this_studio/DeepfakeBench/preprocessing/dataset_json'
+ SWA: False
+ save_avg: True
+ log_dir: ./logs/training/
+ # label settings
+ label_dict:
+   # DFD
+   DFD_fake: 1
+   DFD_real: 0
+   # FF++ + FaceShifter(FF-real+FF-FH)
+   FF-SH: 1
+   FF-F2F: 1
+   FF-DF: 1
+   FF-FS: 1
+   FF-NT: 1
+   FF-FH: 1
+   FF-real: 0
+   # CelebDF
+   CelebDFv1_real: 0
+   CelebDFv1_fake: 1
+   CelebDFv2_real: 0
+   CelebDFv2_fake: 1
+   # DFDCP
+   DFDCP_Real: 0
+   DFDCP_FakeA: 1
+   DFDCP_FakeB: 1
+   # DFDC
+   DFDC_Fake: 1
+   DFDC_Real: 0
+   # DeeperForensics-1.0
+   DF_fake: 1
+   DF_real: 0
+   # UADFV
+   UADFV_Fake: 1
+   UADFV_Real: 0
+   # Roop
+   roop_Real: 0
+   roop_Fake: 1
training/config/detector/efficientnetb4.yaml ADDED
@@ -0,0 +1,88 @@
+ # log dir
+ log_dir: logs/evaluations/effnb4
+
+ # model setting
+ # pretrained: /home/zhiyuanyan/disfin/deepfake_benchmark/training/pretrained/xception-b5690688.pth # path to a pre-trained model, if using one
+ pretrained: ./training/pretrained/efficientnet-b4-6ed6700e.pth # path to a pre-trained model, if using one
+ model_name: efficientnetb4 # model name
+ backbone_name: efficientnetb4 # backbone name
+
+ #backbone setting
+ backbone_config:
+   num_classes: 2
+   inc: 3
+   dropout: false
+   mode: Original
+
+ # dataset
+ all_dataset: [FaceForensics++, FF-F2F, FF-DF, FF-FS, FF-NT, FaceShifter, DeepFakeDetection, Celeb-DF-v1, Celeb-DF-v2, DFDCP, DFDC, DeeperForensics-1.0, UADFV]
+ train_dataset: [FF-NT]
+ test_dataset: [FaceForensics++, FF-F2F, FF-DF, FF-FS, FF-NT]
+
+ compression: c23 # compression-level for videos
+ train_batchSize: 32 # training batch size
+ test_batchSize: 32 # test batch size
+ workers: 8 # number of data loading workers
+ frame_num: {'train': 32, 'test': 32} # number of frames to use per video in training and testing
+ resolution: 256 # resolution of output image to network
+ with_mask: false # whether to include mask information in the input
+ with_landmark: false # whether to include facial landmark information in the input
+ save_ckpt: true # whether to save checkpoint
+ save_feat: true # whether to save features
+
+
+ # data augmentation
+ use_data_augmentation: true # Add this flag to enable/disable data augmentation
+ data_aug:
+   flip_prob: 0.5
+   rotate_prob: 0.5
+   rotate_limit: [-10, 10]
+   blur_prob: 0.5
+   blur_limit: [3, 7]
+   brightness_prob: 0.5
+   brightness_limit: [-0.1, 0.1]
+   contrast_limit: [-0.1, 0.1]
+   quality_lower: 40
+   quality_upper: 100
+
+ # mean and std for normalization
+ mean: [0.5, 0.5, 0.5]
+ std: [0.5, 0.5, 0.5]
+
+ # optimizer config
+ optimizer:
+   # choose between 'adam' and 'sgd'
+   type: adam
+   adam:
+     lr: 0.0002 # learning rate
+     beta1: 0.9 # beta1 for Adam optimizer
+     beta2: 0.999 # beta2 for Adam optimizer
+     eps: 0.00000001 # epsilon for Adam optimizer
+     weight_decay: 0.0005 # weight decay for regularization
+     amsgrad: false
+   sgd:
+     lr: 0.0002 # learning rate
+     momentum: 0.9 # momentum for SGD optimizer
+     weight_decay: 0.0005 # weight decay for regularization
+
+ # training config
+ lr_scheduler: null # learning rate scheduler
+ nEpochs: 10 # number of epochs to train for
+ start_epoch: 0 # manual epoch number (useful for restarts)
+ save_epoch: 1 # interval epochs for saving models
+ rec_iter: 100 # interval iterations for recording
+ logdir: ./logs # folder to output images and logs
+ manualSeed: 1024 # manual seed for random number generation
+ save_ckpt: false # whether to save checkpoint
+
+ # loss function
+ loss_func: cross_entropy # loss function to use
+ losstype: null
+
+ # metric
+ metric_scoring: auc # metric for evaluation (auc, acc, eer, ap)
+
+ # cuda
+
+ cuda: true # whether to use CUDA acceleration
+ cudnn: true # whether to use CuDNN for convolution operations
training/config/detector/ucf.yaml ADDED
@@ -0,0 +1,131 @@
+ # log dir
+ log_dir: /data/home/zhiyuanyan/DeepfakeBench/debug_logs/ucf
+
+ # model setting
+ pretrained: /teamspace/studios/this_studio/DeepfakeBench/training/pretrained/xception-b5690688.pth # path to a pre-trained model, if using one
+ # pretrained: '/home/zhiyuanyan/.cache/torch/hub/checkpoints/resnet34-b627a593.pth' # path to a pre-trained model, if using one
+ model_name: ucf # model name
+ backbone_name: xception # backbone name
+ encoder_feat_dim: 512 # feature dimension of the backbone
+
+ #backbone setting
+ backbone_config:
+   mode: adjust_channel
+   num_classes: 2
+   inc: 3
+   dropout: false
+
+ # dataset
+ all_dataset: [FaceForensics++, FF-F2F, FF-DF, FF-FS, FF-NT, FaceShifter, DeepFakeDetection, Celeb-DF-v1, Celeb-DF-v2, DFDCP, DFDC, DeeperForensics-1.0, UADFV]
+ train_dataset: [FF-F2F, FF-DF, FF-FS, FF-NT,]
+ test_dataset: [Celeb-DF-v2]
+ dataset_type: pair
+
+ compression: c23 # compression-level for videos
+ train_batchSize: 16 # training batch size
+ test_batchSize: 32 # test batch size
+ workers: 8 # number of data loading workers
+ frame_num: {'train': 32, 'test': 32} # number of frames to use per video in training and testing
+ resolution: 256 # resolution of output image to network
+ with_mask: false # whether to include mask information in the input
+ with_landmark: false # whether to include facial landmark information in the input
+ save_ckpt: true # whether to save checkpoint
+ save_feat: true # whether to save features
+
+ # label settings
+ label_dict:
+   # DFD
+   DFD_fake: 1
+   DFD_real: 0
+   FaceShifter: 1
+   FF-FH: 1
+   # FF++ + FaceShifter(FF-real+FF-FH)
+   # ucf specific label setting
+   FF-DF: 1
+   FF-F2F: 2
+   FF-FS: 3
+   FF-NT: 4
+   FF-real: 0
+   # CelebDF
+   CelebDFv1_real: 0
+   CelebDFv1_fake: 1
+   CelebDFv2_real: 0
+   CelebDFv2_fake: 1
+   # DFDCP
+   DFDCP_Real: 0
+   DFDCP_FakeA: 1
+   DFDCP_FakeB: 1
+   # DFDC
+   DFDC_Fake: 1
+   DFDC_Real: 0
+   # DeeperForensics-1.0
+   DF_fake: 1
+   DF_real: 0
+   # UADFV
+   UADFV_Fake: 1
+   UADFV_Real: 0
+   # roop
+   roop_Fake: 1
+   roop_Real: 0
+
+
+
+ # data augmentation
+ use_data_augmentation: true # Add this flag to enable/disable data augmentation
+ data_aug:
+   flip_prob: 0.5
+   rotate_prob: 0.5
+   rotate_limit: [-10, 10]
+   blur_prob: 0.5
+   blur_limit: [3, 7]
+   brightness_prob: 0.5
+   brightness_limit: [-0.1, 0.1]
+   contrast_limit: [-0.1, 0.1]
+   quality_lower: 40
+   quality_upper: 100
+
+ # mean and std for normalization
+ mean: [0.5, 0.5, 0.5]
+ std: [0.5, 0.5, 0.5]
+
+ # optimizer config
+ optimizer:
+   # choose between 'adam' and 'sgd'
+   type: adam
+   adam:
+     lr: 0.0002 # learning rate
+     beta1: 0.9 # beta1 for Adam optimizer
+     beta2: 0.999 # beta2 for Adam optimizer
+     eps: 0.00000001 # epsilon for Adam optimizer
+     weight_decay: 0.0005 # weight decay for regularization
+     amsgrad: false
+   sgd:
+     lr: 0.0002 # learning rate
+     momentum: 0.9 # momentum for SGD optimizer
+     weight_decay: 0.0005 # weight decay for regularization
+
+ # training config
+ lr_scheduler: null # learning rate scheduler
+ nEpochs: 5 # number of epochs to train for
+ start_epoch: 0 # manual epoch number (useful for restarts)
+ save_epoch: 1 # interval epochs for saving models
+ rec_iter: 100 # interval iterations for recording
+ logdir: ./logs # folder to output images and logs
+ manualSeed: 1024 # manual seed for random number generation
+ save_ckpt: false # whether to save checkpoint
+
+ # loss function
+ loss_func:
+   cls_loss: cross_entropy # loss function to use
+   spe_loss: cross_entropy
+   con_loss: contrastive_regularization
+   rec_loss: l1loss
+ losstype: null
+
+ # metric
+ metric_scoring: auc # metric for evaluation (auc, acc, eer, ap)
+
+ # cuda
+
+ cuda: true # whether to use CUDA acceleration
+ cudnn: true # whether to use CuDNN for convolution operations
training/config/detector/xception.yaml ADDED
@@ -0,0 +1,86 @@
+ # log dir
+ log_dir: /teamspace/studios/this_studio/DeepfakeBench/logs/testing_bench
+
+ # model setting
+ pretrained: /teamspace/studios/this_studio/DeepfakeBench/training/pretrained/xception-b5690688.pth # path to a pre-trained model, if using one
+ model_name: xception # model name
+ backbone_name: xception # backbone name
+
+ #backbone setting
+ backbone_config:
+   mode: original
+   num_classes: 2
+   inc: 3
+   dropout: false
+
+ # dataset
+ all_dataset: [FaceForensics++, FF-F2F, FF-DF, FF-FS, FF-NT, FaceShifter, DeepFakeDetection, Celeb-DF-v1, Celeb-DF-v2, DFDCP, DFDC, DeeperForensics-1.0, UADFV]
+ train_dataset: [Celeb-DF-v1, DFDCP]
+ test_dataset: [UADFV]
+
+ compression: c23 # compression-level for videos
+ train_batchSize: 32 # training batch size
+ test_batchSize: 32 # test batch size
+ workers: 8 # number of data loading workers
+ frame_num: {'train': 32, 'test': 32} # number of frames to use per video in training and testing
+ resolution: 256 # resolution of output image to network
+ with_mask: false # whether to include mask information in the input
+ with_landmark: false # whether to include facial landmark information in the input
+
+
+ # data augmentation
+ use_data_augmentation: true # Add this flag to enable/disable data augmentation
+ data_aug:
+   flip_prob: 0.5
+   rotate_prob: 0.0
+   rotate_limit: [-10, 10]
+   blur_prob: 0.5
+   blur_limit: [3, 7]
+   brightness_prob: 0.5
+   brightness_limit: [-0.1, 0.1]
+   contrast_limit: [-0.1, 0.1]
+   quality_lower: 40
+   quality_upper: 100
+
+ # mean and std for normalization
+ mean: [0.5, 0.5, 0.5]
+ std: [0.5, 0.5, 0.5]
+
+ # optimizer config
+ optimizer:
+   # choose between 'adam' and 'sgd'
+   type: adam
+   adam:
+     lr: 0.0002 # learning rate
+     beta1: 0.9 # beta1 for Adam optimizer
+     beta2: 0.999 # beta2 for Adam optimizer
+     eps: 0.00000001 # epsilon for Adam optimizer
+     weight_decay: 0.0005 # weight decay for regularization
+     amsgrad: false
+   sgd:
+     lr: 0.0002 # learning rate
+     momentum: 0.9 # momentum for SGD optimizer
+     weight_decay: 0.0005 # weight decay for regularization
+
+ # training config
+ lr_scheduler: null # learning rate scheduler
+ nEpochs: 10 # number of epochs to train for
+ start_epoch: 0 # manual epoch number (useful for restarts)
+ save_epoch: 1 # interval epochs for saving models
+ rec_iter: 100 # interval iterations for recording
+ logdir: ./logs # folder to output images and logs
+ manualSeed: 1024 # manual seed for random number generation
+ save_ckpt: true # whether to save checkpoint
+ save_feat: true # whether to save features
+
+ # loss function
+ loss_func: cross_entropy # loss function to use
+ losstype: null
+
+ # metric
+ metric_scoring: auc # metric for evaluation (auc, acc, eer, ap)
+
+ # cuda
+
+ cuda: true # whether to use CUDA acceleration
+ cudnn: true # whether to use CuDNN for convolution operations
training/config/test_config.yaml ADDED
@@ -0,0 +1,38 @@
+ mode: test
+ lmdb: False
+ rgb_dir: ''
+ lmdb_dir: './datasets/lmdb'
+ dataset_json_folder: './preprocessing/dataset_json'
+ label_dict:
+   # DFD
+   DFD_fake: 1
+   DFD_real: 0
+   # FF++ + FaceShifter(FF-real+FF-FH)
+   FF-SH: 1
+   FF-F2F: 1
+   FF-DF: 1
+   FF-FS: 1
+   FF-NT: 1
+   FF-FH: 1
+   FF-real: 0
+   # CelebDF
+   CelebDFv1_real: 0
+   CelebDFv1_fake: 1
+   CelebDFv2_real: 0
+   CelebDFv2_fake: 1
+   # DFDCP
+   DFDCP_Real: 0
+   DFDCP_FakeA: 1
+   DFDCP_FakeB: 1
+   # DFDC
+   DFDC_Fake: 1
+   DFDC_Real: 0
+   # DeeperForensics-1.0
+   DF_fake: 1
+   DF_real: 0
+   # UADFV
+   UADFV_Fake: 1
+   UADFV_Real: 0
+   # Roop
+   roop_Real: 0
+   roop_Fake: 1
training/config/train_config.yaml ADDED
@@ -0,0 +1,43 @@
+ mode: train
+ lmdb: False
+ dry_run: false
+ rgb_dir: ''
+ lmdb_dir: './datasets/lmdb'
+ dataset_json_folder: './preprocessing/dataset_json'
+ SWA: False
+ save_avg: True
+ log_dir: ./logs/training/
+ # label settings
+ label_dict:
+   # DFD
+   DFD_fake: 1
+   DFD_real: 0
+   # FF++ + FaceShifter(FF-real+FF-FH)
+   FF-SH: 1
+   FF-F2F: 1
+   FF-DF: 1
+   FF-FS: 1
+   FF-NT: 1
+   FF-FH: 1
+   FF-real: 0
+   # CelebDF
+   CelebDFv1_real: 0
+   CelebDFv1_fake: 1
+   CelebDFv2_real: 0
+   CelebDFv2_fake: 1
+   # DFDCP
+   DFDCP_Real: 0
+   DFDCP_FakeA: 1
+   DFDCP_FakeB: 1
+   # DFDC
+   DFDC_Fake: 1
+   DFDC_Real: 0
+   # DeeperForensics-1.0
+   DF_fake: 1
+   DF_real: 0
+   # UADFV
+   UADFV_Fake: 1
+   UADFV_Real: 0
+   # Roop
+   roop_Real: 0
+   roop_Fake: 1
training/dataset/I2G_dataset.py ADDED
@@ -0,0 +1,389 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Created by: Kaede Shiohara
2
+ # Yamasaki Lab at The University of Tokyo
3
4
+ # Copyright (c) 2021
5
+ # 3rd party softwares' licenses are noticed at https://github.com/mapooon/SelfBlendedImages/blob/master/LICENSE
6
+ import logging
7
+ import os
8
+ import pickle
9
+
10
+ import cv2
11
+ import numpy as np
12
+ import scipy as sp
13
+ import yaml
14
+ from skimage.measure import label, regionprops
15
+ import random
16
+ from PIL import Image
17
+ import sys
18
+ import albumentations as A
19
+ from torch.utils.data import DataLoader
20
+ from dataset.utils.bi_online_generation import random_get_hull
21
+ from dataset.abstract_dataset import DeepfakeAbstractBaseDataset
22
+ from dataset.pair_dataset import pairDataset
23
+ import torch
24
+
25
+ class RandomDownScale(A.core.transforms_interface.ImageOnlyTransform):
26
+ def apply(self, img, ratio_list=None, **params):
27
+ if ratio_list is None:
28
+ ratio_list = [2, 4]
29
+ r = ratio_list[np.random.randint(len(ratio_list))]
30
+ return self.randomdownscale(img, r)
31
+
32
+ def randomdownscale(self, img, r):
33
+ keep_ratio = True
34
+ keep_input_shape = True
35
+ H, W, C = img.shape
36
+
37
+ img_ds = cv2.resize(img, (int(W / r), int(H / r)), interpolation=cv2.INTER_NEAREST)
38
+ if keep_input_shape:
39
+ img_ds = cv2.resize(img_ds, (W, H), interpolation=cv2.INTER_LINEAR)
40
+
41
+ return img_ds
42
+
43
+
44
+ '''
45
+ from PIL import ImageDraw
46
+ # 创建一个可以在图像上绘制的对象
47
+ img_pil=Image.fromarray(img)
48
+ draw = ImageDraw.Draw(img_pil)
49
+
50
+ # 在图像上绘制点
51
+ for i, point in enumerate(landmark):
52
+ x, y = point
53
+ radius = 1 # 点的半径
54
+ draw.ellipse((x-radius, y-radius, x+radius, y+radius), fill="red")
55
+ draw.text((x+radius+2, y-radius), str(i), fill="black") # 在点旁边添加标签
56
+ img_pil.show()
57
+
58
+ '''
59
+
60
+ def alpha_blend(source, target, mask):
61
+ mask_blured = get_blend_mask(mask)
62
+ img_blended = (mask_blured * source + (1 - mask_blured) * target)
63
+ return img_blended, mask_blured
64
+
65
+
66
+ def dynamic_blend(source, target, mask):
67
+ mask_blured = get_blend_mask(mask)
68
+ # worth consideration, 1 in the official paper, 0.25, 0.5, 0.75,1,1,1 in sbi.
69
+ blend_list = [1, 1, 1]
70
+ blend_ratio = blend_list[np.random.randint(len(blend_list))]
71
+ mask_blured *= blend_ratio
72
+ img_blended = (mask_blured * source + (1 - mask_blured) * target)
73
+ return img_blended, mask_blured
74
+
75
+
76
+ def get_blend_mask(mask):
77
+ H, W = mask.shape
78
+ size_h = np.random.randint(192, 257)
79
+ size_w = np.random.randint(192, 257)
80
+ mask = cv2.resize(mask, (size_w, size_h))
81
+ kernel_1 = random.randrange(5, 26, 2)
82
+ kernel_1 = (kernel_1, kernel_1)
83
+ kernel_2 = random.randrange(5, 26, 2)
84
+ kernel_2 = (kernel_2, kernel_2)
85
+
86
+ mask_blured = cv2.GaussianBlur(mask, kernel_1, 0)
87
+ mask_blured = mask_blured / (mask_blured.max())
88
+ mask_blured[mask_blured < 1] = 0
89
+
90
+ mask_blured = cv2.GaussianBlur(mask_blured, kernel_2, np.random.randint(5, 46))
91
+ mask_blured = mask_blured / (mask_blured.max())
92
+ mask_blured = cv2.resize(mask_blured, (W, H))
93
+ return mask_blured.reshape((mask_blured.shape + (1,)))
94
+
95
+
96
+ def get_alpha_blend_mask(mask):
97
+ kernel_list = [(11, 11), (9, 9), (7, 7), (5, 5), (3, 3)]
98
+ blend_list = [0.25, 0.5, 0.75]
99
+ kernel_idxs = random.choices(range(len(kernel_list)), k=2)
100
+ blend_ratio = blend_list[random.sample(range(len(blend_list)), 1)[0]]
101
+ mask_blured = cv2.GaussianBlur(mask, kernel_list[0], 0)
102
+ # print(mask_blured.max())
103
+ mask_blured[mask_blured < mask_blured.max()] = 0
104
+ mask_blured[mask_blured > 0] = 1
105
+ # mask_blured = mask
106
+ mask_blured = cv2.GaussianBlur(mask_blured, kernel_list[kernel_idxs[1]], 0)
107
+ mask_blured = mask_blured / (mask_blured.max())
108
+ return mask_blured.reshape((mask_blured.shape + (1,)))
109
+
110
+
111
+ class I2GDataset(DeepfakeAbstractBaseDataset):
112
+ def __init__(self, config=None, mode='train'):
113
+ #config['GridShuffle']['p'] = 0
114
+ super().__init__(config, mode)
115
+ real_images_list = [img for img, label in zip(self.image_list, self.label_list) if label == 0]
116
+ self.real_images_list = list(set(real_images_list)) # de-duplicate since DF,F2F,FS,NT have same real images
117
+ self.source_transforms = self.get_source_transforms()
118
+ self.transforms = self.get_transforms()
119
+ self.init_nearest()
120
+
121
+ def init_nearest(self):
122
+ if os.path.exists('training/lib/nearest_face_info.pkl'):
123
+ with open('training/lib/nearest_face_info.pkl', 'rb') as f:
124
+ face_info = pickle.load(f)
125
+ self.face_info = face_info
126
+ # Check if the dictionary has already been created
127
+ if os.path.exists('training/lib/landmark_dict_ffall.pkl'):
128
+ with open('training/lib/landmark_dict_ffall.pkl', 'rb') as f:
129
+ landmark_dict = pickle.load(f)
130
+ self.landmark_dict = landmark_dict
131
+
132
+ def reorder_landmark(self, landmark):
133
+ landmark = landmark.copy() # 创建landmark的副本
134
+ landmark_add = np.zeros((13, 2))
135
+ for idx, idx_l in enumerate([77, 75, 76, 68, 69, 70, 71, 80, 72, 73, 79, 74, 78]):
136
+ landmark_add[idx] = landmark[idx_l]
137
+ landmark[68:] = landmark_add
138
+ return landmark
139
+
140
+ def hflip(self, img, mask=None, landmark=None, bbox=None):
141
+ H, W = img.shape[:2]
142
+ landmark = landmark.copy()
143
+ if bbox is not None:
144
+ bbox = bbox.copy()
145
+
146
+ if landmark is not None:
147
+ landmark_new = np.zeros_like(landmark)
148
+
149
+ landmark_new[:17] = landmark[:17][::-1]
150
+ landmark_new[17:27] = landmark[17:27][::-1]
151
+
152
+ landmark_new[27:31] = landmark[27:31]
153
+ landmark_new[31:36] = landmark[31:36][::-1]
154
+
155
+ landmark_new[36:40] = landmark[42:46][::-1]
156
+ landmark_new[40:42] = landmark[46:48][::-1]
157
+
158
+ landmark_new[42:46] = landmark[36:40][::-1]
159
+ landmark_new[46:48] = landmark[40:42][::-1]
160
+
161
+ landmark_new[48:55] = landmark[48:55][::-1]
162
+ landmark_new[55:60] = landmark[55:60][::-1]
163
+
164
+ landmark_new[60:65] = landmark[60:65][::-1]
165
+ landmark_new[65:68] = landmark[65:68][::-1]
166
+ if len(landmark) == 68:
167
+ pass
168
+ elif len(landmark) == 81:
169
+ landmark_new[68:81] = landmark[68:81][::-1]
170
+ else:
171
+ raise NotImplementedError
172
+ landmark_new[:, 0] = W - landmark_new[:, 0]
173
+
174
+ else:
175
+ landmark_new = None
176
+
177
+ if bbox is not None:
178
+ bbox_new = np.zeros_like(bbox)
179
+ bbox_new[0, 0] = bbox[1, 0]
180
+ bbox_new[1, 0] = bbox[0, 0]
181
+ bbox_new[:, 0] = W - bbox_new[:, 0]
182
+ bbox_new[:, 1] = bbox[:, 1].copy()
183
+ if len(bbox) > 2:
184
+ bbox_new[2, 0] = W - bbox[3, 0]
185
+ bbox_new[2, 1] = bbox[3, 1]
186
+ bbox_new[3, 0] = W - bbox[2, 0]
187
+ bbox_new[3, 1] = bbox[2, 1]
188
+ bbox_new[4, 0] = W - bbox[4, 0]
189
+ bbox_new[4, 1] = bbox[4, 1]
190
+ bbox_new[5, 0] = W - bbox[6, 0]
191
+ bbox_new[5, 1] = bbox[6, 1]
192
+ bbox_new[6, 0] = W - bbox[5, 0]
193
+ bbox_new[6, 1] = bbox[5, 1]
194
+ else:
195
+ bbox_new = None
196
+
197
+ if mask is not None:
198
+ mask = mask[:, ::-1]
199
+ else:
200
+ mask = None
201
+ img = img[:, ::-1].copy()
202
+ return img, mask, landmark_new, bbox_new
203
+
204
+
205
+
206
+ def get_source_transforms(self):
207
+ return A.Compose([
208
+ A.Compose([
209
+ A.RGBShift((-20, 20), (-20, 20), (-20, 20), p=0.3),
210
+ A.HueSaturationValue(hue_shift_limit=(-0.3, 0.3), sat_shift_limit=(-0.3, 0.3),
211
+ val_shift_limit=(-0.3, 0.3), p=1),
212
+ A.RandomBrightnessContrast(brightness_limit=(-0.1, 0.1), contrast_limit=(-0.1, 0.1), p=1),
213
+ ], p=1),
214
+
215
+ A.OneOf([
216
+ RandomDownScale(p=1),
217
+ A.Sharpen(alpha=(0.2, 0.5), lightness=(0.5, 1.0), p=1),
218
+ ], p=1),
219
+
220
+ ], p=1.)
221
+
222
+ def get_fg_bg(self, one_lmk_path):
223
+ """
224
+ Get foreground and background paths
225
+ """
226
+ bg_lmk_path = one_lmk_path
227
+ # Randomly pick one from the nearest neighbors for the foreground
228
+ if bg_lmk_path in self.face_info:
229
+ fg_lmk_path = random.choice(self.face_info[bg_lmk_path])
230
+ else:
231
+ fg_lmk_path = bg_lmk_path
232
+ return fg_lmk_path, bg_lmk_path
233
+
234
+ def get_transforms(self):
235
+ return A.Compose([
236
+
237
+ A.RGBShift((-20, 20), (-20, 20), (-20, 20), p=0.3),
238
+ A.HueSaturationValue(hue_shift_limit=(-0.3, 0.3), sat_shift_limit=(-0.3, 0.3),
239
+ val_shift_limit=(-0.3, 0.3), p=0.3),
240
+ A.RandomBrightnessContrast(brightness_limit=(-0.3, 0.3), contrast_limit=(-0.3, 0.3), p=0.3),
241
+ A.ImageCompression(quality_lower=40, quality_upper=100, p=0.5),
242
+
243
+ ],
244
+ additional_targets={f'image1': 'image'},
245
+ p=1.)
246
+
247
+ def randaffine(self, img, mask):
248
+ f = A.Affine(
249
+ translate_percent={'x': (-0.03, 0.03), 'y': (-0.015, 0.015)},
250
+ scale=[0.95, 1 / 0.95],
251
+ fit_output=False,
252
+ p=1)
253
+
254
+ g = A.ElasticTransform(
255
+ alpha=50,
256
+ sigma=7,
257
+ alpha_affine=0,
258
+ p=1,
259
+ )
260
+
261
+ transformed = f(image=img, mask=mask)
262
+ img = transformed['image']
263
+
264
+ mask = transformed['mask']
265
+ transformed = g(image=img, mask=mask)
266
+ mask = transformed['mask']
267
+ return img, mask
268
+
269
+ def __len__(self):
270
+ return len(self.real_images_list)
271
+
272
+
273
+ def colorTransfer(self, src, dst, mask):
274
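+ # Reinhard-style colour transfer restricted to the masked region: shift the
+ # per-channel mean/std of dst's masked pixels to match those of src.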
+ transferredDst = np.copy(dst)
275
+ maskIndices = np.where(mask != 0)
276
+ maskedSrc = src[maskIndices[0], maskIndices[1]].astype(np.float32)
277
+ maskedDst = dst[maskIndices[0], maskIndices[1]].astype(np.float32)
278
+
279
+ # Compute means and standard deviations
280
+ meanSrc = np.mean(maskedSrc, axis=0)
281
+ stdSrc = np.std(maskedSrc, axis=0)
282
+ meanDst = np.mean(maskedDst, axis=0)
283
+ stdDst = np.std(maskedDst, axis=0)
284
+
285
+ # Perform color transfer
286
+ maskedDst = (maskedDst - meanDst) * (stdSrc / stdDst) + meanSrc
287
+ maskedDst = np.clip(maskedDst, 0, 255)
288
+
289
+ # Copy the entire background into transferredDst
290
+ transferredDst = np.copy(dst)
291
+ # Now apply color transfer only to the masked region
292
+ transferredDst[maskIndices[0], maskIndices[1]] = maskedDst.astype(np.uint8)
293
+
294
+ return transferredDst
295
+
296
+
297
+
298
+ def two_blending(self, img_bg, img_fg, landmark):
299
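+ # Blending pipeline: build a hull mask from the landmarks, jitter the
+ # foreground with a small affine + elastic warp, colour-match it to the
+ # background, then alpha-blend to obtain the pseudo-fake image and its mask.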
+ H, W = len(img_bg), len(img_bg[0])
300
+ if np.random.rand() < 0.25:
301
+ landmark = landmark[:68]
302
+ logging.disable(logging.FATAL)
303
+ mask = random_get_hull(landmark, img_bg)
304
+ logging.disable(logging.NOTSET)
305
+ source = img_fg.copy()
306
+ target = img_bg.copy()
307
+ # if np.random.rand() < 0.5:
308
+ # source = self.source_transforms(image=source.astype(np.uint8))['image']
309
+ # else:
310
+ # target = self.source_transforms(image=target.astype(np.uint8))['image']
311
+ source_v2, mask_v2 = self.randaffine(source, mask)
312
+ source_v3=self.colorTransfer(target,source_v2,mask_v2)
313
+ img_blended, mask = dynamic_blend(source_v3, target, mask_v2)
314
+ img_blended = img_blended.astype(np.uint8)
315
+ img = img_bg.astype(np.uint8)
316
+
317
+ return img, img_blended, mask.squeeze(2)
318
+
319
+
320
+ def __getitem__(self, index):
321
+ image_path_bg = self.real_images_list[index]
322
+ label = 0
323
+
324
+ # Get the mask and landmark paths
325
+ landmark_path_bg = image_path_bg.replace('frames', 'landmarks').replace('.png', '.npy') # Use .npy for landmark
326
+ landmark_path_fg, landmark_path_bg = self.get_fg_bg(landmark_path_bg)
327
+ image_path_fg = landmark_path_fg.replace('landmarks','frames').replace('.npy','.png')
328
+ try:
329
+ image_bg = self.load_rgb(image_path_bg)
330
+ image_fg = self.load_rgb(image_path_fg)
331
+ except Exception as e:
332
+ # Skip this image and return the first one
333
+ print(f"Error loading image at index {index}: {e}")
334
+ return self.__getitem__(0)
335
+ image_bg = np.array(image_bg) # Convert to numpy array for data augmentation
336
+ image_fg = np.array(image_fg) # Convert to numpy array for data augmentation
337
+
338
+ landmarks_bg = self.load_landmark(landmark_path_bg)
339
+ landmarks_fg = self.load_landmark(landmark_path_fg)
340
+
341
+
342
+ landmarks_bg = np.clip(landmarks_bg, 0, self.config['resolution'] - 1)
343
+ landmarks_bg = self.reorder_landmark(landmarks_bg)
344
+
345
+ img_r, img_f, mask_f = self.two_blending(image_bg.copy(), image_fg.copy(),landmarks_bg.copy())
346
+ transformed = self.transforms(image=img_f.astype('uint8'), image1=img_r.astype('uint8'))
347
+ img_f = transformed['image']
348
+ img_r = transformed['image1']
349
+ # img_f = img_f.transpose((2, 0, 1))
350
+ # img_r = img_r.transpose((2, 0, 1))
351
+ img_f = self.normalize(self.to_tensor(img_f))
352
+ img_r = self.normalize(self.to_tensor(img_r))
353
+ mask_f = self.to_tensor(mask_f)
354
+ mask_r=torch.zeros_like(mask_f) # zeros or ones
355
+ return img_f, img_r, mask_f,mask_r
356
+
357
+ @staticmethod
358
+ def collate_fn(batch):
359
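+ # Real samples are stacked first and fake samples second, so the label vector
+ # [0]*B + [1]*B lines up with the concatenated image and mask tensors.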
+ img_f, img_r, mask_f,mask_r = zip(*batch)
360
+ data = {}
361
+ fake_mask = torch.stack(mask_f,dim=0)
362
+ real_mask = torch.stack(mask_r, dim=0)
363
+ fake_images = torch.stack(img_f, dim=0)
364
+ real_images = torch.stack(img_r, dim=0)
365
+ data['image'] = torch.cat([real_images, fake_images], dim=0)
366
+ data['label'] = torch.tensor([0] * len(img_r) + [1] * len(img_f))
367
+ data['landmark'] = None
368
+ data['mask'] = torch.cat([real_mask, fake_mask], dim=0)
369
+ return data
370
+
371
+
372
+ if __name__ == '__main__':
373
+ detector_path = r"./training/config/detector/xception.yaml"
374
+ # weights_path = "./ckpts/xception/CDFv2/tb_v1/ov.pth"
375
+ with open(detector_path, 'r') as f:
376
+ config = yaml.safe_load(f)
377
+ with open('./training/config/train_config.yaml', 'r') as f:
378
+ config2 = yaml.safe_load(f)
379
+ config2['data_manner'] = 'lmdb'
380
+ config['dataset_json_folder'] = 'preprocessing/dataset_json_v3'
381
+ config.update(config2)
382
+ dataset = I2GDataset(config=config)
383
+ batch_size = 2
384
+ dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True,collate_fn=dataset.collate_fn)
385
+
386
+ for i, batch in enumerate(dataloader):
387
+ print(f"Batch {i}: {batch}")
388
+ continue
389
+
training/dataset/__init__.py ADDED
@@ -0,0 +1,19 @@
1
+ import os
2
+ import sys
3
+ current_file_path = os.path.abspath(__file__)
4
+ parent_dir = os.path.dirname(os.path.dirname(current_file_path))
5
+ project_root_dir = os.path.dirname(parent_dir)
6
+ sys.path.append(parent_dir)
7
+ sys.path.append(project_root_dir)
8
+
9
+
10
+ from .I2G_dataset import I2GDataset
11
+ from .iid_dataset import IIDDataset
12
+ from .abstract_dataset import DeepfakeAbstractBaseDataset
13
+ from .ff_blend import FFBlendDataset
14
+ from .fwa_blend import FWABlendDataset
15
+ from .lrl_dataset import LRLDataset
16
+ from .pair_dataset import pairDataset
17
+ from .sbi_dataset import SBIDataset
18
+ from .lsda_dataset import LSDADataset
19
+ from .tall_dataset import TALLDataset
training/dataset/abstract_dataset.py ADDED
@@ -0,0 +1,621 @@
1
+ # author: Zhiyuan Yan
2
+ # email: [email protected]
3
+ # date: 2023-03-30
4
+ # description: Abstract Base Class for all types of deepfake datasets.
5
+
6
+ import sys
7
+
8
+ import lmdb
9
+
10
+ sys.path.append('.')
11
+
12
+ import os
13
+ import math
14
+ import yaml
15
+ import glob
16
+ import json
17
+
18
+ import numpy as np
19
+ from copy import deepcopy
20
+ import cv2
21
+ import random
22
+ from PIL import Image
23
+ from collections import defaultdict
24
+
25
+ import torch
26
+ from torch.autograd import Variable
27
+ from torch.utils import data
28
+ from torchvision import transforms as T
29
+
30
+ import albumentations as A
31
+
32
+ from .albu import IsotropicResize
33
+
34
+ FFpp_pool=['FaceForensics++','FaceShifter','DeepFakeDetection','FF-DF','FF-F2F','FF-FS','FF-NT']#
35
+
36
+ def all_in_pool(inputs,pool):
37
+ for each in inputs:
38
+ if each not in pool:
39
+ return False
40
+ return True
41
+
42
+
43
+ class DeepfakeAbstractBaseDataset(data.Dataset):
44
+ """
45
+ Abstract base class for all deepfake datasets.
46
+ """
47
+ def __init__(self, config=None, mode='train'):
48
+ """Initializes the dataset object.
49
+
50
+ Args:
51
+ config (dict): A dictionary containing configuration parameters.
52
+ mode (str): A string indicating the mode (train or test).
53
+
54
+ Raises:
55
+ NotImplementedError: If mode is not train or test.
56
+ """
57
+
58
+ # Set the configuration and mode
59
+ self.config = config
60
+ self.mode = mode
61
+ self.compression = config['compression']
62
+ self.frame_num = config['frame_num'][mode]
63
+
64
+ # Check if 'video_mode' exists in config, otherwise set video_level to False
65
+ self.video_level = config.get('video_mode', False)
66
+ self.clip_size = config.get('clip_size', None)
67
+ self.lmdb = config.get('lmdb', False)
68
+ # Dataset dictionary
69
+ self.image_list = []
70
+ self.label_list = []
71
+
72
+ # Set the dataset dictionary based on the mode
73
+ if mode == 'train':
74
+ dataset_list = config['train_dataset']
75
+ # Training data should be collected together for training
76
+ image_list, label_list = [], []
77
+ for one_data in dataset_list:
78
+ tmp_image, tmp_label, tmp_name = self.collect_img_and_label_for_one_dataset(one_data)
79
+ image_list.extend(tmp_image)
80
+ label_list.extend(tmp_label)
81
+ if self.lmdb:
82
+ if len(dataset_list)>1:
83
+ if all_in_pool(dataset_list,FFpp_pool):
84
+ lmdb_path = os.path.join(config['lmdb_dir'], f"FaceForensics++_lmdb")
85
+ self.env = lmdb.open(lmdb_path, create=False, subdir=True, readonly=True, lock=False)
86
+ else:
87
+ raise ValueError('Training with multiple dataset and lmdb is not implemented yet.')
88
+ else:
89
+ lmdb_path = os.path.join(config['lmdb_dir'], f"{dataset_list[0] if dataset_list[0] not in FFpp_pool else 'FaceForensics++'}_lmdb")
90
+ self.env = lmdb.open(lmdb_path, create=False, subdir=True, readonly=True, lock=False)
91
+ elif mode == 'test':
92
+ one_data = config['test_dataset']
93
+ # Test dataset should be evaluated separately. So collect only one dataset each time
94
+ image_list, label_list, name_list = self.collect_img_and_label_for_one_dataset(one_data)
95
+ if self.lmdb:
96
+ lmdb_path = os.path.join(config['lmdb_dir'], f"{one_data}_lmdb" if one_data not in FFpp_pool else 'FaceForensics++_lmdb')
97
+ self.env = lmdb.open(lmdb_path, create=False, subdir=True, readonly=True, lock=False)
98
+ else:
99
+ raise NotImplementedError('Only train and test modes are supported.')
100
+
101
+ assert len(image_list)!=0 and len(label_list)!=0, f"Collect nothing for {mode} mode!"
102
+ self.image_list, self.label_list = image_list, label_list
103
+
104
+
105
+ # Create a dictionary containing the image and label lists
106
+ self.data_dict = {
107
+ 'image': self.image_list,
108
+ 'label': self.label_list,
109
+ }
110
+
111
+ self.transform = self.init_data_aug_method()
112
+
113
+ def init_data_aug_method(self):
114
+ trans = A.Compose([
115
+ A.HorizontalFlip(p=self.config['data_aug']['flip_prob']),
116
+ A.Rotate(limit=self.config['data_aug']['rotate_limit'], p=self.config['data_aug']['rotate_prob']),
117
+ A.GaussianBlur(blur_limit=self.config['data_aug']['blur_limit'], p=self.config['data_aug']['blur_prob']),
118
+ A.OneOf([
119
+ IsotropicResize(max_side=self.config['resolution'], interpolation_down=cv2.INTER_AREA, interpolation_up=cv2.INTER_CUBIC),
120
+ IsotropicResize(max_side=self.config['resolution'], interpolation_down=cv2.INTER_AREA, interpolation_up=cv2.INTER_LINEAR),
121
+ IsotropicResize(max_side=self.config['resolution'], interpolation_down=cv2.INTER_LINEAR, interpolation_up=cv2.INTER_LINEAR),
122
+ ], p = 0 if self.config['with_landmark'] else 1),
123
+ A.OneOf([
124
+ A.RandomBrightnessContrast(brightness_limit=self.config['data_aug']['brightness_limit'], contrast_limit=self.config['data_aug']['contrast_limit']),
125
+ A.FancyPCA(),
126
+ A.HueSaturationValue()
127
+ ], p=0.5),
128
+ A.ImageCompression(quality_lower=self.config['data_aug']['quality_lower'], quality_upper=self.config['data_aug']['quality_upper'], p=0.5)
129
+ ],
130
+ keypoint_params=A.KeypointParams(format='xy') if self.config['with_landmark'] else None
131
+ )
132
+ return trans
133
+
134
+ def rescale_landmarks(self, landmarks, original_size=256, new_size=224):
135
+ scale_factor = new_size / original_size
136
+ rescaled_landmarks = landmarks * scale_factor
137
+ return rescaled_landmarks
138
+
139
+
140
+ def collect_img_and_label_for_one_dataset(self, dataset_name: str):
141
+ """Collects image and label lists.
142
+
143
+ Args:
144
+ dataset_name (str): The name of a single dataset, e.g., 'FF-F2F'
145
+
146
+ Returns:
147
+ list: A list of image paths.
148
+ list: A list of labels.
+ list: A list of video names (for video-level metrics).
149
+
150
+ Raises:
151
+ ValueError: If image paths or labels are not found.
152
+ NotImplementedError: If the dataset is not implemented yet.
153
+ """
154
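+ # Expected JSON layout (inferred from the lookups below):
+ # {dataset_name: {label: {mode: [compression:] {video_name: {"label": ..., "frames": [...]}}}}}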
+ # Initialize the label and frame path lists
155
+ label_list = []
156
+ frame_path_list = []
157
+
158
+ # Record video name for video-level metrics
159
+ video_name_list = []
160
+
161
+ # Try to get the dataset information from the JSON file
162
+ if not os.path.exists(self.config['dataset_json_folder']):
163
+ self.config['dataset_json_folder'] = self.config['dataset_json_folder'].replace('/Youtu_Pangu_Security_Public', '/Youtu_Pangu_Security/public')
164
+ try:
165
+ with open(os.path.join(self.config['dataset_json_folder'], dataset_name + '.json'), 'r') as f:
166
+ dataset_info = json.load(f)
167
+ except Exception as e:
168
+ print(e)
169
+ raise ValueError(f'dataset {dataset_name} not exist!')
170
+
171
+ # If JSON file exists, do the following data collection
172
+ # FIXME: ugly, need to be modified here.
173
+ cp = None
174
+ if dataset_name == 'FaceForensics++_c40':
175
+ dataset_name = 'FaceForensics++'
176
+ cp = 'c40'
177
+ elif dataset_name == 'FF-DF_c40':
178
+ dataset_name = 'FF-DF'
179
+ cp = 'c40'
180
+ elif dataset_name == 'FF-F2F_c40':
181
+ dataset_name = 'FF-F2F'
182
+ cp = 'c40'
183
+ elif dataset_name == 'FF-FS_c40':
184
+ dataset_name = 'FF-FS'
185
+ cp = 'c40'
186
+ elif dataset_name == 'FF-NT_c40':
187
+ dataset_name = 'FF-NT'
188
+ cp = 'c40'
189
+ # Get the information for the current dataset
190
+ for label in dataset_info[dataset_name]:
191
+ sub_dataset_info = dataset_info[dataset_name][label][self.mode]
192
+ # Special case for FaceForensics++ and DeepFakeDetection, choose the compression type
193
+ if cp == None and dataset_name in ['FF-DF', 'FF-F2F', 'FF-FS', 'FF-NT', 'FaceForensics++','DeepFakeDetection','FaceShifter']:
194
+ sub_dataset_info = sub_dataset_info[self.compression]
195
+ elif cp == 'c40' and dataset_name in ['FF-DF', 'FF-F2F', 'FF-FS', 'FF-NT', 'FaceForensics++','DeepFakeDetection','FaceShifter']:
196
+ sub_dataset_info = sub_dataset_info['c40']
197
+
198
+ # Iterate over the videos in the dataset
199
+ for video_name, video_info in sub_dataset_info.items():
200
+ # Unique video name
201
+ unique_video_name = video_info['label'] + '_' + video_name
202
+
203
+ # Get the label and frame paths for the current video
204
+ if video_info['label'] not in self.config['label_dict']:
205
+ raise ValueError(f'Label {video_info["label"]} is not found in the configuration file.')
206
+ label = self.config['label_dict'][video_info['label']]
207
+ frame_paths = video_info['frames']
208
+ # sorted video path to the lists
209
+ if '\\' in frame_paths[0]:
210
+ frame_paths = sorted(frame_paths, key=lambda x: int(x.split('\\')[-1].split('.')[0]))
211
+ else:
212
+ frame_paths = sorted(frame_paths, key=lambda x: int(x.split('/')[-1].split('.')[0]))
213
+
214
+ # Consider the case when the actual number of frames (e.g., 270) is larger than the specified (i.e., self.frame_num=32)
215
+ # In this case, we select self.frame_num frames from the original 270 frames
216
+ total_frames = len(frame_paths)
217
+ if self.frame_num < total_frames:
218
+ total_frames = self.frame_num
219
+ if self.video_level:
220
+ # Select clip_size continuous frames
221
+ start_frame = random.randint(0, total_frames - self.frame_num) if self.mode == 'train' else 0
222
+ frame_paths = frame_paths[start_frame:start_frame + self.frame_num] # update total_frames
223
+ else:
224
+ # Select self.frame_num frames evenly distributed throughout the video
225
+ step = total_frames // self.frame_num
226
+ frame_paths = [frame_paths[i] for i in range(0, total_frames, step)][:self.frame_num]
227
+
228
+ # If video-level methods, crop clips from the selected frames if needed
229
+ if self.video_level:
230
+ if self.clip_size is None:
231
+ raise ValueError('clip_size must be specified when video_level is True.')
232
+ # Check if the number of total frames is greater than or equal to clip_size
233
+ if total_frames >= self.clip_size:
234
+ # Initialize an empty list to store the selected continuous frames
235
+ selected_clips = []
236
+
237
+ # Calculate the number of clips to select
238
+ num_clips = total_frames // self.clip_size
239
+
240
+ if num_clips > 1:
241
+ # Calculate the step size between each clip
242
+ clip_step = (total_frames - self.clip_size) // (num_clips - 1)
243
+
244
+ # Select clip_size continuous frames from each part of the video
245
+ for i in range(num_clips):
246
+ # Ensure start_frame + self.clip_size - 1 does not exceed the index of the last frame
247
+ start_frame = random.randrange(i * clip_step, min((i + 1) * clip_step, total_frames - self.clip_size + 1)) if self.mode == 'train' else i * clip_step
248
+ continuous_frames = frame_paths[start_frame:start_frame + self.clip_size]
249
+ assert len(continuous_frames) == self.clip_size, 'clip_size is not equal to the length of frame_path_list'
250
+ selected_clips.append(continuous_frames)
251
+
252
+ else:
253
+ start_frame = random.randrange(0, total_frames - self.clip_size + 1) if self.mode == 'train' else 0
254
+ continuous_frames = frame_paths[start_frame:start_frame + self.clip_size]
255
+ assert len(continuous_frames)==self.clip_size, 'clip_size is not equal to the length of frame_path_list'
256
+ selected_clips.append(continuous_frames)
257
+
258
+ # Append the list of selected clips and append the label
259
+ label_list.extend([label] * len(selected_clips))
260
+ frame_path_list.extend(selected_clips)
261
+ # video name save
262
+ video_name_list.extend([unique_video_name] * len(selected_clips))
263
+
264
+ else:
265
+ print(f"Skipping video {unique_video_name} because it has fewer than clip_size ({self.clip_size}) frames ({total_frames}).")
266
+
267
+ # Otherwise, extend the label and frame paths to the lists according to the number of frames
268
+ else:
269
+ # Extend the label and frame paths to the lists according to the number of frames
270
+ label_list.extend([label] * total_frames)
271
+ frame_path_list.extend(frame_paths)
272
+ # video name save
273
+ video_name_list.extend([unique_video_name] * len(frame_paths))
274
+
275
+ # Shuffle the label and frame path lists in the same order
276
+ shuffled = list(zip(label_list, frame_path_list, video_name_list))
277
+ random.shuffle(shuffled)
278
+ label_list, frame_path_list, video_name_list = zip(*shuffled)
279
+
280
+ return frame_path_list, label_list, video_name_list
281
+
282
+
283
+ def load_rgb(self, file_path):
284
+ """
285
+ Load an RGB image from a file path and resize it to a specified resolution.
286
+
287
+ Args:
288
+ file_path: A string indicating the path to the image file.
289
+
290
+ Returns:
291
+ An Image object containing the loaded and resized image.
292
+
293
+ Raises:
294
+ ValueError: If the loaded image is None.
295
+ """
296
+ size = self.config['resolution'] # if self.mode == "train" else self.config['resolution']
297
+ if not self.lmdb:
298
+ if not file_path[0] == '.':
299
+ file_path = f'{self.config["rgb_dir"]}'+file_path
300
+ assert os.path.exists(file_path), f"{file_path} does not exist"
301
+ img = cv2.imread(file_path)
302
+ if img is None:
303
+ raise ValueError('Loaded image is None: {}'.format(file_path))
304
+ elif self.lmdb:
305
+ with self.env.begin(write=False) as txn:
306
+ # transfer the path format from rgb-path to lmdb-key
307
+ if file_path[0]=='.':
308
+ file_path=file_path.replace('./datasets\\','')
309
+
310
+ image_bin = txn.get(file_path.encode())
311
+ image_buf = np.frombuffer(image_bin, dtype=np.uint8)
312
+ img = cv2.imdecode(image_buf, cv2.IMREAD_COLOR)
313
+ img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
314
+ img = cv2.resize(img, (size, size), interpolation=cv2.INTER_CUBIC)
315
+ return Image.fromarray(np.array(img, dtype=np.uint8))
316
+
317
+
318
+ def load_mask(self, file_path):
319
+ """
320
+ Load a binary mask image from a file path and resize it to a specified resolution.
321
+
322
+ Args:
323
+ file_path: A string indicating the path to the mask file.
324
+
325
+ Returns:
326
+ A numpy array containing the loaded and resized mask.
327
+
328
+ Raises:
329
+ None.
330
+ """
331
+ size = self.config['resolution']
332
+ if file_path is None:
333
+ return np.zeros((size, size, 1))
334
+ if not self.lmdb:
335
+ if not file_path[0] == '.':
336
+ file_path = f'./{self.config["rgb_dir"]}\\'+file_path
337
+ if os.path.exists(file_path):
338
+ mask = cv2.imread(file_path, 0)
339
+ if mask is None:
340
+ mask = np.zeros((size, size))
341
+ else:
342
+ return np.zeros((size, size, 1))
343
+ else:
344
+ with self.env.begin(write=False) as txn:
345
+ # transfer the path format from rgb-path to lmdb-key
346
+ if file_path[0]=='.':
347
+ file_path=file_path.replace('./datasets\\','')
348
+
349
+ image_bin = txn.get(file_path.encode())
350
+ if image_bin is None:
351
+ mask = np.zeros((size, size,3))
352
+ else:
353
+ image_buf = np.frombuffer(image_bin, dtype=np.uint8)
354
+ # cv2.IMREAD_GRAYSCALE reads a grayscale image, cv2.IMREAD_COLOR reads a color image
355
+ mask = cv2.imdecode(image_buf, cv2.IMREAD_COLOR)
356
+ mask = cv2.resize(mask, (size, size)) / 255
357
+ mask = np.expand_dims(mask, axis=2)
358
+ return np.float32(mask)
359
+
360
+ def load_landmark(self, file_path):
361
+ """
362
+ Load 2D facial landmarks from a file path.
363
+
364
+ Args:
365
+ file_path: A string indicating the path to the landmark file.
366
+
367
+ Returns:
368
+ A numpy array containing the loaded landmarks.
369
+
370
+ Raises:
371
+ None.
372
+ """
373
+ if file_path is None:
374
+ return np.zeros((81, 2))
375
+ if not self.lmdb:
376
+ if not file_path[0] == '.':
377
+ file_path = f'./{self.config["rgb_dir"]}\\'+file_path
378
+ if os.path.exists(file_path):
379
+ landmark = np.load(file_path)
380
+ else:
381
+ return np.zeros((81, 2))
382
+ else:
383
+ with self.env.begin(write=False) as txn:
384
+ # transfer the path format from rgb-path to lmdb-key
385
+ if file_path[0]=='.':
386
+ file_path=file_path.replace('./datasets\\','')
387
+ binary = txn.get(file_path.encode())
388
+ landmark = np.frombuffer(binary, dtype=np.uint32).reshape((81, 2))
389
+ landmark=self.rescale_landmarks(np.float32(landmark), original_size=256, new_size=self.config['resolution'])
390
+ return landmark
391
+
392
+ def to_tensor(self, img):
393
+ """
394
+ Convert an image to a PyTorch tensor.
395
+ """
396
+ return T.ToTensor()(img)
397
+
398
+ def normalize(self, img):
399
+ """
400
+ Normalize an image.
401
+ """
402
+ mean = self.config['mean']
403
+ std = self.config['std']
404
+ normalize = T.Normalize(mean=mean, std=std)
405
+ return normalize(img)
406
+
407
+ def data_aug(self, img, landmark=None, mask=None, augmentation_seed=None):
408
+ """
409
+ Apply data augmentation to an image, landmark, and mask.
410
+
411
+ Args:
412
+ img: An Image object containing the image to be augmented.
413
+ landmark: A numpy array containing the 2D facial landmarks to be augmented.
414
+ mask: A numpy array containing the binary mask to be augmented.
415
+
416
+ Returns:
417
+ The augmented image, landmark, and mask.
418
+ """
419
+
420
+ # Set the seed for the random number generator
421
+ if augmentation_seed is not None:
422
+ random.seed(augmentation_seed)
423
+ np.random.seed(augmentation_seed)
424
+
425
+ # Create a dictionary of arguments
426
+ kwargs = {'image': img}
427
+
428
+ # Check if the landmark and mask are not None
429
+ if landmark is not None:
430
+ kwargs['keypoints'] = landmark
431
+ kwargs['keypoint_params'] = A.KeypointParams(format='xy')
432
+ if mask is not None:
433
+ mask = mask.squeeze(2)
434
+ if mask.max() > 0:
435
+ kwargs['mask'] = mask
436
+
437
+ # Apply data augmentation
438
+ transformed = self.transform(**kwargs)
439
+
440
+ # Get the augmented image, landmark, and mask
441
+ augmented_img = transformed['image']
442
+ augmented_landmark = transformed.get('keypoints')
443
+ augmented_mask = transformed.get('mask',mask)
444
+
445
+ # Convert the augmented landmark to a numpy array
446
+ if augmented_landmark is not None:
447
+ augmented_landmark = np.array(augmented_landmark)
448
+
449
+ # Reset the seeds to ensure different transformations for different videos
450
+ if augmentation_seed is not None:
451
+ random.seed()
452
+ np.random.seed()
453
+
454
+ return augmented_img, augmented_landmark, augmented_mask
455
+
456
+ def __getitem__(self, index, no_norm=False):
457
+ """
458
+ Returns the data point at the given index.
459
+
460
+ Args:
461
+ index (int): The index of the data point.
462
+
463
+ Returns:
464
+ A tuple containing the image tensor, the label tensor, the landmark tensor,
465
+ and the mask tensor.
466
+ """
467
+ # Get the image paths and label
468
+ image_paths = self.data_dict['image'][index]
469
+ label = self.data_dict['label'][index]
470
+
471
+ if not isinstance(image_paths, list):
472
+ image_paths = [image_paths] # for the image-level IO, only one frame is used
473
+
474
+ image_tensors = []
475
+ landmark_tensors = []
476
+ mask_tensors = []
477
+ augmentation_seed = None
478
+
479
+ for image_path in image_paths:
480
+ # Initialize a new seed for data augmentation at the start of each video
481
+ if self.video_level and image_path == image_paths[0]:
482
+ augmentation_seed = random.randint(0, 2**32 - 1)
483
+
484
+ # Get the mask and landmark paths
485
+ mask_path = image_path.replace('frames', 'masks') # Use .png for mask
486
+ landmark_path = image_path.replace('frames', 'landmarks').replace('.png', '.npy') # Use .npy for landmark
487
+
488
+ # Load the image
489
+ try:
490
+ image = self.load_rgb(image_path)
491
+ except Exception as e:
492
+ # Skip this image and return the first one
493
+ print(f"Error loading image at index {index}: {e}")
494
+ return self.__getitem__(0)
495
+ image = np.array(image) # Convert to numpy array for data augmentation
496
+
497
+ # Load mask and landmark (if needed)
498
+ if self.config['with_mask']:
499
+ mask = self.load_mask(mask_path)
500
+ else:
501
+ mask = None
502
+ if self.config['with_landmark']:
503
+ landmarks = self.load_landmark(landmark_path)
504
+ else:
505
+ landmarks = None
506
+
507
+ # Do Data Augmentation
508
+ if self.mode == 'train' and self.config['use_data_augmentation']:
509
+ image_trans, landmarks_trans, mask_trans = self.data_aug(image, landmarks, mask, augmentation_seed)
510
+ else:
511
+ image_trans, landmarks_trans, mask_trans = deepcopy(image), deepcopy(landmarks), deepcopy(mask)
512
+
513
+
514
+ # To tensor and normalize
515
+ if not no_norm:
516
+ image_trans = self.normalize(self.to_tensor(image_trans))
517
+ if self.config['with_landmark']:
518
+ landmarks_trans = torch.from_numpy(landmarks)
519
+ if self.config['with_mask']:
520
+ mask_trans = torch.from_numpy(mask_trans)
521
+
522
+ image_tensors.append(image_trans)
523
+ landmark_tensors.append(landmarks_trans)
524
+ mask_tensors.append(mask_trans)
525
+
526
+ if self.video_level:
527
+ # Stack image tensors along a new dimension (time)
528
+ image_tensors = torch.stack(image_tensors, dim=0)
529
+ # Stack landmark and mask tensors along a new dimension (time)
530
+ if not any(landmark is None or (isinstance(landmark, list) and None in landmark) for landmark in landmark_tensors):
531
+ landmark_tensors = torch.stack(landmark_tensors, dim=0)
532
+ if not any(m is None or (isinstance(m, list) and None in m) for m in mask_tensors):
533
+ mask_tensors = torch.stack(mask_tensors, dim=0)
534
+ else:
535
+ # Get the first image tensor
536
+ image_tensors = image_tensors[0]
537
+ # Get the first landmark and mask tensors
538
+ if not any(landmark is None or (isinstance(landmark, list) and None in landmark) for landmark in landmark_tensors):
539
+ landmark_tensors = landmark_tensors[0]
540
+ if not any(m is None or (isinstance(m, list) and None in m) for m in mask_tensors):
541
+ mask_tensors = mask_tensors[0]
542
+
543
+ return image_tensors, label, landmark_tensors, mask_tensors
544
+
545
+ @staticmethod
546
+ def collate_fn(batch):
547
+ """
548
+ Collate a batch of data points.
549
+
550
+ Args:
551
+ batch (list): A list of tuples containing the image tensor, the label tensor,
552
+ the landmark tensor, and the mask tensor.
553
+
554
+ Returns:
555
+ A tuple containing the image tensor, the label tensor, the landmark tensor,
556
+ and the mask tensor.
557
+ """
558
+ # Separate the image, label, landmark, and mask tensors
559
+ images, labels, landmarks, masks = zip(*batch)
560
+
561
+ # Stack the image, label, landmark, and mask tensors
562
+ images = torch.stack(images, dim=0)
563
+ labels = torch.LongTensor(labels)
564
+
565
+ # Special case for landmarks and masks if they are None
566
+ if not any(landmark is None or (isinstance(landmark, list) and None in landmark) for landmark in landmarks):
567
+ landmarks = torch.stack(landmarks, dim=0)
568
+ else:
569
+ landmarks = None
570
+
571
+ if not any(m is None or (isinstance(m, list) and None in m) for m in masks):
572
+ masks = torch.stack(masks, dim=0)
573
+ else:
574
+ masks = None
575
+
576
+ # Create a dictionary of the tensors
577
+ data_dict = {}
578
+ data_dict['image'] = images
579
+ data_dict['label'] = labels
580
+ data_dict['landmark'] = landmarks
581
+ data_dict['mask'] = masks
582
+ return data_dict
583
+
584
+ def __len__(self):
585
+ """
586
+ Return the length of the dataset.
587
+
588
+ Args:
589
+ None.
590
+
591
+ Returns:
592
+ An integer indicating the length of the dataset.
593
+
594
+ Raises:
595
+ AssertionError: If the number of images and labels in the dataset are not equal.
596
+ """
597
+ assert len(self.image_list) == len(self.label_list), 'Number of images and labels are not equal'
598
+ return len(self.image_list)
599
+
600
+
601
+ if __name__ == "__main__":
602
+ with open('/data/home/zhiyuanyan/DeepfakeBench/training/config/detector/video_baseline.yaml', 'r') as f:
603
+ config = yaml.safe_load(f)
604
+ train_set = DeepfakeAbstractBaseDataset(
605
+ config = config,
606
+ mode = 'train',
607
+ )
608
+ train_data_loader = \
609
+ torch.utils.data.DataLoader(
610
+ dataset=train_set,
611
+ batch_size=config['train_batchSize'],
612
+ shuffle=True,
613
+ num_workers=0,
614
+ collate_fn=train_set.collate_fn,
615
+ )
616
+ from tqdm import tqdm
617
+ for iteration, batch in enumerate(tqdm(train_data_loader)):
618
+ # print(iteration)
619
+ ...
620
+ # if iteration > 10:
621
+ # break
training/dataset/albu.py ADDED
@@ -0,0 +1,99 @@
1
+ import random
2
+
3
+ import cv2
4
+ import numpy as np
5
+ from albumentations import DualTransform, ImageOnlyTransform
6
+ from albumentations.augmentations.crops.functional import crop
7
+
8
+
9
+ def isotropically_resize_image(img, size, interpolation_down=cv2.INTER_AREA, interpolation_up=cv2.INTER_CUBIC):
10
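+ # Resize so the longer side equals `size` while preserving the aspect ratio;
+ # the interpolation method depends on whether we are up- or down-scaling.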
+ h, w = img.shape[:2]
11
+ if max(w, h) == size:
12
+ return img
13
+ if w > h:
14
+ scale = size / w
15
+ h = h * scale
16
+ w = size
17
+ else:
18
+ scale = size / h
19
+ w = w * scale
20
+ h = size
21
+ interpolation = interpolation_up if scale > 1 else interpolation_down
22
+ resized = cv2.resize(img, (int(w), int(h)), interpolation=interpolation)
23
+ return resized
24
+
25
+
26
+ class IsotropicResize(DualTransform):
27
+ def __init__(self, max_side, interpolation_down=cv2.INTER_AREA, interpolation_up=cv2.INTER_CUBIC,
28
+ always_apply=False, p=1):
29
+ super(IsotropicResize, self).__init__(always_apply, p)
30
+ self.max_side = max_side
31
+ self.interpolation_down = interpolation_down
32
+ self.interpolation_up = interpolation_up
33
+
34
+ def apply(self, img, interpolation_down=cv2.INTER_AREA, interpolation_up=cv2.INTER_CUBIC, **params):
35
+ return isotropically_resize_image(img, size=self.max_side, interpolation_down=interpolation_down,
36
+ interpolation_up=interpolation_up)
37
+
38
+ def apply_to_mask(self, img, **params):
39
+ return self.apply(img, interpolation_down=cv2.INTER_NEAREST, interpolation_up=cv2.INTER_NEAREST, **params)
40
+
41
+ def get_transform_init_args_names(self):
42
+ return ("max_side", "interpolation_down", "interpolation_up")
43
+
44
+
45
+ class Resize4xAndBack(ImageOnlyTransform):
46
+ def __init__(self, always_apply=False, p=0.5):
47
+ super(Resize4xAndBack, self).__init__(always_apply, p)
48
+
49
+ def apply(self, img, **params):
50
+ h, w = img.shape[:2]
51
+ scale = random.choice([2, 4])
52
+ img = cv2.resize(img, (w // scale, h // scale), interpolation=cv2.INTER_AREA)
53
+ img = cv2.resize(img, (w, h),
54
+ interpolation=random.choice([cv2.INTER_CUBIC, cv2.INTER_LINEAR, cv2.INTER_NEAREST]))
55
+ return img
56
+
57
+
58
+ class RandomSizedCropNonEmptyMaskIfExists(DualTransform):
59
+
60
+ def __init__(self, min_max_height, w2h_ratio=[0.7, 1.3], always_apply=False, p=0.5):
61
+ super(RandomSizedCropNonEmptyMaskIfExists, self).__init__(always_apply, p)
62
+
63
+ self.min_max_height = min_max_height
64
+ self.w2h_ratio = w2h_ratio
65
+
66
+ def apply(self, img, x_min=0, x_max=0, y_min=0, y_max=0, **params):
67
+ cropped = crop(img, x_min, y_min, x_max, y_max)
68
+ return cropped
69
+
70
+ @property
71
+ def targets_as_params(self):
72
+ return ["mask"]
73
+
74
+ def get_params_dependent_on_targets(self, params):
75
+ mask = params["mask"]
76
+ mask_height, mask_width = mask.shape[:2]
77
+ crop_height = int(mask_height * random.uniform(self.min_max_height[0], self.min_max_height[1]))
78
+ w2h_ratio = random.uniform(*self.w2h_ratio)
79
+ crop_width = min(int(crop_height * w2h_ratio), mask_width - 1)
80
+ if mask.sum() == 0:
81
+ x_min = random.randint(0, mask_width - crop_width + 1)
82
+ y_min = random.randint(0, mask_height - crop_height + 1)
83
+ else:
84
+ mask = mask.sum(axis=-1) if mask.ndim == 3 else mask
85
+ non_zero_yx = np.argwhere(mask)
86
+ y, x = random.choice(non_zero_yx)
87
+ x_min = x - random.randint(0, crop_width - 1)
88
+ y_min = y - random.randint(0, crop_height - 1)
89
+ x_min = np.clip(x_min, 0, mask_width - crop_width)
90
+ y_min = np.clip(y_min, 0, mask_height - crop_height)
91
+
92
+ x_max = x_min + crop_width
93
+ y_max = y_min + crop_height
94
+ y_max = min(mask_height, y_max)
95
+ x_max = min(mask_width, x_max)
96
+ return {"x_min": x_min, "x_max": x_max, "y_min": y_min, "y_max": y_max}
97
+
98
+ def get_transform_init_args_names(self):
99
+ return "min_max_height", "height", "width", "w2h_ratio"
training/dataset/face_utils.py ADDED
@@ -0,0 +1,238 @@
1
+ import cv2
2
+ import numpy as np
3
+ from skimage import transform as trans
4
+ # from mtcnn.mtcnn import MTCNN
5
+
6
+
7
+ def get_keypts(face):
8
+ # get key points from the results of mtcnn
9
+
10
+ if len(face['keypoints']) == 0:
11
+ return []
12
+
13
+ leye = np.array(face['keypoints']['left_eye'], dtype=int).reshape(-1, 2)
14
+ reye = np.array(face['keypoints']['right_eye'],
15
+ dtype=int).reshape(-1, 2)
16
+ nose = np.array(face['keypoints']['nose'], dtype=int).reshape(-1, 2)
17
+ lmouth = np.array(face['keypoints']['mouth_left'],
18
+ dtype=int).reshape(-1, 2)
19
+ rmouth = np.array(face['keypoints']['mouth_right'],
20
+ dtype=int).reshape(-1, 2)
21
+
22
+ pts = np.concatenate([leye, reye, nose, lmouth, rmouth], axis=0)
23
+
24
+ return pts
25
+
26
+
27
+ def img_align_crop(img, landmark=None, outsize=None, scale=1.3, mask=None):
28
+ """ align and crop the face according to the given bbox and landmarks
29
+ landmark: 5 key points
30
+ """
31
+
32
+ M = None
33
+
34
+ target_size = [112, 112]
35
+
36
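+ # Canonical 5-point destination template (eyes, nose, mouth corners) for
+ # 112x112 face alignment (ArcFace-style); rescaled below to `outsize`.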
+ dst = np.array([
37
+ [30.2946, 51.6963],
38
+ [65.5318, 51.5014],
39
+ [48.0252, 71.7366],
40
+ [33.5493, 92.3655],
41
+ [62.7299, 92.2041]], dtype=np.float32)
42
+
43
+ if target_size[1] == 112:
44
+ dst[:, 0] += 8.0
45
+
46
+ dst[:, 0] = dst[:, 0] * outsize[0] / target_size[0]
47
+ dst[:, 1] = dst[:, 1] * outsize[1] / target_size[1]
48
+
49
+ target_size = outsize
50
+
51
+ margin_rate = scale - 1
52
+ x_margin = target_size[0] * margin_rate / 2.
53
+ y_margin = target_size[1] * margin_rate / 2.
54
+
55
+ # move
56
+ dst[:, 0] += x_margin
57
+ dst[:, 1] += y_margin
58
+
59
+ # resize
60
+ dst[:, 0] *= target_size[0] / (target_size[0] + 2 * x_margin)
61
+ dst[:, 1] *= target_size[1] / (target_size[1] + 2 * y_margin)
62
+
63
+ src = landmark.astype(np.float32)
64
+
65
+ # use skimage transformation
66
+ tform = trans.SimilarityTransform()
67
+ tform.estimate(src, dst)
68
+ M = tform.params[0:2, :]
69
+
70
+ # M: use opencv
71
+ # M = cv2.getAffineTransform(src[[0,1,2],:],dst[[0,1,2],:])
72
+
73
+ img = cv2.warpAffine(img, M, (target_size[1], target_size[0]))
74
+
75
+ if outsize is not None:
76
+ img = cv2.resize(img, (outsize[1], outsize[0]))
77
+
78
+ if mask is not None:
79
+ mask = cv2.warpAffine(mask, M, (target_size[1], target_size[0]))
80
+ mask = cv2.resize(mask, (outsize[1], outsize[0]))
81
+ return img, mask
82
+ else:
83
+ return img
84
+
85
+
86
+
87
+
88
+
89
+ def expand_bbox(bbox, width, height, scale=1.3, minsize=None):
90
+ """
91
+ Expand the original bounding box by scale.
92
+ :param bbox: original bounding box
93
+ :param width: frame width
94
+ :param height: frame height
95
+ :param scale: bounding box size multiplier to get a bigger face region
96
+ :param minsize: set minimum bounding box size
97
+ :return: expanded bbox
98
+ """
99
+ x, y, w, h = bbox
100
+
101
+ # box center
102
+ cx = int(x + w / 2)
103
+ cy = int(y + h / 2)
104
+
105
+ # expand by scale factor
106
+ new_size = max(int(w * scale), int(h * scale))
107
+ new_x = max(0, int(cx - new_size / 2))
108
+ new_y = max(0, int(cy - new_size / 2))
109
+
110
+ # Check for too big bbox for given x, y
111
+ new_size = min(width - new_x, new_size)
112
+ new_size = min(height - new_y, new_size)
113
+
114
+ return new_x, new_y, new_size, new_size
115
+
116
+
117
+ def extract_face_MTCNN(face_detector, image, expand_scale=1.3, res=256):
118
+ # Image size
119
+ height, width = image.shape[:2]
120
+
121
+ # Convert to rgb
122
+ rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
123
+
124
+ # Detect faces with MTCNN
125
+ faces = face_detector.detect_faces(rgb)
126
+ if len(faces):
127
+ # For now only take biggest face
128
+ face = None
129
+ bbox = None
130
+ max_region = 0
131
+ for ff in faces:
132
+ if max_region == 0:
133
+ face = ff
134
+ bbox = face['box']
135
+ max_region = bbox[2]*bbox[3]
136
+ else:
137
+ bb = ff['box']
138
+ region = bb[2]*bb[3]
139
+ if region > max_region:
140
+ max_region = region
141
+ face = ff
142
+ bbox = face['box']
143
+ print(max_region)
144
+ #face = faces[0]
145
+
146
+ #bbox = face['box']
147
+
148
+ # --- Prediction ---------------------------------------------------
149
+ # Face crop with MTCNN and bounding box scale enlargement
150
+ x, y, w, h = expand_bbox(bbox, width, height, scale=expand_scale)
151
+ cropped_face = rgb[y:y+h, x:x+w]
152
+
153
+ cropped_face = cv2.resize(
154
+ cropped_face, (res, res), interpolation=cv2.INTER_CUBIC)
155
+ cropped_face = cv2.cvtColor(cropped_face, cv2.COLOR_RGB2BGR)
156
+ return cropped_face
157
+
158
+ return None
159
+
160
+
161
+ def extract_aligned_face_MTCNN(face_detector, image, expand_scale=1.3, res=256, mask=None):
162
+ # Image size
163
+ height, width = image.shape[:2]
164
+
165
+ # Convert to rgb
166
+ rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
167
+
168
+ # Detect faces with MTCNN
169
+ faces = face_detector.detect_faces(rgb)
170
+ if len(faces):
171
+ # For now only take biggest face
172
+ face = None
173
+ bbox = None
174
+ max_region = 0
175
+ for i, ff in enumerate(faces):
176
+ if max_region == 0:
177
+ face = ff
178
+ bbox = face['box']
179
+ max_region = bbox[2]*bbox[3]
180
+ else:
181
+ bb = ff['box']
182
+ region = bb[2]*bb[3]
183
+ if region > max_region:
184
+ max_region = region
185
+ face = ff
186
+ bbox = face['box']
187
+ #print('face {}: {}'.format(i, max_region))
188
+ #face = faces[0]
189
+
190
+ landmarks = get_keypts(face)
191
+
192
+ # --- Prediction ---------------------------------------------------
193
+ # Face aligned crop with MTCNN and bounding box scale enlargement
194
+ if mask is not None:
195
+ cropped_face, cropped_mask = img_align_crop(rgb, landmarks, outsize=[
196
+ res, res], scale=expand_scale, mask=mask)
197
+ cropped_face = cv2.cvtColor(cropped_face, cv2.COLOR_RGB2BGR)
198
+ cropped_mask = cv2.cvtColor(cropped_mask, cv2.COLOR_RGB2GRAY)
199
+ return cropped_face, cropped_mask
200
+ else:
201
+ cropped_face = img_align_crop(rgb, landmarks, outsize=[
202
+ res, res], scale=expand_scale)
203
+ cropped_face = cv2.cvtColor(cropped_face, cv2.COLOR_RGB2BGR)
204
+ return cropped_face
205
+
206
+ return None
207
+
208
+
209
+ def extract_face_DLIB(face_detector, image, expand_scale=1.3, res=256):
210
+ # Image size
211
+ height, width = image.shape[:2]
212
+
213
+ # Convert to gray
214
+ gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
215
+
216
+ # Detect with dlib
217
+ faces = face_detector(gray, 1)
218
+ if len(faces):
219
+ # For now only take biggest face
220
+ face = faces[0]
221
+
222
+ x1 = face.left()
223
+ y1 = face.top()
224
+ x2 = face.right()
225
+ y2 = face.bottom()
226
+ bbox = (x1, y1, x2-x1, y2-y1)
227
+
228
+ # --- Prediction ---------------------------------------------------
229
+ # Face crop with dlib and bounding box scale enlargement
230
+ x, y, w, h = expand_bbox(bbox, width, height, scale=expand_scale)
231
+ cropped_face = image[y:y+h, x:x+w]
232
+
233
+ cropped_face = cv2.resize(
234
+ cropped_face, (res, res), interpolation=cv2.INTER_CUBIC)
235
+
236
+ return cropped_face
237
+
238
+ return None
training/dataset/ff_blend.py ADDED
@@ -0,0 +1,572 @@
1
+ '''
2
+ # author: Zhiyuan Yan
3
+ # email: [email protected]
4
+ # date: 2023-03-30
5
+
6
+ The code is designed for Face X-ray.
7
+ '''
8
+
9
+ import os
10
+ import sys
11
+ import json
12
+ import pickle
13
+ import time
14
+
15
+ import lmdb
16
+ import numpy as np
17
+ import albumentations as A
18
+ import cv2
19
+ import random
20
+ from PIL import Image
21
+ from skimage.util import random_noise
22
+ from scipy import linalg
23
+ import heapq as hq
24
+ import lmdb
25
+ import torch
26
+ from torch.autograd import Variable
27
+ from torch.utils import data
28
+ from torchvision import transforms as T
29
+ import torchvision
30
+
31
+ from dataset.utils.face_blend import *
32
+ from dataset.utils.face_align import get_align_mat_new
33
+ from dataset.utils.color_transfer import color_transfer
34
+ from dataset.utils.faceswap_utils import blendImages as alpha_blend_fea
35
+ from dataset.utils.faceswap_utils import AlphaBlend as alpha_blend
36
+ from dataset.utils.face_aug import aug_one_im, change_res
37
+ from dataset.utils.image_ae import get_pretraiend_ae
38
+ from dataset.utils.warp import warp_mask
39
+ from dataset.utils import faceswap
40
+ from scipy.ndimage import gaussian_filter
41
+
42
+
43
+ class RandomDownScale(A.core.transforms_interface.ImageOnlyTransform):
44
+ def apply(self,img,**params):
45
+ return self.randomdownscale(img)
46
+
47
+ def randomdownscale(self,img):
48
+ keep_ratio=True
49
+ keep_input_shape=True
50
+ H,W,C=img.shape
51
+ ratio_list=[2,4]
52
+ r=ratio_list[np.random.randint(len(ratio_list))]
53
+ img_ds=cv2.resize(img,(int(W/r),int(H/r)),interpolation=cv2.INTER_NEAREST)
54
+ if keep_input_shape:
55
+ img_ds=cv2.resize(img_ds,(W,H),interpolation=cv2.INTER_LINEAR)
56
+ return img_ds
57
+
58
+
59
+ class FFBlendDataset(data.Dataset):
60
+ def __init__(self, config=None):
61
+
62
+ self.lmdb = config.get('lmdb', False)
63
+ if self.lmdb:
64
+ lmdb_path = os.path.join(config['lmdb_dir'], f"FaceForensics++_lmdb")
65
+ self.env = lmdb.open(lmdb_path, create=False, subdir=True, readonly=True, lock=False)
66
+
67
+ # Check if the dictionary has already been created
68
+ if os.path.exists('training/lib/nearest_face_info.pkl'):
69
+ with open('training/lib/nearest_face_info.pkl', 'rb') as f:
70
+ face_info = pickle.load(f)
71
+ else:
72
+ raise ValueError(f"Need to run the dataset/generate_xray_nearest.py before training the face xray.")
73
+ self.face_info = face_info
74
+ # Check if the dictionary has already been created
75
+ if os.path.exists('training/lib/landmark_dict_ffall.pkl'):
76
+ with open('training/lib/landmark_dict_ffall.pkl', 'rb') as f:
77
+ landmark_dict = pickle.load(f)
78
+ self.landmark_dict = landmark_dict
79
+ self.imid_list = self.get_training_imglist()
80
+ self.transforms = T.Compose([
81
+ # T.GaussianBlur(kernel_size=3, sigma=(0.1, 2.0)),
82
+ # T.ColorJitter(hue=.05, saturation=.05),
83
+ # T.RandomHorizontalFlip(),
84
+ # T.RandomRotation(20, resample=Image.BILINEAR),
85
+ T.ToTensor(),
86
+ T.Normalize(mean=[0.5, 0.5, 0.5],
87
+ std=[0.5, 0.5, 0.5])
88
+ ])
89
+ self.data_dict = {
90
+ 'imid_list': self.imid_list
91
+ }
92
+ self.config=config
93
+ # def data_aug(self, im):
94
+ # """
95
+ # Apply data augmentation on the input image.
96
+ # """
97
+ # transform = T.Compose([
98
+ # T.ToPILImage(),
99
+ # T.GaussianBlur(kernel_size=3, sigma=(0.1, 2.0)),
100
+ # T.ColorJitter(hue=.05, saturation=.05),
101
+ # ])
102
+ # # Apply transformations
103
+ # im_aug = transform(im)
104
+ # return im_aug
105
+
106
+ def blended_aug(self, im):
107
+ transform = A.Compose([
108
+ A.RGBShift((-20,20),(-20,20),(-20,20),p=0.3),
109
+ A.HueSaturationValue(hue_shift_limit=(-0.3,0.3), sat_shift_limit=(-0.3,0.3), val_shift_limit=(-0.3,0.3), p=0.3),
110
+ A.RandomBrightnessContrast(brightness_limit=(-0.3,0.3), contrast_limit=(-0.3,0.3), p=0.3),
111
+ A.ImageCompression(quality_lower=40, quality_upper=100,p=0.5)
112
+ ])
113
+ # Apply transformations
114
+ im_aug = transform(image=im)
115
+ return im_aug['image']
116
+
117
+
118
+ def data_aug(self, im):
119
+ """
120
+ Apply data augmentation on the input image using albumentations.
121
+ """
122
+ transform = A.Compose([
123
+ A.Compose([
124
+ A.RGBShift((-20,20),(-20,20),(-20,20),p=0.3),
125
+ A.HueSaturationValue(hue_shift_limit=(-0.3,0.3), sat_shift_limit=(-0.3,0.3), val_shift_limit=(-0.3,0.3), p=1),
126
+ A.RandomBrightnessContrast(brightness_limit=(-0.1,0.1), contrast_limit=(-0.1,0.1), p=1),
127
+ ],p=1),
128
+ A.OneOf([
129
+ RandomDownScale(p=1),
130
+ A.Sharpen(alpha=(0.2, 0.5), lightness=(0.5, 1.0), p=1),
131
+ ],p=1),
132
+ ], p=1.)
133
+ # Apply transformations
134
+ im_aug = transform(image=im)
135
+ return im_aug['image']
136
+
137
+
138
+ def get_training_imglist(self):
139
+ """
140
+ Get the list of training images.
141
+ """
142
+ random.seed(1024) # Fix the random seed for reproducibility
143
+ imid_list = list(self.landmark_dict.keys())
144
+ # imid_list = [imid.replace('landmarks', 'frames').replace('npy', 'png') for imid in imid_list]
145
+ random.shuffle(imid_list)
146
+ return imid_list
147
+
148
+ def load_rgb(self, file_path):
149
+ """
150
+ Load an RGB image from a file path and resize it to a specified resolution.
151
+
152
+ Args:
153
+ file_path: A string indicating the path to the image file.
154
+
155
+ Returns:
156
+ An Image object containing the loaded and resized image.
157
+
158
+ Raises:
159
+ ValueError: If the loaded image is None.
160
+ """
161
+ size = self.config['resolution'] # if self.mode == "train" else self.config['resolution']
162
+ if not self.lmdb:
163
+ if not file_path[0] == '.':
164
+ file_path = f'./{self.config["rgb_dir"]}\\'+file_path
165
+ assert os.path.exists(file_path), f"{file_path} does not exist"
166
+ img = cv2.imread(file_path)
167
+ if img is None:
168
+ raise ValueError('Loaded image is None: {}'.format(file_path))
169
+ elif self.lmdb:
170
+ with self.env.begin(write=False) as txn:
171
+ # transfer the path format from rgb-path to lmdb-key
172
+ if file_path[0]=='.':
173
+ file_path=file_path.replace('./datasets\\','')
174
+
175
+ image_bin = txn.get(file_path.encode())
176
+ image_buf = np.frombuffer(image_bin, dtype=np.uint8)
177
+ img = cv2.imdecode(image_buf, cv2.IMREAD_COLOR)
178
+ img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
179
+ img = cv2.resize(img, (size, size), interpolation=cv2.INTER_CUBIC)
180
+ return np.array(img, dtype=np.uint8)
181
+
182
+
183
+ def load_mask(self, file_path):
184
+ """
185
+ Load a binary mask image from a file path and resize it to a specified resolution.
186
+
187
+ Args:
188
+ file_path: A string indicating the path to the mask file.
189
+
190
+ Returns:
191
+ A numpy array containing the loaded and resized mask.
192
+
193
+ Raises:
194
+ None.
195
+ """
196
+ size = self.config['resolution']
197
+ if file_path is None:
198
+ return np.zeros((size, size, 1))
199
+ if not self.lmdb:
200
+ if not file_path[0] == '.':
201
+ file_path = f'./{self.config["rgb_dir"]}\\'+file_path
202
+ if os.path.exists(file_path):
203
+ mask = cv2.imread(file_path, 0)
204
+ if mask is None:
205
+ mask = np.zeros((size, size))
206
+ else:
207
+ return np.zeros((size, size, 1))
208
+ else:
209
+ with self.env.begin(write=False) as txn:
210
+ # transfer the path format from rgb-path to lmdb-key
211
+ if file_path[0]=='.':
212
+ file_path=file_path.replace('./datasets\\','')
213
+ image_bin = txn.get(file_path.encode())
214
+ image_buf = np.frombuffer(image_bin, dtype=np.uint8)
215
+ # cv2.IMREAD_GRAYSCALE reads a grayscale image, cv2.IMREAD_COLOR reads a color image
216
+ mask = cv2.imdecode(image_buf, cv2.IMREAD_COLOR)
217
+ mask = cv2.resize(mask, (size, size)) / 255
218
+ mask = np.expand_dims(mask, axis=2)
219
+ return np.float32(mask)
220
+
221
+ def load_landmark(self, file_path):
222
+ """
223
+ Load 2D facial landmarks from a file path.
224
+
225
+ Args:
226
+ file_path: A string indicating the path to the landmark file.
227
+
228
+ Returns:
229
+ A numpy array containing the loaded landmarks.
230
+
231
+ Raises:
232
+ None.
233
+ """
234
+ if file_path is None:
235
+ return np.zeros((81, 2))
236
+ if not self.lmdb:
237
+ if not file_path[0] == '.':
238
+ file_path = f'./{self.config["rgb_dir"]}\\'+file_path
239
+ if os.path.exists(file_path):
240
+ landmark = np.load(file_path)
241
+ else:
242
+ return np.zeros((81, 2))
243
+ else:
244
+ with self.env.begin(write=False) as txn:
245
+ # transfer the path format from rgb-path to lmdb-key
246
+ if file_path[0]=='.':
247
+ file_path=file_path.replace('./datasets\\','')
248
+ binary = txn.get(file_path.encode())
249
+ landmark = np.frombuffer(binary, dtype=np.uint32).reshape((81, 2))
250
+ return np.float32(landmark)
251
+
252
+ def preprocess_images(self, imid_fg, imid_bg):
253
+ """
254
+ Load foreground and background images and face shapes.
255
+ """
256
+ fg_im = self.load_rgb(imid_fg.replace('landmarks', 'frames').replace('npy', 'png'))
257
+ fg_im = np.array(self.data_aug(fg_im))
258
+ fg_shape = self.landmark_dict[imid_fg]
259
+ fg_shape = np.array(fg_shape, dtype=np.int32)
260
+
261
+ bg_im = self.load_rgb(imid_bg.replace('landmarks', 'frames').replace('npy', 'png'))
262
+ bg_im = np.array(self.data_aug(bg_im))
263
+ bg_shape = self.landmark_dict[imid_bg]
264
+ bg_shape = np.array(bg_shape, dtype=np.int32)
265
+
266
+ if fg_im is None:
267
+ return bg_im, bg_shape, bg_im, bg_shape
268
+ elif bg_im is None:
269
+ return fg_im, fg_shape, fg_im, fg_shape
270
+
271
+ return fg_im, fg_shape, bg_im, bg_shape
272
+
273
+
274
+ def get_fg_bg(self, one_lmk_path):
275
+ """
276
+ Get foreground and background paths
277
+ """
278
+ bg_lmk_path = one_lmk_path
279
+ # Randomly pick one from the nearest neighbors for the foreground
280
+ if bg_lmk_path in self.face_info:
281
+ fg_lmk_path = random.choice(self.face_info[bg_lmk_path])
282
+ else:
283
+ fg_lmk_path = bg_lmk_path
284
+
285
+ return fg_lmk_path, bg_lmk_path
286
+
287
+
288
+ def generate_masks(self, fg_im, fg_shape, bg_im, bg_shape):
289
+ """
290
+ Generate masks for foreground and background images.
291
+ """
292
+ fg_mask = get_mask(fg_shape, fg_im, deform=False)
293
+ bg_mask = get_mask(bg_shape, bg_im, deform=True)
294
+
295
+ # # Only do the postprocess for the background mask
296
+ bg_mask_postprocess = warp_mask(bg_mask, std=20)
297
+ return fg_mask, bg_mask_postprocess
298
+
299
+
300
+ def warp_images(self, fg_im, fg_shape, bg_im, bg_shape, fg_mask):
301
+ """
302
+ Warp foreground face onto background image using affine or 3D warping.
303
+ """
304
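+ # With probability 0.5 use a 2D affine warp estimated from the two landmark
+ # sets; otherwise use a piecewise 3D warp over the first 48 landmarks.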
+ H, W, C = bg_im.shape
305
+ use_3d_warp = np.random.rand() < 0.5
306
+
307
+ if not use_3d_warp:
308
+ aff_param = np.array(get_align_mat_new(fg_shape, bg_shape)).reshape(2, 3)
309
+ warped_face = cv2.warpAffine(fg_im, aff_param, (W, H), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REFLECT)
310
+ fg_mask = cv2.warpAffine(fg_mask, aff_param, (W, H), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REFLECT)
311
+ fg_mask = fg_mask > 0
312
+ else:
313
+ warped_face = faceswap.warp_image_3d(fg_im, np.array(fg_shape[:48]), np.array(bg_shape[:48]), (H, W))
314
+ fg_mask = np.mean(warped_face, axis=2) > 0
315
+
316
+ return warped_face, fg_mask
317
+
318
+
319
+ def colorTransfer(self, src, dst, mask):
320
+ transferredDst = np.copy(dst)
321
+ maskIndices = np.where(mask != 0)
322
+ maskedSrc = src[maskIndices[0], maskIndices[1]].astype(np.float32)
323
+ maskedDst = dst[maskIndices[0], maskIndices[1]].astype(np.float32)
324
+
325
+ # Compute means and standard deviations
326
+ meanSrc = np.mean(maskedSrc, axis=0)
327
+ stdSrc = np.std(maskedSrc, axis=0)
328
+ meanDst = np.mean(maskedDst, axis=0)
329
+ stdDst = np.std(maskedDst, axis=0)
330
+
331
+ # Perform color transfer
332
+ maskedDst = (maskedDst - meanDst) * (stdSrc / stdDst) + meanSrc
333
+ maskedDst = np.clip(maskedDst, 0, 255)
334
+
335
+ # Copy the entire background into transferredDst
336
+ transferredDst = np.copy(dst)
337
+ # Now apply color transfer only to the masked region
338
+ transferredDst[maskIndices[0], maskIndices[1]] = maskedDst.astype(np.uint8)
339
+
340
+ return transferredDst
341
+
342
+
343
+ def blend_images(self, color_corrected_fg, bg_im, bg_mask, featherAmount=0.2):
344
+ """
345
+ Blend foreground and background images together.
346
+ """
347
+ # normalize the mask to have values between 0 and 1
348
+ b_mask = bg_mask / 255.
349
+
350
+ # Add an extra dimension and repeat the mask to match the number of channels in color_corrected_fg and bg_im
351
+ b_mask = np.repeat(b_mask[:, :, np.newaxis], 3, axis=2)
352
+
353
+ # Compute the alpha blending
354
+ maskIndices = np.where(b_mask != 0)
355
+ maskPts = np.hstack((maskIndices[1][:, np.newaxis], maskIndices[0][:, np.newaxis]))
356
+
357
+ # FIXME: deal with the bugs of empty maskpts
358
+ if maskPts.size == 0:
359
+ print(f"No non-zero values found in bg_mask for blending. Skipping this image.")
360
+ return color_corrected_fg # or handle this situation differently according to the needs
361
+
362
+ faceSize = np.max(maskPts, axis=0) - np.min(maskPts, axis=0)
363
+ featherAmount = featherAmount * np.max(faceSize)
364
+
365
+ hull = cv2.convexHull(maskPts)
366
+ dists = np.zeros(maskPts.shape[0])
367
+ for i in range(maskPts.shape[0]):
368
+ dists[i] = cv2.pointPolygonTest(hull, (int(maskPts[i, 0]), int(maskPts[i, 1])), True)
369
+
370
+ weights = np.clip(dists / featherAmount, 0, 1)
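+ # pointPolygonTest returns a signed distance, so pixels deep inside the hull get weight 1,
+ # while pixels within featherAmount of the mask border fade linearly towards 0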
371
+
372
+ # Perform the blending operation
373
+ color_corrected_fg = color_corrected_fg.astype(float)
374
+ bg_im = bg_im.astype(float)
375
+ blended_image = np.copy(bg_im)
376
+ blended_image[maskIndices[0], maskIndices[1]] = weights[:, np.newaxis] * color_corrected_fg[maskIndices[0], maskIndices[1]] + (1 - weights[:, np.newaxis]) * bg_im[maskIndices[0], maskIndices[1]]
377
+
378
+ # Convert the blended image to 8-bit unsigned integers
379
+ blended_image = np.clip(blended_image, 0, 255)
380
+ blended_image = blended_image.astype(np.uint8)
381
+ return blended_image
382
+
383
+
384
+ def process_images(self, imid_fg, imid_bg, index):
385
+ """
386
+ Overview:
387
+ Process foreground and background images following the data generation pipeline (BI dataset).
388
+
389
+ Terminology:
390
+ Foreground (fg) image: The image containing the face that will be blended onto the background image.
391
+ Background (bg) image: The image onto which the face from the foreground image will be blended.
392
+ """
393
+ fg_im, fg_shape, bg_im, bg_shape = self.preprocess_images(imid_fg, imid_bg)
394
+ fg_mask, bg_mask = self.generate_masks(fg_im, fg_shape, bg_im, bg_shape)
395
+ warped_face, fg_mask = self.warp_images(fg_im, fg_shape, bg_im, bg_shape, fg_mask)
396
+
397
+ try:
398
+ # add the below two lines to make sure the bg_mask is strictly within the fg_mask
399
+ bg_mask[fg_mask == 0] = 0
400
+ color_corrected_fg = self.colorTransfer(bg_im, warped_face, bg_mask)
401
+ blended_image = self.blend_images(color_corrected_fg, bg_im, bg_mask)
402
+ # FIXME: ugly, in order to fix the problem of mask (all zero values for bg_mask)
403
+ except:
404
+ color_corrected_fg = self.colorTransfer(bg_im, warped_face, bg_mask)
405
+ blended_image = self.blend_images(color_corrected_fg, bg_im, bg_mask)
406
+ boundary = get_boundary(bg_mask)
407
+
408
+ # # Prepare images and titles for the combined image
409
+ # images = [fg_im, np.where(fg_mask>0, 255, 0), bg_im, bg_mask, color_corrected_fg, blended_image, np.where(boundary>0, 255, 0)]
410
+ # titles = ["Fg Image", "Fg Mask", "Bg Image",
411
+ # "Bg Mask", "Blended Region",
412
+ # "Blended Image", "Boundary"]
413
+
414
+ # # Save the combined image
415
+ # os.makedirs('facexray_examples_3', exist_ok=True)
416
+ # self.save_combined_image(images, titles, index, f'facexray_examples_3/combined_image_{index}.png')
417
+ return blended_image, boundary, bg_im
418
+
419
+
420
+ def post_proc(self, img):
421
+ '''
422
+ if self.mode == 'train':
423
+ #if np.random.rand() < 0.5:
424
+ # img = random_add_noise(img)
425
+ #add_gaussian_noise(img)
426
+ if np.random.rand() < 0.5:
427
+ #img, _ = change_res(img)
428
+ img = gaussian_blur(img)
429
+ '''
430
+ img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
431
+ im_aug = self.blended_aug(img)
432
+ im_aug = Image.fromarray(np.uint8(im_aug))  # convert the augmented image to PIL
433
+ im_aug = self.transforms(im_aug)
434
+ return im_aug
435
+
436
+
437
+ @staticmethod
438
+ def save_combined_image(images, titles, index, save_path):
439
+ """
440
+ Save the combined image with titles for each single image.
441
+
442
+ Args:
443
+ images (List[np.ndarray]): List of images to be combined.
444
+ titles (List[str]): List of titles for each image.
445
+ index (int): Index of the image.
446
+ save_path (str): Path to save the combined image.
447
+ """
448
+ # Determine the maximum height and width among the images
449
+ max_height = max(image.shape[0] for image in images)
450
+ max_width = max(image.shape[1] for image in images)
451
+
452
+ # Create the canvas
453
+ canvas = np.zeros((max_height * len(images), max_width, 3), dtype=np.uint8)
454
+
455
+ # Place the images and titles on the canvas
456
+ current_height = 0
457
+ for image, title in zip(images, titles):
458
+ height, width = image.shape[:2]
459
+
460
+ # Check if image has a third dimension (color channels)
461
+ if image.ndim == 2:
462
+ # If not, add a third dimension
463
+ image = np.tile(image[..., None], (1, 1, 3))
464
+
465
+ canvas[current_height : current_height + height, :width] = image
466
+ cv2.putText(
467
+ canvas, title, (10, current_height + 30),
468
+ cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2
469
+ )
470
+ current_height += height
471
+
472
+ # Save the combined image
473
+ cv2.imwrite(save_path, canvas)
474
+
475
+
476
+ def __getitem__(self, index):
477
+ """
478
+ Get an item from the dataset by index.
479
+ """
480
+ one_lmk_path = self.imid_list[index]
481
+ try:
482
+ label = 1 if one_lmk_path.split('/')[6]=='manipulated_sequences' else 0
483
+ except Exception as e:
484
+ label = 1 if one_lmk_path.split('\\')[6] == 'manipulated_sequences' else 0
485
+ imid_fg, imid_bg = self.get_fg_bg(one_lmk_path)
486
+ manipulate_img, boundary, imid_bg = self.process_images(imid_fg, imid_bg, index)
487
+
488
+ manipulate_img = self.post_proc(manipulate_img)
489
+ imid_bg = self.post_proc(imid_bg)
490
+ boundary = torch.from_numpy(boundary)
491
+ boundary = boundary.unsqueeze(2).permute(2, 0, 1)
492
+
493
+ # fake data
494
+ fake_data_tuple = (manipulate_img, boundary, 1)
495
+ # real data
496
+ real_data_tuple = (imid_bg, torch.zeros_like(boundary), label)
497
+
498
+ return fake_data_tuple, real_data_tuple
499
+
500
+
501
+ @staticmethod
502
+ def collate_fn(batch):
503
+ """
504
+ Collates batches of data and shuffles the images.
505
+ """
506
+ # Unzip the batch
507
+ fake_data, real_data = zip(*batch)
508
+
509
+ # Unzip the fake and real data
510
+ fake_images, fake_boundaries, fake_labels = zip(*fake_data)
511
+ real_images, real_boundaries, real_labels = zip(*real_data)
512
+
513
+ # Combine fake and real data
514
+ images = torch.stack(fake_images + real_images)
515
+ boundaries = torch.stack(fake_boundaries + real_boundaries)
516
+ labels = torch.tensor(fake_labels + real_labels)
517
+
518
+ # Combine images, boundaries, and labels into tuples
519
+ combined_data = list(zip(images, boundaries, labels))
520
+
521
+ # Shuffle the combined data
522
+ random.shuffle(combined_data)
523
+
524
+ # Unzip the shuffled data
525
+ images, boundaries, labels = zip(*combined_data)
526
+
527
+ # Create the data dictionary
528
+ data_dict = {
529
+ 'image': torch.stack(images),
530
+ 'label': torch.tensor(labels),
531
+ 'mask': torch.stack(boundaries), # Assuming boundaries are your masks
532
+ 'landmark': None # Add your landmark data if available
533
+ }
534
+
535
+ return data_dict
536
+
537
+
538
+ def __len__(self):
539
+ """
540
+ Get the length of the dataset.
541
+ """
542
+ return len(self.imid_list)
543
+
544
+
545
+ if __name__ == "__main__":
546
+ dataset = FFBlendDataset()
547
+ print('dataset length: ', len(dataset))
548
+
549
+ def tensor2bgr(im):
550
+ img = im.squeeze().cpu().numpy().transpose(1, 2, 0)
551
+ img = (img + 1)/2 * 255
552
+ img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
553
+ return img
554
+
555
+ def tensor2gray(im):
556
+ img = im.squeeze().cpu().numpy()
557
+ img = img * 255
558
+ return img
559
+
560
+ for i, (fake_data, real_data) in enumerate(dataset):
561
+ if i > 20:
562
+ break
563
+ # unpack the fake sample produced by __getitem__: (blended image, boundary map, label=1)
564
+ img, boundary, label = fake_data
565
+ if label == 1:
566
+ cv2.imwrite('{}_whole.png'.format(i), tensor2bgr(img))
567
+ cv2.imwrite('{}_boundary.png'.format(i), tensor2gray(boundary))
training/dataset/fwa_blend.py ADDED
@@ -0,0 +1,548 @@
1
+ '''
2
+ # author: Zhiyuan Yan
3
+ # email: [email protected]
4
+ # date: 2023-03-30
5
+
6
+ The code is designed for FWA and mainly modified from the below link:
7
+ https://github.com/yuezunli/DSP-FWA
8
+ '''
9
+
10
+ import os
11
+ import sys
12
+ import json
13
+ import pickle
14
+ import time
15
+
16
+ import dlib
17
+ import numpy as np
18
+ from copy import deepcopy
19
+ import cv2
20
+ import random
21
+ from PIL import Image
22
+ from skimage.util import random_noise
23
+ from skimage.draw import polygon
24
+ from scipy import linalg
25
+ import heapq as hq
26
+ import albumentations as A
27
+
28
+ import torch
29
+ from torch.autograd import Variable
30
+ from torch.utils import data
31
+ from torchvision import transforms as T
32
+ import torchvision
33
+
34
+ from dataset.utils.face_blend import *
35
+ from dataset.utils.face_align import get_align_mat_new
36
+ from dataset.utils.color_transfer import color_transfer
37
+ from dataset.utils.faceswap_utils import blendImages as alpha_blend_fea
38
+ from dataset.utils.faceswap_utils import AlphaBlend as alpha_blend
39
+ from dataset.utils.face_aug import aug_one_im, change_res
40
+ from dataset.utils.image_ae import get_pretraiend_ae
41
+ from dataset.utils.warp import warp_mask
42
+ from dataset.utils import faceswap
43
+ from scipy.ndimage.filters import gaussian_filter
44
+ from skimage.transform import AffineTransform, warp
45
+
46
+ from dataset.abstract_dataset import DeepfakeAbstractBaseDataset
47
+
48
+
49
+ # Define face detector and predictor models
50
+ face_detector = dlib.get_frontal_face_detector()
51
+ predictor_path = 'preprocessing/dlib_tools/shape_predictor_81_face_landmarks.dat'
52
+ face_predictor = dlib.shape_predictor(predictor_path)
53
+
54
+
55
+ mean_face_x = np.array([
56
+ 0.000213256, 0.0752622, 0.18113, 0.29077, 0.393397, 0.586856, 0.689483, 0.799124,
57
+ 0.904991, 0.98004, 0.490127, 0.490127, 0.490127, 0.490127, 0.36688, 0.426036,
58
+ 0.490127, 0.554217, 0.613373, 0.121737, 0.187122, 0.265825, 0.334606, 0.260918,
59
+ 0.182743, 0.645647, 0.714428, 0.793132, 0.858516, 0.79751, 0.719335, 0.254149,
60
+ 0.340985, 0.428858, 0.490127, 0.551395, 0.639268, 0.726104, 0.642159, 0.556721,
61
+ 0.490127, 0.423532, 0.338094, 0.290379, 0.428096, 0.490127, 0.552157, 0.689874,
62
+ 0.553364, 0.490127, 0.42689])
63
+
64
+ mean_face_y = np.array([
65
+ 0.106454, 0.038915, 0.0187482, 0.0344891, 0.0773906, 0.0773906, 0.0344891,
66
+ 0.0187482, 0.038915, 0.106454, 0.203352, 0.307009, 0.409805, 0.515625, 0.587326,
67
+ 0.609345, 0.628106, 0.609345, 0.587326, 0.216423, 0.178758, 0.179852, 0.231733,
68
+ 0.245099, 0.244077, 0.231733, 0.179852, 0.178758, 0.216423, 0.244077, 0.245099,
69
+ 0.780233, 0.745405, 0.727388, 0.742578, 0.727388, 0.745405, 0.780233, 0.864805,
70
+ 0.902192, 0.909281, 0.902192, 0.864805, 0.784792, 0.778746, 0.785343, 0.778746,
71
+ 0.784792, 0.824182, 0.831803, 0.824182])
72
+
73
+ landmarks_2D = np.stack([mean_face_x, mean_face_y], axis=1)
74
+
75
+
76
+ class RandomDownScale(A.core.transforms_interface.ImageOnlyTransform):
77
+ def apply(self,img,**params):
78
+ return self.randomdownscale(img)
79
+
80
+ def randomdownscale(self,img):
81
+ keep_ratio=True
82
+ keep_input_shape=True
83
+ H,W,C=img.shape
84
+ ratio_list=[2,4]
85
+ r=ratio_list[np.random.randint(len(ratio_list))]
86
+ img_ds=cv2.resize(img,(int(W/r),int(H/r)),interpolation=cv2.INTER_NEAREST)
87
+ if keep_input_shape:
88
+ img_ds=cv2.resize(img_ds,(W,H),interpolation=cv2.INTER_LINEAR)
89
+ return img_ds
90
+
91
+
92
+ def umeyama( src, dst, estimate_scale ):
93
+ """Estimate N-D similarity transformation with or without scaling.
94
+ Parameters
95
+ ----------
96
+ src : (M, N) array
97
+ Source coordinates.
98
+ dst : (M, N) array
99
+ Destination coordinates.
100
+ estimate_scale : bool
101
+ Whether to estimate scaling factor.
102
+ Returns
103
+ -------
104
+ T : (N + 1, N + 1)
105
+ The homogeneous similarity transformation matrix. The matrix contains
106
+ NaN values only if the problem is not well-conditioned.
107
+ References
108
+ ----------
109
+ .. [1] "Least-squares estimation of transformation parameters between two
110
+ point patterns", Shinji Umeyama, PAMI 1991, DOI: 10.1109/34.88573
111
+ """
112
+
113
+ num = src.shape[0]
114
+ dim = src.shape[1]
115
+
116
+ # Compute mean of src and dst.
117
+ src_mean = src.mean(axis=0)
118
+ dst_mean = dst.mean(axis=0)
119
+
120
+ # Subtract mean from src and dst.
121
+ src_demean = src - src_mean
122
+ dst_demean = dst - dst_mean
123
+
124
+ # Eq. (38).
125
+ A = np.dot(dst_demean.T, src_demean) / num
126
+
127
+ # Eq. (39).
128
+ d = np.ones((dim,), dtype=np.double)
129
+ if np.linalg.det(A) < 0:
130
+ d[dim - 1] = -1
131
+
132
+ T = np.eye(dim + 1, dtype=np.double)
133
+
134
+ U, S, V = np.linalg.svd(A)
135
+
136
+ # Eq. (40) and (43).
137
+ rank = np.linalg.matrix_rank(A)
138
+ if rank == 0:
139
+ return np.nan * T
140
+ elif rank == dim - 1:
141
+ if np.linalg.det(U) * np.linalg.det(V) > 0:
142
+ T[:dim, :dim] = np.dot(U, V)
143
+ else:
144
+ s = d[dim - 1]
145
+ d[dim - 1] = -1
146
+ T[:dim, :dim] = np.dot(U, np.dot(np.diag(d), V))
147
+ d[dim - 1] = s
148
+ else:
149
+ T[:dim, :dim] = np.dot(U, np.dot(np.diag(d), V.T))
150
+
151
+ if estimate_scale:
152
+ # Eq. (41) and (42).
153
+ scale = 1.0 / src_demean.var(axis=0).sum() * np.dot(S, d)
154
+ else:
155
+ scale = 1.0
156
+
157
+ T[:dim, dim] = dst_mean - scale * np.dot(T[:dim, :dim], src_mean.T)
158
+ T[:dim, :dim] *= scale
159
+
160
+ return T
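+ # In this file the transform is used as umeyama(points[17:], landmarks_2D, True)[0:2],
+ # i.e. the top 2x3 block is the affine that maps detected landmarks onto the mean-face template.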
161
+
162
+
163
+ def shape_to_np(shape, dtype="int"):
164
+ # initialize the list of (x, y)-coordinates
165
+ coords = np.zeros((68, 2), dtype=dtype)
166
+
167
+ # loop over the 68 facial landmarks and convert them
168
+ # to a 2-tuple of (x, y)-coordinates
169
+ for i in range(0, 68):
170
+ coords[i] = (shape.part(i).x, shape.part(i).y)
171
+
172
+ # return the list of (x, y)-coordinates
173
+ return coords
174
+
175
+
176
+ from skimage.transform import AffineTransform, warp
177
+
178
+ def get_warped_face(face, landmarks, tform):
179
+ """
180
+ Apply the given affine transformation to the face and landmarks.
181
+
182
+ Args:
183
+ face (np.ndarray): The face image to be transformed.
184
+ landmarks (np.ndarray): The facial landmarks to be transformed.
185
+ tform (AffineTransform): The transformation to apply.
186
+
187
+ Returns:
188
+ warped_face (np.ndarray): The transformed face image.
189
+ warped_landmarks (np.ndarray): The transformed facial landmarks.
190
+ """
191
+ # Apply the transformation to the face
192
+ warped_face = warp(face, tform.inverse, output_shape=face.shape)
193
+ warped_face = (warped_face * 255).astype(np.uint8)
194
+
195
+ # Apply the transformation to the landmarks
196
+ warped_landmarks = tform.inverse(landmarks)
197
+
198
+ return warped_face, warped_landmarks
199
+
200
+
201
+ def warp_face_within_landmarks(face, landmarks, tform):
202
+ """
203
+ Apply the given affine transformation to the face and landmarks,
204
+ and retain only the area within the landmarks.
205
+
206
+ Args:
207
+ face (np.ndarray): The face image to be transformed.
208
+ landmarks (np.ndarray): The facial landmarks to be transformed.
209
+ tform (AffineTransform): The transformation to apply.
210
+
211
+ Returns:
212
+ warped_face (np.ndarray): The transformed face image.
213
+ warped_landmarks (np.ndarray): The transformed facial landmarks.
214
+ """
215
+ # Apply the transformation to the face
216
+ warped_face = warp(face, tform.inverse, output_shape=face.shape)
217
+ warped_face = (warped_face * 255).astype(np.uint8)
218
+
219
+ # Apply the transformation to the landmarks
220
+ warped_landmarks = tform.inverse(landmarks)  # map the landmarks with the same transform as the face
221
+
222
+ # Generate a mask based on the landmarks
223
+ rr, cc = polygon(warped_landmarks[:, 1], warped_landmarks[:, 0])
224
+ mask = np.zeros_like(warped_face, dtype=np.uint8)
225
+ mask[rr, cc] = 1
226
+
227
+ # Apply the mask to the face
228
+ warped_face *= mask
229
+
230
+ return warped_face, warped_landmarks
231
+
232
+
233
+ def get_2d_aligned_face(image, mat, size, padding=[0, 0]):
234
+ mat = mat * size
235
+ mat[0, 2] += padding[0]
236
+ mat[1, 2] += padding[1]
237
+ return cv2.warpAffine(image, mat, (size + 2 * padding[0], size + 2 * padding[1]))
238
+
239
+
240
+ def get_2d_aligned_landmarks(face_cache, aligned_face_size=256, padding=(0, 0)):
241
+ mat, points = face_cache
242
+ # Mapping landmarks to aligned face
243
+ pred_ = np.concatenate([points, np.ones((points.shape[0], 1))], axis=-1)
244
+ pred_ = np.transpose(pred_)
245
+ mat = mat * aligned_face_size
246
+ mat[0, 2] += padding[0]
247
+ mat[1, 2] += padding[1]
248
+ aligned_shape = np.dot(mat, pred_)
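+ # (2x3 affine) @ (3xN homogeneous landmarks) -> 2xN coordinates in the aligned-face crop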
249
+ aligned_shape = np.transpose(aligned_shape[:2, :])
250
+ return aligned_shape
251
+
252
+
253
+ def get_aligned_face_and_landmarks(im, face_cache, aligned_face_size = 256, padding=(0, 0)):
254
+ """
255
+ get all aligned faces and landmarks of all images
256
+ :param imgs: origin images
257
+ :param fa: face_alignment package
258
+ :return:
259
+ """
260
+ aligned_cur_shapes = []
261
+ aligned_cur_im = []
262
+ for mat, points in face_cache:
263
+ # Get transform matrix
264
+ aligned_face = get_2d_aligned_face(im, mat, aligned_face_size, padding)
265
+ aligned_shape = get_2d_aligned_landmarks([mat, points], aligned_face_size, padding)
266
+ aligned_cur_shapes.append(aligned_shape)
267
+ aligned_cur_im.append(aligned_face)
268
+ return aligned_cur_im, aligned_cur_shapes
269
+
270
+
271
+ def face_warp(im, face, trans_matrix, size, padding):
272
+ new_face = np.clip(face, 0, 255).astype(im.dtype)
273
+ image_size = im.shape[1], im.shape[0]
274
+
275
+ tmp_matrix = trans_matrix * size
276
+ delta_matrix = np.array([[0., 0., padding[0]*1.0], [0., 0., padding[1]*1.0]])
277
+ tmp_matrix = tmp_matrix + delta_matrix
278
+
279
+ # Warp the new face onto a blank canvas
280
+ warped_face = np.zeros_like(im)
281
+ cv2.warpAffine(new_face, tmp_matrix, image_size, warped_face, cv2.WARP_INVERSE_MAP,
282
+ cv2.BORDER_TRANSPARENT)
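+ # WARP_INVERSE_MAP applies the inverse of the alignment transform, pasting the
+ # aligned-face crop back into original image coordinates on a blank canvas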
283
+
284
+ # Create a mask of the warped face
285
+ mask = (warped_face > 0).astype(np.uint8)
286
+
287
+ # Blend the warped face with the original image
288
+ new_image = im * (1 - mask) + warped_face * mask
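+ # hard (non-feathered) composite: keep the original frame outside the face, warped face inside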
289
+
290
+ return new_image, mask
291
+
292
+
293
+ def get_face_loc(im, face_detector, scale=0):
294
+ """ get face locations, color order of images is rgb """
295
+ faces = face_detector(np.uint8(im), scale)
296
+ face_list = []
297
+ if faces is not None and len(faces) > 0:
298
+ for i, d in enumerate(faces):
299
+ try:
300
+ face_list.append([d.left(), d.top(), d.right(), d.bottom()])
301
+ except:
302
+ face_list.append([d.rect.left(), d.rect.top(), d.rect.right(), d.rect.bottom()])
303
+ return face_list
304
+
305
+
306
+
307
+ def align(im, face_detector, lmark_predictor, scale=0):
308
+ # This version we handle all faces in view
309
+ # channel order rgb
310
+ im = np.uint8(im)
311
+ faces = face_detector(im, scale)
312
+ face_list = []
313
+ if faces is not None and len(faces) > 0:
314
+ for pred in faces:
315
+ try:
316
+ points = shape_to_np(lmark_predictor(im, pred))
317
+ except:
318
+ points = shape_to_np(lmark_predictor(im, pred.rect))
319
+ trans_matrix = umeyama(points[17:], landmarks_2D, True)[0:2]
320
+ face_list.append([trans_matrix, points])
321
+ return face_list
322
+
323
+
324
+ class FWABlendDataset(DeepfakeAbstractBaseDataset):
325
+ def __init__(self, config=None):
326
+ super().__init__(config, mode='train')
327
+ self.transforms = T.Compose([
328
+ T.ToTensor(),
329
+ T.Normalize(mean=config['mean'],
330
+ std=config['std'])
331
+ ])
332
+ self.resolution = config['resolution']
333
+
334
+
335
+ def blended_aug(self, im):
336
+ transform = A.Compose([
337
+ A.RGBShift((-20,20),(-20,20),(-20,20),p=0.3),
338
+ A.HueSaturationValue(hue_shift_limit=(-0.3,0.3), sat_shift_limit=(-0.3,0.3), val_shift_limit=(-0.3,0.3), p=0.3),
339
+ A.RandomBrightnessContrast(brightness_limit=(-0.3,0.3), contrast_limit=(-0.3,0.3), p=0.3),
340
+ A.ImageCompression(quality_lower=40, quality_upper=100,p=0.5)
341
+ ])
342
+ # Apply transformations
343
+ im_aug = transform(image=im)
344
+ return im_aug['image']
345
+
346
+
347
+ def data_aug(self, im):
348
+ """
349
+ Apply data augmentation on the input image using albumentations.
350
+ """
351
+ transform = A.Compose([
352
+ A.Compose([
353
+ A.RGBShift((-20,20),(-20,20),(-20,20),p=0.3),
354
+ A.HueSaturationValue(hue_shift_limit=(-0.3,0.3), sat_shift_limit=(-0.3,0.3), val_shift_limit=(-0.3,0.3), p=1),
355
+ A.RandomBrightnessContrast(brightness_limit=(-0.1,0.1), contrast_limit=(-0.1,0.1), p=1),
356
+ ],p=1),
357
+ A.OneOf([
358
+ RandomDownScale(p=1),
359
+ A.Sharpen(alpha=(0.2, 0.5), lightness=(0.5, 1.0), p=1),
360
+ ],p=1),
361
+ ], p=1.)
362
+ # Apply transformations
363
+ im_aug = transform(image=im)
364
+ return im_aug['image']
365
+
366
+
367
+ def blend_images(self, img_path):
368
+ #im = cv2.imread(img_path)
369
+ im = np.array(self.load_rgb(img_path))
370
+
371
+ # Get the alignment of the head
372
+ face_cache = align(im, face_detector, face_predictor)
373
+
374
+ # Get the aligned face and landmarks
375
+ aligned_im_head, aligned_shape = get_aligned_face_and_landmarks(im, face_cache)
376
+ # If no faces were detected in the image, return None (or any suitable value)
377
+ if len(aligned_im_head) == 0 or len(aligned_shape) == 0:
378
+ return None, None
379
+ aligned_im_head = aligned_im_head[0]
380
+ aligned_shape = aligned_shape[0]
381
+
382
+ # Apply transformations to the face
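+ # (FWA-style artifact simulation: downscale, blur and re-upsample the aligned face to
+ # mimic the resolution mismatch that face-swap pipelines leave behind)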
383
+ scale_factor = random.choice([0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8])
384
+ scaled_face = cv2.resize(aligned_im_head, (0, 0), fx=scale_factor, fy=scale_factor)
385
+
386
+ # Apply Gaussian blur to the scaled face
387
+ blurred_face = cv2.GaussianBlur(scaled_face, (5, 5), 0)
388
+
389
+ # Resize the processed image back to the original size
390
+ resized_face = cv2.resize(blurred_face, (aligned_im_head.shape[1], aligned_im_head.shape[0]))
391
+
392
+ # Generate a random facial mask
393
+ mask = get_mask(aligned_shape.astype(np.float32), resized_face, std=20, deform=True)
394
+
395
+ # Apply the mask to the resized face
396
+ masked_face = cv2.bitwise_and(resized_face, resized_face, mask=mask)
397
+
398
+ # do aug before warp
399
+ im = np.array(self.blended_aug(im))
400
+
401
+ # Warp the face back to the original image
402
+ im, masked_face = face_warp(im, masked_face, face_cache[0][0], self.resolution, [0, 0])
403
+ shape = get_2d_aligned_landmarks(face_cache[0], self.resolution, [0, 0])
404
+ return im, masked_face
405
+
406
+
407
+ def process_images(self, img_path, index):
408
+ """
409
+ Process an image following the data generation pipeline.
410
+ """
411
+ blended_im, mask = self.blend_images(img_path)
412
+
413
+ # Prepare images and titles for the combined image
414
+ imid_fg = np.array(self.load_rgb(img_path))
415
+ imid_fg = np.array(self.data_aug(imid_fg))
416
+
417
+ if blended_im is None or mask is None:
418
+ return imid_fg, None
419
+
420
+ # images = [
421
+ # imid_fg,
422
+ # np.where(mask.astype(np.uint8)>0, 255, 0),
423
+ # blended_im,
424
+ # ]
425
+ # titles = ["Image", "Mask", "Blended Image"]
426
+
427
+ # # Save the combined image
428
+ # os.makedirs('fwa_examples_2', exist_ok=True)
429
+ # self.save_combined_image(images, titles, index, f'fwa_examples_2/combined_image_{index}.png')
430
+ return imid_fg, blended_im
431
+
432
+
433
+ def post_proc(self, img):
434
+ '''
435
+ if self.mode == 'train':
436
+ #if np.random.rand() < 0.5:
437
+ # img = random_add_noise(img)
438
+ #add_gaussian_noise(img)
439
+ if np.random.rand() < 0.5:
440
+ #img, _ = change_res(img)
441
+ img = gaussian_blur(img)
442
+ '''
443
+ img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
444
+ im_aug = self.blended_aug(img)
445
+ im_aug = Image.fromarray(np.uint8(im_aug))  # convert the augmented image to PIL
446
+ im_aug = self.transforms(im_aug)
447
+ return im_aug
448
+
449
+
450
+ @staticmethod
451
+ def save_combined_image(images, titles, index, save_path):
452
+ """
453
+ Save the combined image with titles for each single image.
454
+
455
+ Args:
456
+ images (List[np.ndarray]): List of images to be combined.
457
+ titles (List[str]): List of titles for each image.
458
+ index (int): Index of the image.
459
+ save_path (str): Path to save the combined image.
460
+ """
461
+ # Determine the maximum height and width among the images
462
+ max_height = max(image.shape[0] for image in images)
463
+ max_width = max(image.shape[1] for image in images)
464
+
465
+ # Create the canvas
466
+ canvas = np.zeros((max_height * len(images), max_width, 3), dtype=np.uint8)
467
+
468
+ # Place the images and titles on the canvas
469
+ current_height = 0
470
+ for image, title in zip(images, titles):
471
+ height, width = image.shape[:2]
472
+
473
+ # Check if image has a third dimension (color channels)
474
+ if image.ndim == 2:
475
+ # If not, add a third dimension
476
+ image = np.tile(image[..., None], (1, 1, 3))
477
+
478
+ canvas[current_height : current_height + height, :width] = image
479
+ cv2.putText(
480
+ canvas, title, (10, current_height + 30),
481
+ cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2
482
+ )
483
+ current_height += height
484
+
485
+ # Save the combined image
486
+ cv2.imwrite(save_path, canvas)
487
+
488
+
489
+ def __getitem__(self, index):
490
+ """
491
+ Get an item from the dataset by index.
492
+ """
493
+ one_img_path = self.data_dict['image'][index]
494
+ try:
495
+ label = 1 if one_img_path.split('/')[6]=='manipulated_sequences' else 0
496
+ except Exception as e:
497
+ label = 1 if one_img_path.split('\\')[6] == 'manipulated_sequences' else 0
498
+ blend_label = 1
499
+ imid, manipulate_img = self.process_images(one_img_path, index)
500
+
501
+ if manipulate_img is None:
502
+ manipulate_img = deepcopy(imid)
503
+ blend_label = label
504
+ manipulate_img = self.post_proc(manipulate_img)
505
+ imid = self.post_proc(imid)
506
+
507
+ # blend data
508
+ fake_data_tuple = (manipulate_img, blend_label)
509
+ # original data
510
+ real_data_tuple = (imid, label)
511
+
512
+ return fake_data_tuple, real_data_tuple
513
+
514
+
515
+ @staticmethod
516
+ def collate_fn(batch):
517
+ """
518
+ Collates batches of data and shuffles the images.
519
+ """
520
+ # Unzip the batch
521
+ fake_data, real_data = zip(*batch)
522
+
523
+ # Unzip the fake and real data
524
+ fake_images, fake_labels = zip(*fake_data)
525
+ real_images, real_labels = zip(*real_data)
526
+
527
+ # Combine fake and real data
528
+ images = torch.stack(fake_images + real_images)
529
+ labels = torch.tensor(fake_labels + real_labels)
530
+
531
+ # Combine images, boundaries, and labels into tuples
532
+ combined_data = list(zip(images, labels))
533
+
534
+ # Shuffle the combined data
535
+ random.shuffle(combined_data)
536
+
537
+ # Unzip the shuffled data
538
+ images, labels = zip(*combined_data)
539
+
540
+ # Create the data dictionary
541
+ data_dict = {
542
+ 'image': torch.stack(images),
543
+ 'label': torch.tensor(labels),
544
+ 'mask': None,
545
+ 'landmark': None # Add your landmark data if available
546
+ }
547
+
548
+ return data_dict
training/dataset/generate_parsing_mask.py ADDED
@@ -0,0 +1,129 @@
1
+ '''
2
+ # author: Zhiyuan Yan
3
+ # email: [email protected]
4
+ # date: 2024-01-26
5
+
6
+ The code generates face-parsing masks used by self-blending style methods (SBI, CVPR 2022).
7
+ '''
8
+
9
+ import sys
10
+ sys.path.append('.')
11
+
12
+ import os
13
+ import cv2
14
+ import yaml
15
+ import random
16
+ import torch
17
+ import torch.nn as nn
18
+ from PIL import Image
19
+ import numpy as np
20
+ from copy import deepcopy
21
+ import albumentations as A
22
+ from training.dataset.abstract_dataset import DeepfakeAbstractBaseDataset
23
+ from training.dataset.sbi_api import SBI_API
24
+ from training.dataset.utils.bi_online_generation_yzy import random_get_hull
25
+ from training.dataset.SimSwap.test_one_image import self_blend
26
+
27
+ import warnings
28
+ warnings.filterwarnings('ignore')
29
+
30
+
31
+ from transformers import SegformerImageProcessor, SegformerForSemanticSegmentation
32
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
33
+ image_processor = SegformerImageProcessor.from_pretrained("/Youtu_Pangu_Security_Public/youtu-pangu-public/zhiyuanyan/huggingface/hub/models--jonathandinu--face-parsing/snapshots/a2bf62f39dfd8f8856a3c19be8b0707a8d68abdd")
34
+ face_parser = SegformerForSemanticSegmentation.from_pretrained("/Youtu_Pangu_Security_Public/youtu-pangu-public/zhiyuanyan/huggingface/hub/models--jonathandinu--face-parsing/snapshots/a2bf62f39dfd8f8856a3c19be8b0707a8d68abdd").to(device)
35
+
36
+
37
+ def create_facial_mask(mask, with_neck=False):
38
+ facial_labels = [1, 2, 3, 4, 5, 6, 7, 10, 11, 12]
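+ # assumed label ids of the face-parsing model (CelebAMask-HQ style): skin, nose,
+ # eyes/glasses, brows, mouth and lips; 17 (optionally added below) is the neck class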
39
+ if with_neck:
40
+ facial_labels += [17]
41
+ facial_mask = np.zeros_like(mask, dtype=bool)
42
+ for label in facial_labels:
43
+ facial_mask |= (mask == label)
44
+ return facial_mask.astype(np.uint8) * 255
45
+
46
+
47
+ def face_parsing_mask(img1, with_neck=False):
48
+ # run inference on image
49
+ img1 = Image.fromarray(img1)
50
+ inputs = image_processor(images=img1, return_tensors="pt").to(device)
51
+ outputs = face_parser(**inputs)
52
+ logits = outputs.logits # shape (batch_size, num_labels, ~height/4, ~width/4)
53
+
54
+ # resize output to match input image dimensions
55
+ upsampled_logits = nn.functional.interpolate(logits,
56
+ size=img1.size[::-1], # H x W
57
+ mode='bilinear',
58
+ align_corners=False)
59
+ labels = upsampled_logits.argmax(dim=1)[0]
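+ # per-pixel argmax over the class dimension gives an HxW label map for the first image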
60
+ mask = labels.cpu().numpy()
61
+ mask = create_facial_mask(mask, with_neck)
62
+ return mask
63
+
64
+
65
+ class YZYDataset(DeepfakeAbstractBaseDataset):
66
+ def __init__(self, config=None, mode='train'):
67
+ super().__init__(config, mode)
68
+
69
+ # Get real lists
70
+ # Fix the label of real images to be 0
71
+ self.real_imglist = [(img, label) for img, label in zip(self.image_list, self.label_list) if label == 0]
72
+
73
+
74
+ def __getitem__(self, index):
75
+ # Get the real image paths and labels
76
+ real_image_path, real_label = self.real_imglist[index]
77
+ # real_image_path = real_image_path.replace('/Youtu_Pangu_Security_Public/', '/Youtu_Pangu_Security/public/')
78
+
79
+ # Load the real images
80
+ real_image = self.load_rgb(real_image_path)
81
+ real_image = np.array(real_image) # Convert to numpy array
82
+
83
+ # Face Parsing
84
+ mask = face_parsing_mask(real_image, with_neck=False)
85
+ parse_mask_path = real_image_path.replace('frames', 'parse_mask')
86
+ os.makedirs(os.path.dirname(parse_mask_path), exist_ok=True)
87
+ cv2.imwrite(parse_mask_path, mask)
88
+
89
+ # # SRI generation
90
+ # sri_image = self_blend(real_image)
91
+ # sri_path = real_image_path.replace('frames', 'sri_frames')
92
+ # os.makedirs(os.path.dirname(sri_path), exist_ok=True)
93
+ # cv2.imwrite(sri_path, sri_image)
94
+
95
+ @staticmethod
96
+ def collate_fn(batch):
97
+ data_dict = {
98
+ 'image': None,
99
+ 'label': None,
100
+ 'landmark': None,
101
+ 'mask': None,
102
+ }
103
+ return data_dict
104
+
105
+ def __len__(self):
106
+ return len(self.real_imglist)
107
+
108
+
109
+
110
+ if __name__ == '__main__':
111
+ with open('./training/config/detector/sbi.yaml', 'r') as f:
112
+ config = yaml.safe_load(f)
113
+ with open('./training/config/train_config.yaml', 'r') as f:
114
+ config2 = yaml.safe_load(f)
115
+ config2['data_manner'] = 'lmdb'
116
+ config['dataset_json_folder'] = '/Youtu_Pangu_Security_Public/youtu-pangu-public/zhiyuanyan/DeepfakeBenchv2/preprocessing/dataset_json'
117
+ config.update(config2)
118
+ train_set = YZYDataset(config=config, mode='train')
119
+ train_data_loader = \
120
+ torch.utils.data.DataLoader(
121
+ dataset=train_set,
122
+ batch_size=config['train_batchSize'],
123
+ shuffle=True,
124
+ num_workers=0,
125
+ collate_fn=train_set.collate_fn,
126
+ )
127
+ from tqdm import tqdm
128
+ for iteration, batch in enumerate(tqdm(train_data_loader)):
129
+ print(iteration)
training/dataset/generate_xray_nearest.py ADDED
@@ -0,0 +1,136 @@
1
+ '''
2
+ # author: Zhiyuan Yan
3
+ # email: [email protected]
4
+ # date: 2023-03-30
5
+
6
+ The code is specifically designed for generating nearest sample pairs for Face X-ray.
7
+ Alternatively, you can utilize the pre-generated pkl files available in our GitHub repository. Please refer to the "Releases" section on our repository for accessing these files.
8
+ '''
9
+
10
+ import os
11
+ import json
12
+ import pickle
13
+ import numpy as np
14
+ import heapq
15
+ import random
16
+ from tqdm import tqdm
17
+ from scipy.spatial import KDTree
18
+
19
+
20
+ def load_landmark(file_path):
21
+ """
22
+ Load 2D facial landmarks from a file path.
23
+
24
+ Args:
25
+ file_path: A string indicating the path to the landmark file.
26
+
27
+ Returns:
28
+ A numpy array containing the loaded landmarks.
29
+
30
+ Raises:
31
+ None.
32
+ """
33
+ if file_path is None:
34
+ return np.zeros((81, 2))
35
+ if os.path.exists(file_path):
36
+ landmark = np.load(file_path)
37
+ return np.float32(landmark)
38
+ else:
39
+ return np.zeros((81, 2))
40
+
41
+
42
+ def get_landmark_dict(dataset_folder):
43
+ # Check if the dictionary has already been created
44
+ if os.path.exists('landmark_dict_ff.pkl'):
45
+ with open('landmark_dict_ff.pkl', 'rb') as f:
46
+ return pickle.load(f)
47
+ # Open the metadata file for the current folder
48
+ metadata_path = os.path.join(dataset_folder, "FaceForensics++.json")
49
+ with open(metadata_path, "r") as f:
50
+ metadata = json.load(f)
51
+ # Iterate over the metadata entries and add the landmark paths to the list
52
+ ff_real_data = metadata['FaceForensics++']['FF-real']
53
+ # Using dictionary comprehension to generate the landmark_dict
54
+ landmark_dict = {
55
+ frame_path.replace('frames', 'landmarks').replace(".png", ".npy"): load_landmark(
56
+ frame_path.replace('frames', 'landmarks').replace(".png", ".npy")
57
+ )
58
+ for mode, value in ff_real_data.items()
59
+ for video_name, video_info in tqdm(value['c23'].items())
60
+ for frame_path in video_info['frames']
61
+ }
62
+ # Save the dictionary to a pickle file
63
+ with open('landmark_dict_ff.pkl', 'wb') as f:  # same file name as the cache check above
64
+ pickle.dump(landmark_dict, f)
65
+ return landmark_dict
66
+
67
+
68
+ def get_nearest_faces_fixed_pair(landmark_info, num_neighbors):
69
+ '''
70
+ Using KDTree to find the nearest faces for each image (Much faster!!)
71
+ '''
72
+ random.seed(1024) # Fix the random seed for reproducibility
73
+
74
+ # Check if the dictionary has already been created
75
+ if os.path.exists('nearest_face_info.pkl'):
76
+ with open('nearest_face_info.pkl', 'rb') as f:
77
+ return pickle.load(f)
78
+
79
+ landmarks_array = np.array([lmk.flatten() for lmk in landmark_info.values()])
80
+ landmark_ids = list(landmark_info.keys())
81
+
82
+ # Build a KDTree using the flattened landmarks
83
+ tree = KDTree(landmarks_array)
84
+
85
+ nearest_faces = {}
86
+ for idx, this_lmk in tqdm(enumerate(landmarks_array), total=len(landmarks_array)):
87
+ # Query the KDTree for the nearest neighbors (excluding itself)
88
+ dists, indices = tree.query(this_lmk, k=num_neighbors + 1)
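+ # k = num_neighbors + 1 because the closest hit is always the query landmark itself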
89
+ # Randomly pick one from the nearest N neighbors (excluding itself)
90
+ picked_idx = random.choice(indices[1:])
91
+ nearest_faces[landmark_ids[idx]] = landmark_ids[picked_idx]
92
+
93
+ # Save the dictionary to a pickle file
94
+ with open('nearest_face_info.pkl', 'wb') as f:
95
+ pickle.dump(nearest_faces, f)
96
+
97
+ return nearest_faces
98
+
99
+
100
+ def get_nearest_faces(landmark_info, num_neighbors):
101
+ '''
102
+ Using KDTree to find the nearest faces for each image (Much faster!!)
103
+ '''
104
+ random.seed(1024) # Fix the random seed for reproducibility
105
+
106
+ # Check if the dictionary has already been created
107
+ if os.path.exists('nearest_face_info.pkl'):
108
+ with open('nearest_face_info.pkl', 'rb') as f:
109
+ return pickle.load(f)
110
+
111
+ landmarks_array = np.array([lmk.flatten() for lmk in landmark_info.values()])
112
+ landmark_ids = list(landmark_info.keys())
113
+
114
+ # Build a KDTree using the flattened landmarks
115
+ tree = KDTree(landmarks_array)
116
+
117
+ nearest_faces = {}
118
+ for idx, this_lmk in tqdm(enumerate(landmarks_array), total=len(landmarks_array)):
119
+ # Query the KDTree for the nearest neighbors (excluding itself)
120
+ dists, indices = tree.query(this_lmk, k=num_neighbors + 1)
121
+ # Store the nearest N neighbors (excluding itself)
122
+ nearest_faces[landmark_ids[idx]] = [landmark_ids[i] for i in indices[1:]]
123
+
124
+ # Save the dictionary to a pickle file
125
+ with open('nearest_face_info.pkl', 'wb') as f:
126
+ pickle.dump(nearest_faces, f)
127
+
128
+ return nearest_faces
129
+
130
+ # Load the landmark dictionary and obtain the landmark dict
131
+ dataset_folder = "/home/zhiyuanyan/disfin/deepfake_benchmark/preprocessing/dataset_json/"
132
+ landmark_info = get_landmark_dict(dataset_folder)
133
+
134
+ # Get the nearest faces for each image (in landmark_dict)
135
+ num_neighbors = 100
136
+ nearest_faces_info = get_nearest_faces(landmark_info, num_neighbors) # running time: about 20 mins
training/dataset/iid_dataset.py ADDED
@@ -0,0 +1,116 @@
1
+ '''
2
+ # author: Zhiyuan Yan
3
+ # email: [email protected]
4
+ # date: 2023-03-30
5
+
6
+ The code is designed for scenarios such as disentanglement-based methods where it is necessary to ensure an equal number of positive and negative samples.
7
+ '''
8
+ import os.path
9
+ from copy import deepcopy
10
+ import cv2
11
+ import math
12
+ import torch
13
+ import random
14
+
15
+ import yaml
16
+ from PIL import Image, ImageDraw
17
+ import numpy as np
18
+ from torch.utils.data import DataLoader
19
+
20
+ from dataset.abstract_dataset import DeepfakeAbstractBaseDataset
21
+
22
+ class IIDDataset(DeepfakeAbstractBaseDataset):
23
+ def __init__(self, config=None, mode='train'):
24
+ super().__init__(config, mode)
25
+
26
+
27
+ def __getitem__(self, index):
28
+ # Get the image paths and label
29
+ image_path = self.data_dict['image'][index]
30
+ if '\\' in image_path:
31
+ per = image_path.split('\\')[-2]
32
+ else:
33
+ per = image_path.split('/')[-2]
34
+ id_index = int(per.split('_')[-1]) # real video id
35
+ label = self.data_dict['label'][index]
36
+
37
+ # Load the image
38
+ try:
39
+ image = self.load_rgb(image_path)
40
+ except Exception as e:
41
+ # Skip this image and return the first one
42
+ print(f"Error loading image at index {index}: {e}")
43
+ return self.__getitem__(0)
44
+ image = np.array(image) # Convert to numpy array for data augmentation
45
+
46
+ # Do Data Augmentation
47
+ image_trans,_,_ = self.data_aug(image)
48
+
49
+ # To tensor and normalize
50
+ image_trans = self.normalize(self.to_tensor(image_trans))
51
+
52
+ return id_index, image_trans, label
53
+
54
+ @staticmethod
55
+ def collate_fn(batch):
56
+ """
57
+ Collate a batch of data points.
58
+
59
+ Args:
60
+ batch (list): A list of tuples containing the image tensor, the label tensor,
61
+ the landmark tensor, and the mask tensor.
62
+
63
+ Returns:
64
+ A tuple containing the image tensor, the label tensor, the landmark tensor,
65
+ and the mask tensor.
66
+ """
67
+ # Separate the image, label, landmark, and mask tensors
68
+ id_indexes, image_trans, label = zip(*batch)
69
+
70
+ # Stack the image, label, landmark, and mask tensors
71
+ images = torch.stack(image_trans, dim=0)
72
+ labels = torch.LongTensor(label)
73
+ ids = torch.LongTensor(id_indexes)
74
+ # Create a dictionary of the tensors
75
+ data_dict = {}
76
+ data_dict['image'] = images
77
+ data_dict['label'] = labels
78
+ data_dict['id_index'] = ids
79
+ data_dict['mask']=None
80
+ data_dict['landmark']=None
81
+ return data_dict
82
+
83
+
84
+ def draw_landmark(img,landmark):
85
+ draw = ImageDraw.Draw(img)
86
+
87
+ # landmark = np.stack([mean_face_x, mean_face_y], axis=1)
88
+ # landmark *=256
89
+ # iterate over each landmark point
90
+ for i, point in enumerate(landmark):
91
+ # mark the landmark point on the image
92
+ draw.ellipse((point[0] - 1, point[1] - 1, point[0] + 1, point[1] + 1), fill=(255, 0, 0))
93
+ # draw the point index next to the landmark
94
+ draw.text((point[0], point[1]), str(i), fill=(255, 255, 255))
95
+ return img
96
+
97
+
98
+ if __name__ == '__main__':
99
+ detector_path = r"./training/config/detector/xception.yaml"
100
+ # weights_path = "./ckpts/xception/CDFv2/tb_v1/ov.pth"
101
+ with open(detector_path, 'r') as f:
102
+ config = yaml.safe_load(f)
103
+ with open('./training/config/train_config.yaml', 'r') as f:
104
+ config2 = yaml.safe_load(f)
105
+ config2['data_manner'] = 'lmdb'
106
+ config['dataset_json_folder'] = 'preprocessing/dataset_json_v3'
107
+ config.update(config2)
108
+ dataset = IIDDataset(config=config)
109
+ batch_size = 2
110
+ dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True,collate_fn=dataset.collate_fn)
111
+
112
+ for i, batch in enumerate(dataloader):
113
+ print(f"Batch {i}: {batch}")
114
+
115
+ # collate_fn returns a dict, so individual fields are accessed by key:
116
+ img = batch['image']
training/dataset/library/000_0000.png ADDED
training/dataset/library/001_0000.png ADDED
training/dataset/library/DeepFakeMask.py ADDED
@@ -0,0 +1,181 @@
1
+ #!/usr/bin/python
2
+ # -*- coding: UTF-8 -*-
3
+ # Created by: algohunt
4
+ # Microsoft Research & Peking University
5
6
+ # Copyright (c) 2019
7
+
8
+ #!/usr/bin/env python3
9
+ """ Masks functions for faceswap.py """
10
+
11
+ import inspect
12
+ import logging
13
+ import sys
14
+
15
+ import cv2
16
+ import numpy as np
17
+
18
+ # logger = logging.getLogger(__name__) # pylint: disable=invalid-name
19
+
20
+
21
+ def get_available_masks():
22
+ """ Return a list of the available masks for cli """
23
+ masks = sorted([name for name, obj in inspect.getmembers(sys.modules[__name__])
24
+ if inspect.isclass(obj) and name != "Mask"])
25
+ masks.append("none")
26
+ # logger.debug(masks)
27
+ return masks
28
+
29
+
30
+ def get_default_mask():
31
+ """ Set the default mask for cli """
32
+ masks = get_available_masks()
33
+ default = "dfl_full"
34
+ default = default if default in masks else masks[0]
35
+ # logger.debug(default)
36
+ return default
37
+
38
+
39
+ class Mask():
40
+ """ Parent class for masks
41
+ the output mask will be <mask_type>.mask
42
+ channels: 1, 3 or 4:
43
+ 1 - Returns a single channel mask
44
+ 3 - Returns a 3 channel mask
45
+ 4 - Returns the original image with the mask in the alpha channel """
46
+
47
+ def __init__(self, landmarks, face, channels=4):
48
+ # logger.info("Initializing %s: (face_shape: %s, channels: %s, landmarks: %s)",
49
+ # self.__class__.__name__, face.shape, channels, landmarks)
50
+ self.landmarks = landmarks
51
+ self.face = face
52
+ self.channels = channels
53
+
54
+ mask = self.build_mask()
55
+ self.mask = self.merge_mask(mask)
56
+ #logger.info("Initialized %s", self.__class__.__name__)
57
+
58
+ def build_mask(self):
59
+ """ Override to build the mask """
60
+ raise NotImplementedError
61
+
62
+ def merge_mask(self, mask):
63
+ """ Return the mask in requested shape """
64
+ #logger.info("mask_shape: %s", mask.shape)
65
+ assert self.channels in (1, 3, 4), "Channels should be 1, 3 or 4"
66
+ assert mask.shape[2] == 1 and mask.ndim == 3, "Input mask should be 3-dimensional with a single channel"
67
+
68
+ if self.channels == 3:
69
+ retval = np.tile(mask, 3)
70
+ elif self.channels == 4:
71
+ retval = np.concatenate((self.face, mask), -1)
72
+ else:
73
+ retval = mask
74
+
75
+ #logger.info("Final mask shape: %s", retval.shape)
76
+ return retval
77
+
78
+
79
+ class dfl_full(Mask): # pylint: disable=invalid-name
80
+ """ DFL facial mask """
81
+ def build_mask(self):
82
+ mask = np.zeros(self.face.shape[0:2] + (1, ), dtype=np.float32)
83
+
84
+ nose_ridge = (self.landmarks[27:31], self.landmarks[33:34])
85
+ jaw = (self.landmarks[0:17],
86
+ self.landmarks[48:68],
87
+ self.landmarks[0:1],
88
+ self.landmarks[8:9],
89
+ self.landmarks[16:17])
90
+ eyes = (self.landmarks[17:27],
91
+ self.landmarks[0:1],
92
+ self.landmarks[27:28],
93
+ self.landmarks[16:17],
94
+ self.landmarks[33:34])
95
+ parts = [jaw, nose_ridge, eyes]
96
+
97
+ for item in parts:
98
+ merged = np.concatenate(item)
99
+ cv2.fillConvexPoly(mask, cv2.convexHull(merged), 255.) # pylint: disable=no-member
100
+ return mask
101
+
102
+
103
+ class components(Mask): # pylint: disable=invalid-name
104
+ """ Component model mask """
105
+ def build_mask(self):
106
+ mask = np.zeros(self.face.shape[0:2] + (1, ), dtype=np.float32)
107
+
108
+ r_jaw = (self.landmarks[0:9], self.landmarks[17:18])
109
+ l_jaw = (self.landmarks[8:17], self.landmarks[26:27])
110
+ r_cheek = (self.landmarks[17:20], self.landmarks[8:9])
111
+ l_cheek = (self.landmarks[24:27], self.landmarks[8:9])
112
+ nose_ridge = (self.landmarks[19:25], self.landmarks[8:9],)
113
+ r_eye = (self.landmarks[17:22],
114
+ self.landmarks[27:28],
115
+ self.landmarks[31:36],
116
+ self.landmarks[8:9])
117
+ l_eye = (self.landmarks[22:27],
118
+ self.landmarks[27:28],
119
+ self.landmarks[31:36],
120
+ self.landmarks[8:9])
121
+ nose = (self.landmarks[27:31], self.landmarks[31:36])
122
+ parts = [r_jaw, l_jaw, r_cheek, l_cheek, nose_ridge, r_eye, l_eye, nose]
123
+
124
+ for item in parts:
125
+ merged = np.concatenate(item)
126
+ cv2.fillConvexPoly(mask, cv2.convexHull(merged), 255.) # pylint: disable=no-member
127
+ return mask
128
+
129
+
130
+ class extended(Mask): # pylint: disable=invalid-name
131
+ """ Extended mask
132
+ Based on components mask. Attempts to extend the eyebrow points up the forehead
133
+ """
134
+ def build_mask(self):
135
+ mask = np.zeros(self.face.shape[0:2] + (1, ), dtype=np.float32)
136
+
137
+ landmarks = self.landmarks.copy()
138
+ # mid points between the side of face and eye point
139
+ ml_pnt = (landmarks[36] + landmarks[0]) // 2
140
+ mr_pnt = (landmarks[16] + landmarks[45]) // 2
141
+
142
+ # mid points between the mid points and eye
143
+ ql_pnt = (landmarks[36] + ml_pnt) // 2
144
+ qr_pnt = (landmarks[45] + mr_pnt) // 2
145
+
146
+ # Top of the eye arrays
147
+ bot_l = np.array((ql_pnt, landmarks[36], landmarks[37], landmarks[38], landmarks[39]))
148
+ bot_r = np.array((landmarks[42], landmarks[43], landmarks[44], landmarks[45], qr_pnt))
149
+
150
+ # Eyebrow arrays
151
+ top_l = landmarks[17:22]
152
+ top_r = landmarks[22:27]
153
+
154
+ # Adjust eyebrow arrays
155
+ landmarks[17:22] = top_l + ((top_l - bot_l) // 2)
156
+ landmarks[22:27] = top_r + ((top_r - bot_r) // 2)
157
+
158
+ r_jaw = (landmarks[0:9], landmarks[17:18])
159
+ l_jaw = (landmarks[8:17], landmarks[26:27])
160
+ r_cheek = (landmarks[17:20], landmarks[8:9])
161
+ l_cheek = (landmarks[24:27], landmarks[8:9])
162
+ nose_ridge = (landmarks[19:25], landmarks[8:9],)
163
+ r_eye = (landmarks[17:22], landmarks[27:28], landmarks[31:36], landmarks[8:9])
164
+ l_eye = (landmarks[22:27], landmarks[27:28], landmarks[31:36], landmarks[8:9])
165
+ nose = (landmarks[27:31], landmarks[31:36])
166
+ parts = [r_jaw, l_jaw, r_cheek, l_cheek, nose_ridge, r_eye, l_eye, nose]
167
+
168
+ for item in parts:
169
+ merged = np.concatenate(item)
170
+ cv2.fillConvexPoly(mask, cv2.convexHull(merged), 255.) # pylint: disable=no-member
171
+ return mask
172
+
173
+
174
+ class facehull(Mask): # pylint: disable=invalid-name
175
+ """ Basic face hull mask """
176
+ def build_mask(self):
177
+ mask = np.zeros(self.face.shape[0:2] + (1, ), dtype=np.float32)
178
+ hull = cv2.convexHull( # pylint: disable=no-member
179
+ np.array(self.landmarks).reshape((-1, 2)))
180
+ cv2.fillConvexPoly(mask, hull, 255.0, lineType=cv2.LINE_AA) # pylint: disable=no-member
181
+ return mask
training/dataset/library/LICENSE ADDED
@@ -0,0 +1,674 @@
1
+ GNU GENERAL PUBLIC LICENSE
2
+ Version 3, 29 June 2007
3
+
4
+ Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
5
+ Everyone is permitted to copy and distribute verbatim copies
6
+ of this license document, but changing it is not allowed.
7
+
8
+ Preamble
9
+
10
+ The GNU General Public License is a free, copyleft license for
11
+ software and other kinds of works.
12
+
13
+ The licenses for most software and other practical works are designed
14
+ to take away your freedom to share and change the works. By contrast,
15
+ the GNU General Public License is intended to guarantee your freedom to
16
+ share and change all versions of a program--to make sure it remains free
17
+ software for all its users. We, the Free Software Foundation, use the
18
+ GNU General Public License for most of our software; it applies also to
19
+ any other work released this way by its authors. You can apply it to
20
+ your programs, too.
21
+
22
+ When we speak of free software, we are referring to freedom, not
23
+ price. Our General Public Licenses are designed to make sure that you
24
+ have the freedom to distribute copies of free software (and charge for
25
+ them if you wish), that you receive source code or can get it if you
26
+ want it, that you can change the software or use pieces of it in new
27
+ free programs, and that you know you can do these things.
28
+
29
+ To protect your rights, we need to prevent others from denying you
30
+ these rights or asking you to surrender the rights. Therefore, you have
31
+ certain responsibilities if you distribute copies of the software, or if
32
+ you modify it: responsibilities to respect the freedom of others.
33
+
34
+ For example, if you distribute copies of such a program, whether
35
+ gratis or for a fee, you must pass on to the recipients the same
36
+ freedoms that you received. You must make sure that they, too, receive
37
+ or can get the source code. And you must show them these terms so they
38
+ know their rights.
39
+
40
+ Developers that use the GNU GPL protect your rights with two steps:
41
+ (1) assert copyright on the software, and (2) offer you this License
42
+ giving you legal permission to copy, distribute and/or modify it.
43
+
44
+ For the developers' and authors' protection, the GPL clearly explains
45
+ that there is no warranty for this free software. For both users' and
46
+ authors' sake, the GPL requires that modified versions be marked as
47
+ changed, so that their problems will not be attributed erroneously to
48
+ authors of previous versions.
49
+
50
+ Some devices are designed to deny users access to install or run
51
+ modified versions of the software inside them, although the manufacturer
52
+ can do so. This is fundamentally incompatible with the aim of
53
+ protecting users' freedom to change the software. The systematic
54
+ pattern of such abuse occurs in the area of products for individuals to
55
+ use, which is precisely where it is most unacceptable. Therefore, we
56
+ have designed this version of the GPL to prohibit the practice for those
57
+ products. If such problems arise substantially in other domains, we
58
+ stand ready to extend this provision to those domains in future versions
59
+ of the GPL, as needed to protect the freedom of users.
60
+
61
+ Finally, every program is threatened constantly by software patents.
62
+ States should not allow patents to restrict development and use of
63
+ software on general-purpose computers, but in those that do, we wish to
64
+ avoid the special danger that patents applied to a free program could
65
+ make it effectively proprietary. To prevent this, the GPL assures that
66
+ patents cannot be used to render the program non-free.
67
+
68
+ The precise terms and conditions for copying, distribution and
69
+ modification follow.
70
+
71
+ TERMS AND CONDITIONS
72
+
73
+ 0. Definitions.
74
+
75
+ "This License" refers to version 3 of the GNU General Public License.
76
+
77
+ "Copyright" also means copyright-like laws that apply to other kinds of
78
+ works, such as semiconductor masks.
79
+
80
+ "The Program" refers to any copyrightable work licensed under this
81
+ License. Each licensee is addressed as "you". "Licensees" and
82
+ "recipients" may be individuals or organizations.
83
+
84
+ To "modify" a work means to copy from or adapt all or part of the work
85
+ in a fashion requiring copyright permission, other than the making of an
86
+ exact copy. The resulting work is called a "modified version" of the
87
+ earlier work or a work "based on" the earlier work.
88
+
89
+ A "covered work" means either the unmodified Program or a work based
90
+ on the Program.
91
+
92
+ To "propagate" a work means to do anything with it that, without
93
+ permission, would make you directly or secondarily liable for
94
+ infringement under applicable copyright law, except executing it on a
95
+ computer or modifying a private copy. Propagation includes copying,
96
+ distribution (with or without modification), making available to the
97
+ public, and in some countries other activities as well.
98
+
99
+ To "convey" a work means any kind of propagation that enables other
100
+ parties to make or receive copies. Mere interaction with a user through
101
+ a computer network, with no transfer of a copy, is not conveying.
102
+
103
+ An interactive user interface displays "Appropriate Legal Notices"
104
+ to the extent that it includes a convenient and prominently visible
105
+ feature that (1) displays an appropriate copyright notice, and (2)
106
+ tells the user that there is no warranty for the work (except to the
107
+ extent that warranties are provided), that licensees may convey the
108
+ work under this License, and how to view a copy of this License. If
109
+ the interface presents a list of user commands or options, such as a
110
+ menu, a prominent item in the list meets this criterion.
111
+
112
+ 1. Source Code.
113
+
114
+ The "source code" for a work means the preferred form of the work
115
+ for making modifications to it. "Object code" means any non-source
116
+ form of a work.
117
+
118
+ A "Standard Interface" means an interface that either is an official
119
+ standard defined by a recognized standards body, or, in the case of
120
+ interfaces specified for a particular programming language, one that
121
+ is widely used among developers working in that language.
122
+
123
+ The "System Libraries" of an executable work include anything, other
124
+ than the work as a whole, that (a) is included in the normal form of
125
+ packaging a Major Component, but which is not part of that Major
126
+ Component, and (b) serves only to enable use of the work with that
127
+ Major Component, or to implement a Standard Interface for which an
128
+ implementation is available to the public in source code form. A
129
+ "Major Component", in this context, means a major essential component
130
+ (kernel, window system, and so on) of the specific operating system
131
+ (if any) on which the executable work runs, or a compiler used to
132
+ produce the work, or an object code interpreter used to run it.
133
+
134
+ The "Corresponding Source" for a work in object code form means all
135
+ the source code needed to generate, install, and (for an executable
136
+ work) run the object code and to modify the work, including scripts to
137
+ control those activities. However, it does not include the work's
138
+ System Libraries, or general-purpose tools or generally available free
139
+ programs which are used unmodified in performing those activities but
140
+ which are not part of the work. For example, Corresponding Source
141
+ includes interface definition files associated with source files for
142
+ the work, and the source code for shared libraries and dynamically
143
+ linked subprograms that the work is specifically designed to require,
144
+ such as by intimate data communication or control flow between those
145
+ subprograms and other parts of the work.
146
+
147
+ The Corresponding Source need not include anything that users
148
+ can regenerate automatically from other parts of the Corresponding
149
+ Source.
150
+
151
+ The Corresponding Source for a work in source code form is that
152
+ same work.
153
+
154
+ 2. Basic Permissions.
155
+
156
+ All rights granted under this License are granted for the term of
157
+ copyright on the Program, and are irrevocable provided the stated
158
+ conditions are met. This License explicitly affirms your unlimited
159
+ permission to run the unmodified Program. The output from running a
160
+ covered work is covered by this License only if the output, given its
161
+ content, constitutes a covered work. This License acknowledges your
162
+ rights of fair use or other equivalent, as provided by copyright law.
163
+
164
+ You may make, run and propagate covered works that you do not
165
+ convey, without conditions so long as your license otherwise remains
166
+ in force. You may convey covered works to others for the sole purpose
167
+ of having them make modifications exclusively for you, or provide you
168
+ with facilities for running those works, provided that you comply with
169
+ the terms of this License in conveying all material for which you do
170
+ not control copyright. Those thus making or running the covered works
171
+ for you must do so exclusively on your behalf, under your direction
172
+ and control, on terms that prohibit them from making any copies of
173
+ your copyrighted material outside their relationship with you.
174
+
175
+ Conveying under any other circumstances is permitted solely under
176
+ the conditions stated below. Sublicensing is not allowed; section 10
177
+ makes it unnecessary.
178
+
179
+ 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
180
+
181
+ No covered work shall be deemed part of an effective technological
182
+ measure under any applicable law fulfilling obligations under article
183
+ 11 of the WIPO copyright treaty adopted on 20 December 1996, or
184
+ similar laws prohibiting or restricting circumvention of such
185
+ measures.
186
+
187
+ When you convey a covered work, you waive any legal power to forbid
188
+ circumvention of technological measures to the extent such circumvention
189
+ is effected by exercising rights under this License with respect to
190
+ the covered work, and you disclaim any intention to limit operation or
191
+ modification of the work as a means of enforcing, against the work's
192
+ users, your or third parties' legal rights to forbid circumvention of
193
+ technological measures.
194
+
195
+ 4. Conveying Verbatim Copies.
196
+
197
+ You may convey verbatim copies of the Program's source code as you
198
+ receive it, in any medium, provided that you conspicuously and
199
+ appropriately publish on each copy an appropriate copyright notice;
200
+ keep intact all notices stating that this License and any
201
+ non-permissive terms added in accord with section 7 apply to the code;
202
+ keep intact all notices of the absence of any warranty; and give all
203
+ recipients a copy of this License along with the Program.
204
+
205
+ You may charge any price or no price for each copy that you convey,
206
+ and you may offer support or warranty protection for a fee.
207
+
208
+ 5. Conveying Modified Source Versions.
209
+
210
+ You may convey a work based on the Program, or the modifications to
211
+ produce it from the Program, in the form of source code under the
212
+ terms of section 4, provided that you also meet all of these conditions:
213
+
214
+ a) The work must carry prominent notices stating that you modified
215
+ it, and giving a relevant date.
216
+
217
+ b) The work must carry prominent notices stating that it is
218
+ released under this License and any conditions added under section
219
+ 7. This requirement modifies the requirement in section 4 to
220
+ "keep intact all notices".
221
+
222
+ c) You must license the entire work, as a whole, under this
223
+ License to anyone who comes into possession of a copy. This
224
+ License will therefore apply, along with any applicable section 7
225
+ additional terms, to the whole of the work, and all its parts,
226
+ regardless of how they are packaged. This License gives no
227
+ permission to license the work in any other way, but it does not
228
+ invalidate such permission if you have separately received it.
229
+
230
+ d) If the work has interactive user interfaces, each must display
231
+ Appropriate Legal Notices; however, if the Program has interactive
232
+ interfaces that do not display Appropriate Legal Notices, your
233
+ work need not make them do so.
234
+
235
+ A compilation of a covered work with other separate and independent
236
+ works, which are not by their nature extensions of the covered work,
237
+ and which are not combined with it such as to form a larger program,
238
+ in or on a volume of a storage or distribution medium, is called an
239
+ "aggregate" if the compilation and its resulting copyright are not
240
+ used to limit the access or legal rights of the compilation's users
241
+ beyond what the individual works permit. Inclusion of a covered work
242
+ in an aggregate does not cause this License to apply to the other
243
+ parts of the aggregate.
244
+
245
+ 6. Conveying Non-Source Forms.
246
+
247
+ You may convey a covered work in object code form under the terms
248
+ of sections 4 and 5, provided that you also convey the
249
+ machine-readable Corresponding Source under the terms of this License,
250
+ in one of these ways:
251
+
252
+ a) Convey the object code in, or embodied in, a physical product
253
+ (including a physical distribution medium), accompanied by the
254
+ Corresponding Source fixed on a durable physical medium
255
+ customarily used for software interchange.
256
+
257
+ b) Convey the object code in, or embodied in, a physical product
258
+ (including a physical distribution medium), accompanied by a
259
+ written offer, valid for at least three years and valid for as
260
+ long as you offer spare parts or customer support for that product
261
+ model, to give anyone who possesses the object code either (1) a
262
+ copy of the Corresponding Source for all the software in the
263
+ product that is covered by this License, on a durable physical
264
+ medium customarily used for software interchange, for a price no
265
+ more than your reasonable cost of physically performing this
266
+ conveying of source, or (2) access to copy the
267
+ Corresponding Source from a network server at no charge.
268
+
269
+ c) Convey individual copies of the object code with a copy of the
270
+ written offer to provide the Corresponding Source. This
271
+ alternative is allowed only occasionally and noncommercially, and
272
+ only if you received the object code with such an offer, in accord
273
+ with subsection 6b.
274
+
275
+ d) Convey the object code by offering access from a designated
276
+ place (gratis or for a charge), and offer equivalent access to the
277
+ Corresponding Source in the same way through the same place at no
278
+ further charge. You need not require recipients to copy the
279
+ Corresponding Source along with the object code. If the place to
280
+ copy the object code is a network server, the Corresponding Source
281
+ may be on a different server (operated by you or a third party)
282
+ that supports equivalent copying facilities, provided you maintain
283
+ clear directions next to the object code saying where to find the
284
+ Corresponding Source. Regardless of what server hosts the
285
+ Corresponding Source, you remain obligated to ensure that it is
286
+ available for as long as needed to satisfy these requirements.
287
+
288
+ e) Convey the object code using peer-to-peer transmission, provided
289
+ you inform other peers where the object code and Corresponding
290
+ Source of the work are being offered to the general public at no
291
+ charge under subsection 6d.
292
+
293
+ A separable portion of the object code, whose source code is excluded
294
+ from the Corresponding Source as a System Library, need not be
295
+ included in conveying the object code work.
296
+
297
+ A "User Product" is either (1) a "consumer product", which means any
298
+ tangible personal property which is normally used for personal, family,
299
+ or household purposes, or (2) anything designed or sold for incorporation
300
+ into a dwelling. In determining whether a product is a consumer product,
301
+ doubtful cases shall be resolved in favor of coverage. For a particular
302
+ product received by a particular user, "normally used" refers to a
303
+ typical or common use of that class of product, regardless of the status
304
+ of the particular user or of the way in which the particular user
305
+ actually uses, or expects or is expected to use, the product. A product
306
+ is a consumer product regardless of whether the product has substantial
307
+ commercial, industrial or non-consumer uses, unless such uses represent
308
+ the only significant mode of use of the product.
309
+
310
+ "Installation Information" for a User Product means any methods,
311
+ procedures, authorization keys, or other information required to install
312
+ and execute modified versions of a covered work in that User Product from
313
+ a modified version of its Corresponding Source. The information must
314
+ suffice to ensure that the continued functioning of the modified object
315
+ code is in no case prevented or interfered with solely because
316
+ modification has been made.
317
+
318
+ If you convey an object code work under this section in, or with, or
319
+ specifically for use in, a User Product, and the conveying occurs as
320
+ part of a transaction in which the right of possession and use of the
321
+ User Product is transferred to the recipient in perpetuity or for a
322
+ fixed term (regardless of how the transaction is characterized), the
323
+ Corresponding Source conveyed under this section must be accompanied
324
+ by the Installation Information. But this requirement does not apply
325
+ if neither you nor any third party retains the ability to install
326
+ modified object code on the User Product (for example, the work has
327
+ been installed in ROM).
328
+
329
+ The requirement to provide Installation Information does not include a
330
+ requirement to continue to provide support service, warranty, or updates
331
+ for a work that has been modified or installed by the recipient, or for
332
+ the User Product in which it has been modified or installed. Access to a
333
+ network may be denied when the modification itself materially and
334
+ adversely affects the operation of the network or violates the rules and
335
+ protocols for communication across the network.
336
+
337
+ Corresponding Source conveyed, and Installation Information provided,
338
+ in accord with this section must be in a format that is publicly
339
+ documented (and with an implementation available to the public in
340
+ source code form), and must require no special password or key for
341
+ unpacking, reading or copying.
342
+
343
+ 7. Additional Terms.
344
+
345
+ "Additional permissions" are terms that supplement the terms of this
346
+ License by making exceptions from one or more of its conditions.
347
+ Additional permissions that are applicable to the entire Program shall
348
+ be treated as though they were included in this License, to the extent
349
+ that they are valid under applicable law. If additional permissions
350
+ apply only to part of the Program, that part may be used separately
351
+ under those permissions, but the entire Program remains governed by
352
+ this License without regard to the additional permissions.
353
+
354
+ When you convey a copy of a covered work, you may at your option
355
+ remove any additional permissions from that copy, or from any part of
356
+ it. (Additional permissions may be written to require their own
357
+ removal in certain cases when you modify the work.) You may place
358
+ additional permissions on material, added by you to a covered work,
359
+ for which you have or can give appropriate copyright permission.
360
+
361
+ Notwithstanding any other provision of this License, for material you
362
+ add to a covered work, you may (if authorized by the copyright holders of
363
+ that material) supplement the terms of this License with terms:
364
+
365
+ a) Disclaiming warranty or limiting liability differently from the
366
+ terms of sections 15 and 16 of this License; or
367
+
368
+ b) Requiring preservation of specified reasonable legal notices or
369
+ author attributions in that material or in the Appropriate Legal
370
+ Notices displayed by works containing it; or
371
+
372
+ c) Prohibiting misrepresentation of the origin of that material, or
373
+ requiring that modified versions of such material be marked in
374
+ reasonable ways as different from the original version; or
375
+
376
+ d) Limiting the use for publicity purposes of names of licensors or
377
+ authors of the material; or
378
+
379
+ e) Declining to grant rights under trademark law for use of some
380
+ trade names, trademarks, or service marks; or
381
+
382
+ f) Requiring indemnification of licensors and authors of that
383
+ material by anyone who conveys the material (or modified versions of
384
+ it) with contractual assumptions of liability to the recipient, for
385
+ any liability that these contractual assumptions directly impose on
386
+ those licensors and authors.
387
+
388
+ All other non-permissive additional terms are considered "further
389
+ restrictions" within the meaning of section 10. If the Program as you
390
+ received it, or any part of it, contains a notice stating that it is
391
+ governed by this License along with a term that is a further
392
+ restriction, you may remove that term. If a license document contains
393
+ a further restriction but permits relicensing or conveying under this
394
+ License, you may add to a covered work material governed by the terms
395
+ of that license document, provided that the further restriction does
396
+ not survive such relicensing or conveying.
397
+
398
+ If you add terms to a covered work in accord with this section, you
399
+ must place, in the relevant source files, a statement of the
400
+ additional terms that apply to those files, or a notice indicating
401
+ where to find the applicable terms.
402
+
403
+ Additional terms, permissive or non-permissive, may be stated in the
404
+ form of a separately written license, or stated as exceptions;
405
+ the above requirements apply either way.
406
+
407
+ 8. Termination.
408
+
409
+ You may not propagate or modify a covered work except as expressly
410
+ provided under this License. Any attempt otherwise to propagate or
411
+ modify it is void, and will automatically terminate your rights under
412
+ this License (including any patent licenses granted under the third
413
+ paragraph of section 11).
414
+
415
+ However, if you cease all violation of this License, then your
416
+ license from a particular copyright holder is reinstated (a)
417
+ provisionally, unless and until the copyright holder explicitly and
418
+ finally terminates your license, and (b) permanently, if the copyright
419
+ holder fails to notify you of the violation by some reasonable means
420
+ prior to 60 days after the cessation.
421
+
422
+ Moreover, your license from a particular copyright holder is
423
+ reinstated permanently if the copyright holder notifies you of the
424
+ violation by some reasonable means, this is the first time you have
425
+ received notice of violation of this License (for any work) from that
426
+ copyright holder, and you cure the violation prior to 30 days after
427
+ your receipt of the notice.
428
+
429
+ Termination of your rights under this section does not terminate the
430
+ licenses of parties who have received copies or rights from you under
431
+ this License. If your rights have been terminated and not permanently
432
+ reinstated, you do not qualify to receive new licenses for the same
433
+ material under section 10.
434
+
435
+ 9. Acceptance Not Required for Having Copies.
436
+
437
+ You are not required to accept this License in order to receive or
438
+ run a copy of the Program. Ancillary propagation of a covered work
439
+ occurring solely as a consequence of using peer-to-peer transmission
440
+ to receive a copy likewise does not require acceptance. However,
441
+ nothing other than this License grants you permission to propagate or
442
+ modify any covered work. These actions infringe copyright if you do
443
+ not accept this License. Therefore, by modifying or propagating a
444
+ covered work, you indicate your acceptance of this License to do so.
445
+
446
+ 10. Automatic Licensing of Downstream Recipients.
447
+
448
+ Each time you convey a covered work, the recipient automatically
449
+ receives a license from the original licensors, to run, modify and
450
+ propagate that work, subject to this License. You are not responsible
451
+ for enforcing compliance by third parties with this License.
452
+
453
+ An "entity transaction" is a transaction transferring control of an
454
+ organization, or substantially all assets of one, or subdividing an
455
+ organization, or merging organizations. If propagation of a covered
456
+ work results from an entity transaction, each party to that
457
+ transaction who receives a copy of the work also receives whatever
458
+ licenses to the work the party's predecessor in interest had or could
459
+ give under the previous paragraph, plus a right to possession of the
460
+ Corresponding Source of the work from the predecessor in interest, if
461
+ the predecessor has it or can get it with reasonable efforts.
462
+
463
+ You may not impose any further restrictions on the exercise of the
464
+ rights granted or affirmed under this License. For example, you may
465
+ not impose a license fee, royalty, or other charge for exercise of
466
+ rights granted under this License, and you may not initiate litigation
467
+ (including a cross-claim or counterclaim in a lawsuit) alleging that
468
+ any patent claim is infringed by making, using, selling, offering for
469
+ sale, or importing the Program or any portion of it.
470
+
471
+ 11. Patents.
472
+
473
+ A "contributor" is a copyright holder who authorizes use under this
474
+ License of the Program or a work on which the Program is based. The
475
+ work thus licensed is called the contributor's "contributor version".
476
+
477
+ A contributor's "essential patent claims" are all patent claims
478
+ owned or controlled by the contributor, whether already acquired or
479
+ hereafter acquired, that would be infringed by some manner, permitted
480
+ by this License, of making, using, or selling its contributor version,
481
+ but do not include claims that would be infringed only as a
482
+ consequence of further modification of the contributor version. For
483
+ purposes of this definition, "control" includes the right to grant
484
+ patent sublicenses in a manner consistent with the requirements of
485
+ this License.
486
+
487
+ Each contributor grants you a non-exclusive, worldwide, royalty-free
488
+ patent license under the contributor's essential patent claims, to
489
+ make, use, sell, offer for sale, import and otherwise run, modify and
490
+ propagate the contents of its contributor version.
491
+
492
+ In the following three paragraphs, a "patent license" is any express
493
+ agreement or commitment, however denominated, not to enforce a patent
494
+ (such as an express permission to practice a patent or covenant not to
495
+ sue for patent infringement). To "grant" such a patent license to a
496
+ party means to make such an agreement or commitment not to enforce a
497
+ patent against the party.
498
+
499
+ If you convey a covered work, knowingly relying on a patent license,
500
+ and the Corresponding Source of the work is not available for anyone
501
+ to copy, free of charge and under the terms of this License, through a
502
+ publicly available network server or other readily accessible means,
503
+ then you must either (1) cause the Corresponding Source to be so
504
+ available, or (2) arrange to deprive yourself of the benefit of the
505
+ patent license for this particular work, or (3) arrange, in a manner
506
+ consistent with the requirements of this License, to extend the patent
507
+ license to downstream recipients. "Knowingly relying" means you have
508
+ actual knowledge that, but for the patent license, your conveying the
509
+ covered work in a country, or your recipient's use of the covered work
510
+ in a country, would infringe one or more identifiable patents in that
511
+ country that you have reason to believe are valid.
512
+
513
+ If, pursuant to or in connection with a single transaction or
514
+ arrangement, you convey, or propagate by procuring conveyance of, a
515
+ covered work, and grant a patent license to some of the parties
516
+ receiving the covered work authorizing them to use, propagate, modify
517
+ or convey a specific copy of the covered work, then the patent license
518
+ you grant is automatically extended to all recipients of the covered
519
+ work and works based on it.
520
+
521
+ A patent license is "discriminatory" if it does not include within
522
+ the scope of its coverage, prohibits the exercise of, or is
523
+ conditioned on the non-exercise of one or more of the rights that are
524
+ specifically granted under this License. You may not convey a covered
525
+ work if you are a party to an arrangement with a third party that is
526
+ in the business of distributing software, under which you make payment
527
+ to the third party based on the extent of your activity of conveying
528
+ the work, and under which the third party grants, to any of the
529
+ parties who would receive the covered work from you, a discriminatory
530
+ patent license (a) in connection with copies of the covered work
531
+ conveyed by you (or copies made from those copies), or (b) primarily
532
+ for and in connection with specific products or compilations that
533
+ contain the covered work, unless you entered into that arrangement,
534
+ or that patent license was granted, prior to 28 March 2007.
535
+
536
+ Nothing in this License shall be construed as excluding or limiting
537
+ any implied license or other defenses to infringement that may
538
+ otherwise be available to you under applicable patent law.
539
+
540
+ 12. No Surrender of Others' Freedom.
541
+
542
+ If conditions are imposed on you (whether by court order, agreement or
543
+ otherwise) that contradict the conditions of this License, they do not
544
+ excuse you from the conditions of this License. If you cannot convey a
545
+ covered work so as to satisfy simultaneously your obligations under this
546
+ License and any other pertinent obligations, then as a consequence you may
547
+ not convey it at all. For example, if you agree to terms that obligate you
548
+ to collect a royalty for further conveying from those to whom you convey
549
+ the Program, the only way you could satisfy both those terms and this
550
+ License would be to refrain entirely from conveying the Program.
551
+
552
+ 13. Use with the GNU Affero General Public License.
553
+
554
+ Notwithstanding any other provision of this License, you have
555
+ permission to link or combine any covered work with a work licensed
556
+ under version 3 of the GNU Affero General Public License into a single
557
+ combined work, and to convey the resulting work. The terms of this
558
+ License will continue to apply to the part which is the covered work,
559
+ but the special requirements of the GNU Affero General Public License,
560
+ section 13, concerning interaction through a network will apply to the
561
+ combination as such.
562
+
563
+ 14. Revised Versions of this License.
564
+
565
+ The Free Software Foundation may publish revised and/or new versions of
566
+ the GNU General Public License from time to time. Such new versions will
567
+ be similar in spirit to the present version, but may differ in detail to
568
+ address new problems or concerns.
569
+
570
+ Each version is given a distinguishing version number. If the
571
+ Program specifies that a certain numbered version of the GNU General
572
+ Public License "or any later version" applies to it, you have the
573
+ option of following the terms and conditions either of that numbered
574
+ version or of any later version published by the Free Software
575
+ Foundation. If the Program does not specify a version number of the
576
+ GNU General Public License, you may choose any version ever published
577
+ by the Free Software Foundation.
578
+
579
+ If the Program specifies that a proxy can decide which future
580
+ versions of the GNU General Public License can be used, that proxy's
581
+ public statement of acceptance of a version permanently authorizes you
582
+ to choose that version for the Program.
583
+
584
+ Later license versions may give you additional or different
585
+ permissions. However, no additional obligations are imposed on any
586
+ author or copyright holder as a result of your choosing to follow a
587
+ later version.
588
+
589
+ 15. Disclaimer of Warranty.
590
+
591
+ THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
592
+ APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
593
+ HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
594
+ OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
595
+ THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
596
+ PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
597
+ IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
598
+ ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
599
+
600
+ 16. Limitation of Liability.
601
+
602
+ IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
603
+ WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
604
+ THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
605
+ GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
606
+ USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
607
+ DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
608
+ PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
609
+ EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
610
+ SUCH DAMAGES.
611
+
612
+ 17. Interpretation of Sections 15 and 16.
613
+
614
+ If the disclaimer of warranty and limitation of liability provided
615
+ above cannot be given local legal effect according to their terms,
616
+ reviewing courts shall apply local law that most closely approximates
617
+ an absolute waiver of all civil liability in connection with the
618
+ Program, unless a warranty or assumption of liability accompanies a
619
+ copy of the Program in return for a fee.
620
+
621
+ END OF TERMS AND CONDITIONS
622
+
623
+ How to Apply These Terms to Your New Programs
624
+
625
+ If you develop a new program, and you want it to be of the greatest
626
+ possible use to the public, the best way to achieve this is to make it
627
+ free software which everyone can redistribute and change under these terms.
628
+
629
+ To do so, attach the following notices to the program. It is safest
630
+ to attach them to the start of each source file to most effectively
631
+ state the exclusion of warranty; and each file should have at least
632
+ the "copyright" line and a pointer to where the full notice is found.
633
+
634
+ <one line to give the program's name and a brief idea of what it does.>
635
+ Copyright (C) <year> <name of author>
636
+
637
+ This program is free software: you can redistribute it and/or modify
638
+ it under the terms of the GNU General Public License as published by
639
+ the Free Software Foundation, either version 3 of the License, or
640
+ (at your option) any later version.
641
+
642
+ This program is distributed in the hope that it will be useful,
643
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
644
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
645
+ GNU General Public License for more details.
646
+
647
+ You should have received a copy of the GNU General Public License
648
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
649
+
650
+ Also add information on how to contact you by electronic and paper mail.
651
+
652
+ If the program does terminal interaction, make it output a short
653
+ notice like this when it starts in an interactive mode:
654
+
655
+ <program> Copyright (C) <year> <name of author>
656
+ This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
657
+ This is free software, and you are welcome to redistribute it
658
+ under certain conditions; type `show c' for details.
659
+
660
+ The hypothetical commands `show w' and `show c' should show the appropriate
661
+ parts of the General Public License. Of course, your program's commands
662
+ might be different; for a GUI interface, you would use an "about box".
663
+
664
+ You should also get your employer (if you work as a programmer) or school,
665
+ if any, to sign a "copyright disclaimer" for the program, if necessary.
666
+ For more information on this, and how to apply and follow the GNU GPL, see
667
+ <https://www.gnu.org/licenses/>.
668
+
669
+ The GNU General Public License does not permit incorporating your program
670
+ into proprietary programs. If your program is a subroutine library, you
671
+ may consider it more useful to permit linking proprietary applications with
672
+ the library. If this is what you want to do, use the GNU Lesser General
673
+ Public License instead of this License. But first, please read
674
+ <https://www.gnu.org/licenses/why-not-lgpl.html>.
training/dataset/library/README.md ADDED
@@ -0,0 +1,12 @@
1
+ # Face-X-ray
2
+ An unofficial PyTorch re-implementation of Face X-ray
3
+
4
+ This repo contains code for the BI data generation pipeline from [Face X-ray for More General Face Forgery Detection](https://arxiv.org/abs/1912.13458) by Lingzhi Li, Jianmin Bao, Ting Zhang, Hao Yang, Dong Chen, Fang Wen, Baining Guo.
5
+
6
+ # Usage
7
+
8
+ Just run bi_online_generation.py and you will get the following result, which is described in Figure 5 of the paper.
9
+
10
+ ![demo](all_in_one.jpg)
11
+
12
+ To generate the whole BI dataset, you will need to crop all the faces and compute the landmarks as described in the code.
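A minimal usage sketch of the generator defined in bi_online_generation.py (added below), mirroring that file's `__main__` block; it assumes the module is imported through the `training.dataset.library` package (its `DeepFakeMask` import is package-relative) and that the working directory is this library folder so `precomuted_landmarks.json` and the two bundled sample PNGs resolve:

```python
# Sketch only: mirrors the __main__ demo in bi_online_generation.py.
# Assumes the repo root is on sys.path and the current working directory is
# training/dataset/library/ so the relative data paths resolve.
from training.dataset.library.bi_online_generation import BIOnlineGeneration

ds = BIOnlineGeneration()
img, mask, label = ds.gen_one_datapoint()  # label is 'real' or 'fake'
print(img.shape, mask.shape, label)        # (257, 257, 3) (257, 257, 1)
```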
training/dataset/library/all_in_one.jpg ADDED
training/dataset/library/bi_online_generation.py ADDED
@@ -0,0 +1,241 @@
 
1
+ import dlib
2
+ from skimage import io
3
+ from skimage import transform as sktransform
4
+ import numpy as np
5
+ from matplotlib import pyplot as plt
6
+ import json
7
+ import os
8
+ import random
9
+ from PIL import Image
10
+ from imgaug import augmenters as iaa
11
+ from .DeepFakeMask import dfl_full,facehull,components,extended
12
+ import cv2
13
+ import tqdm
14
+
15
+ def name_resolve(path):
16
+ name = os.path.splitext(os.path.basename(path))[0]
17
+ vid_id, frame_id = name.split('_')[0:2]
18
+ return vid_id, frame_id
19
+
20
+ def total_euclidean_distance(a,b):
21
+ assert len(a.shape) == 2
22
+ return np.sum(np.linalg.norm(a-b,axis=1))
23
+
24
+ def random_get_hull(landmark,img1,hull_type):
25
+ if hull_type == 0:
26
+ mask = dfl_full(landmarks=landmark.astype('int32'),face=img1, channels=3).mask
27
+ return mask/255
28
+ elif hull_type == 1:
29
+ mask = extended(landmarks=landmark.astype('int32'),face=img1, channels=3).mask
30
+ return mask/255
31
+ elif hull_type == 2:
32
+ mask = components(landmarks=landmark.astype('int32'),face=img1, channels=3).mask
33
+ return mask/255
34
+ elif hull_type == 3:
35
+ mask = facehull(landmarks=landmark.astype('int32'),face=img1, channels=3).mask
36
+ return mask/255
37
+
38
+ def random_erode_dilate(mask, ksize=None):
39
+ if random.random()>0.5:
40
+ if ksize is None:
41
+ ksize = random.randint(1,21)
42
+ if ksize % 2 == 0:
43
+ ksize += 1
44
+ mask = np.array(mask).astype(np.uint8)*255
45
+ kernel = np.ones((ksize,ksize),np.uint8)
46
+ mask = cv2.erode(mask,kernel,1)/255
47
+ else:
48
+ if ksize is None:
49
+ ksize = random.randint(1,5)
50
+ if ksize % 2 == 0:
51
+ ksize += 1
52
+ mask = np.array(mask).astype(np.uint8)*255
53
+ kernel = np.ones((ksize,ksize),np.uint8)
54
+ mask = cv2.dilate(mask,kernel,1)/255
55
+ return mask
56
+
57
+
58
+ # borrow from https://github.com/MarekKowalski/FaceSwap
59
+ def blendImages(src, dst, mask, featherAmount=0.2):
60
+
61
+ maskIndices = np.where(mask != 0)
62
+
63
+ src_mask = np.ones_like(mask)
64
+ dst_mask = np.zeros_like(mask)
65
+
66
+ maskPts = np.hstack((maskIndices[1][:, np.newaxis], maskIndices[0][:, np.newaxis]))
67
+ faceSize = np.max(maskPts, axis=0) - np.min(maskPts, axis=0)
68
+ featherAmount = featherAmount * np.max(faceSize)
69
+
70
+ hull = cv2.convexHull(maskPts)
71
+ dists = np.zeros(maskPts.shape[0])
72
+ for i in range(maskPts.shape[0]):
73
+ dists[i] = cv2.pointPolygonTest(hull, (maskPts[i, 0], maskPts[i, 1]), True)
74
+
75
+ weights = np.clip(dists / featherAmount, 0, 1)
76
+
77
+ composedImg = np.copy(dst)
78
+ composedImg[maskIndices[0], maskIndices[1]] = weights[:, np.newaxis] * src[maskIndices[0], maskIndices[1]] + (1 - weights[:, np.newaxis]) * dst[maskIndices[0], maskIndices[1]]
79
+
80
+ composedMask = np.copy(dst_mask)
81
+ composedMask[maskIndices[0], maskIndices[1]] = weights[:, np.newaxis] * src_mask[maskIndices[0], maskIndices[1]] + (
82
+ 1 - weights[:, np.newaxis]) * dst_mask[maskIndices[0], maskIndices[1]]
83
+
84
+ return composedImg, composedMask
85
+
86
+
87
+ # borrow from https://github.com/MarekKowalski/FaceSwap
88
+ def colorTransfer(src, dst, mask):
89
+ transferredDst = np.copy(dst)
90
+
91
+ maskIndices = np.where(mask != 0)
92
+
93
+
94
+ maskedSrc = src[maskIndices[0], maskIndices[1]].astype(np.int32)
95
+ maskedDst = dst[maskIndices[0], maskIndices[1]].astype(np.int32)
96
+
97
+ meanSrc = np.mean(maskedSrc, axis=0)
98
+ meanDst = np.mean(maskedDst, axis=0)
99
+
100
+ maskedDst = maskedDst - meanDst
101
+ maskedDst = maskedDst + meanSrc
102
+ maskedDst = np.clip(maskedDst, 0, 255)
103
+
104
+ transferredDst[maskIndices[0], maskIndices[1]] = maskedDst
105
+
106
+ return transferredDst
107
+
108
+ class BIOnlineGeneration():
109
+ def __init__(self):
110
+ with open('precomuted_landmarks.json', 'r') as f:
111
+ self.landmarks_record = json.load(f)
112
+ for k,v in self.landmarks_record.items():
113
+ self.landmarks_record[k] = np.array(v)
114
+ # all frames are extracted from all videos and named {videoid}_{frameid}
115
+ self.data_list = [
116
+ '000_0000.png',
117
+ '001_0000.png'
118
+ ] * 10000
119
+
120
+ # predefine mask distortion
121
+ self.distortion = iaa.Sequential([iaa.PiecewiseAffine(scale=(0.01, 0.15))])
122
+
123
+ def gen_one_datapoint(self):
124
+ background_face_path = random.choice(self.data_list)
125
+ data_type = 'real' if random.randint(0,1) else 'fake'
126
+ if data_type == 'fake' :
127
+ face_img,mask = self.get_blended_face(background_face_path)
128
+ mask = ( 1 - mask ) * mask * 4
129
+ else:
130
+ face_img = io.imread(background_face_path)
131
+ mask = np.zeros((317, 317, 1))
132
+
133
+ # randomly downsample after BI pipeline
134
+ if random.randint(0,1):
135
+ aug_size = random.randint(64, 317)
136
+ face_img = Image.fromarray(face_img)
137
+ if random.randint(0,1):
138
+ face_img = face_img.resize((aug_size, aug_size), Image.BILINEAR)
139
+ else:
140
+ face_img = face_img.resize((aug_size, aug_size), Image.NEAREST)
141
+ face_img = face_img.resize((317, 317),Image.BILINEAR)
142
+ face_img = np.array(face_img)
143
+
144
+ # random jpeg compression after BI pipeline
145
+ if random.randint(0,1):
146
+ quality = random.randint(60, 100)
147
+ encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), quality]
148
+ face_img_encode = cv2.imencode('.jpg', face_img, encode_param)[1]
149
+ face_img = cv2.imdecode(face_img_encode, cv2.IMREAD_COLOR)
150
+
151
+ face_img = face_img[60:317,30:287,:]
152
+ mask = mask[60:317,30:287,:]
153
+
154
+ # random flip
155
+ if random.randint(0,1):
156
+ face_img = np.flip(face_img,1)
157
+ mask = np.flip(mask,1)
158
+
159
+ return face_img,mask,data_type
160
+
161
+ def get_blended_face(self,background_face_path):
162
+ background_face = io.imread(background_face_path)
163
+ background_landmark = self.landmarks_record[background_face_path]
164
+
165
+ foreground_face_path = self.search_similar_face(background_landmark,background_face_path)
166
+ foreground_face = io.imread(foreground_face_path)
167
+
168
+ # down sample before blending
169
+ aug_size = random.randint(128,317)
170
+ background_landmark = background_landmark * (aug_size/317)
171
+ foreground_face = sktransform.resize(foreground_face,(aug_size,aug_size),preserve_range=True).astype(np.uint8)
172
+ background_face = sktransform.resize(background_face,(aug_size,aug_size),preserve_range=True).astype(np.uint8)
173
+
174
+ # get random type of initial blending mask
175
+ mask = random_get_hull(background_landmark, background_face, hull_type=random.randint(0, 3))
176
+
177
+ # random deform mask
178
+ mask = self.distortion.augment_image(mask)
179
+ mask = random_erode_dilate(mask)
180
+
181
+ # filter out empty masks after deformation
182
+ if np.sum(mask) == 0 :
183
+ raise NotImplementedError
184
+
185
+ # apply color transfer
186
+ foreground_face = colorTransfer(background_face, foreground_face, mask*255)
187
+
188
+ # blend the two faces
189
+ blended_face, mask = blendImages(foreground_face, background_face, mask*255)
190
+ blended_face = blended_face.astype(np.uint8)
191
+
192
+ # resize back to default resolution
193
+ blended_face = sktransform.resize(blended_face,(317,317),preserve_range=True).astype(np.uint8)
194
+ mask = sktransform.resize(mask,(317,317),preserve_range=True)
195
+ mask = mask[:,:,0:1]
196
+ return blended_face,mask
197
+
198
+ def search_similar_face(self,this_landmark,background_face_path):
199
+ vid_id, frame_id = name_resolve(background_face_path)
200
+ min_dist = 99999999
201
+
202
+ # randomly sample 5000 frames from all frames
203
+ all_candidate_path = random.sample( self.data_list, k=5000)
204
+
205
+ # filter out all frames that come from the same video as the background face
206
+ all_candidate_path = filter(lambda k:name_resolve(k)[0] != vid_id, all_candidate_path)
207
+ all_candidate_path = list(all_candidate_path)
208
+
209
+ # loop through all candidate frames to get the best match
210
+ for candidate_path in all_candidate_path:
211
+ candidate_landmark = self.landmarks_record[candidate_path].astype(np.float32)
212
+ candidate_distance = total_euclidean_distance(candidate_landmark, this_landmark)
213
+ if candidate_distance < min_dist:
214
+ min_dist = candidate_distance
215
+ min_path = candidate_path
216
+
217
+ return min_path
218
+
219
+ if __name__ == '__main__':
220
+ ds = BIOnlineGeneration()
221
+ from tqdm import tqdm
222
+ all_imgs = []
223
+ for _ in tqdm(range(50)):
224
+ img,mask,label = ds.gen_one_datapoint()
225
+ mask = np.repeat(mask,3,2)
226
+ mask = (mask*255).astype(np.uint8)
227
+ img_cat = np.concatenate([img,mask],1)
228
+ all_imgs.append(img_cat)
229
+ all_in_one = Image.new('RGB', (2570,2570))
230
+
231
+ for x in range(5):
232
+ for y in range(10):
233
+ idx = x*10+y
234
+ im = Image.fromarray(all_imgs[idx])
235
+
236
+ dx = x*514
237
+ dy = y*257
238
+
239
+ all_in_one.paste(im, (dx,dy))
240
+
241
+ all_in_one.save("all_in_one.jpg")
training/dataset/library/precomuted_landmarks.json ADDED
@@ -0,0 +1 @@
1
+ {"000_0000.png": [[56, 143], [57, 168], [61, 192], [67, 216], [76, 238], [93, 257], [112, 273], [133, 288], [156, 291], [178, 287], [198, 271], [219, 256], [236, 237], [246, 216], [250, 192], [252, 167], [252, 142], [69, 131], [84, 123], [102, 123], [119, 126], [137, 132], [178, 130], [195, 122], [213, 119], [230, 119], [244, 126], [158, 149], [158, 168], [158, 186], [159, 205], [140, 211], [148, 214], [158, 219], [168, 214], [176, 210], [91, 150], [102, 143], [116, 144], [127, 154], [115, 156], [101, 156], [188, 152], [199, 142], [213, 141], [224, 148], [214, 153], [201, 154], [117, 232], [134, 229], [148, 228], [158, 231], [168, 228], [181, 229], [195, 232], [182, 246], [169, 253], [158, 254], [147, 254], [132, 247], [125, 234], [147, 238], [158, 239], [168, 237], [188, 234], [168, 237], [158, 239], [147, 238]], "001_0000.png": [[56, 143], [57, 168], [61, 192], [67, 216], [76, 238], [93, 257], [112, 273], [133, 288], [156, 291], [178, 287], [198, 271], [219, 256], [236, 237], [246, 216], [250, 192], [252, 167], [252, 142], [69, 131], [84, 123], [102, 123], [119, 126], [137, 132], [178, 130], [195, 122], [213, 119], [230, 119], [244, 126], [158, 149], [158, 168], [158, 186], [159, 205], [140, 211], [148, 214], [158, 219], [168, 214], [176, 210], [91, 150], [102, 143], [116, 144], [127, 154], [115, 156], [101, 156], [188, 152], [199, 142], [213, 141], [224, 148], [214, 153], [201, 154], [117, 232], [134, 229], [148, 228], [158, 231], [168, 228], [181, 229], [195, 232], [182, 246], [169, 253], [158, 254], [147, 254], [132, 247], [125, 234], [147, 238], [158, 239], [168, 237], [188, 234], [168, 237], [158, 239], [147, 238]]}
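The JSON above maps each sample image name to its 68 dlib-style facial landmark points. A minimal sketch of how it is consumed, mirroring `BIOnlineGeneration.__init__` in bi_online_generation.py (it assumes the working directory is training/dataset/library/ so the relative path resolves):

```python
# Sketch: load the precomputed landmarks the same way BIOnlineGeneration does.
import json
import numpy as np

with open('precomuted_landmarks.json', 'r') as f:
    landmarks_record = {k: np.array(v) for k, v in json.load(f).items()}

print(landmarks_record['000_0000.png'].shape)  # (68, 2): one (x, y) pair per landmark
```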
training/dataset/lrl_dataset.py ADDED
@@ -0,0 +1,139 @@
 
1
+ import os
2
+ import sys
3
+ current_file_path = os.path.abspath(__file__)
4
+ parent_dir = os.path.dirname(os.path.dirname(current_file_path))
5
+ project_root_dir = os.path.dirname(parent_dir)
6
+ sys.path.append(parent_dir)
7
+ sys.path.append(project_root_dir)
8
+
9
+ import cv2
10
+ import random
11
+ import yaml
12
+ import torch
13
+ import numpy as np
14
+ from copy import deepcopy
15
+ import albumentations as A
16
+ from .abstract_dataset import DeepfakeAbstractBaseDataset
17
+ from PIL import Image
18
+
19
+ c=0
20
+
21
+ class LRLDataset(DeepfakeAbstractBaseDataset):
22
+ def __init__(self, config=None, mode='train'):
23
+ super().__init__(config, mode)
24
+ global c
25
+ c=config
26
+
27
+ def multi_pass_filter(self, img, r1=0.33, r2=0.66):
28
+ rows, cols = img.shape
29
+ k = cols / rows
30
+
31
+ mask = np.zeros((rows, cols), np.uint8)
32
+ x, y = np.ogrid[:rows, :cols]
33
+ mask_area = (k * x + y < r1 * cols)
34
+ mask[mask_area] = 1
35
+ low_mask = mask
36
+
37
+ mask = np.ones((rows, cols), np.uint8)
38
+ x, y = np.ogrid[:rows, :cols]
39
+ mask_area = (k * x + y < r2 * cols)
40
+ mask[mask_area] = 0
41
+ high_mask = mask
42
+
43
+ mask1 = np.zeros((rows, cols), np.uint8)
44
+ mask1[low_mask == 0] = 1
45
+ mask2 = np.zeros((rows, cols), np.uint8)
46
+ mask2[high_mask == 0] = 1
47
+ mid_mask = mask1 * mask2
48
+
49
+ return low_mask, mid_mask, high_mask
50
+
51
+ def image2dct(self,img):
52
+ img_gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
53
+ img_gray = np.float32(img_gray)
54
+ img_dct = cv2.dct(img_gray)
55
+ # img_dct = np.log(np.abs(img_dct)+1e-6)
56
+
57
+ low_mask, mid_mask, high_mask = self.multi_pass_filter(img_dct, r1=0.33, r2=0.33)
58
+ img_dct_filterd = high_mask * img_dct
59
+ img_idct = cv2.idct(img_dct_filterd)
60
+
61
+ return img_idct
62
+
63
+ def __getitem__(self, index):
64
+ image_trans, label, landmark_tensors, mask_trans = super().__getitem__(index, no_norm=True)
65
+
66
+ img_idct = self.image2dct(image_trans)
67
+ # normalize idct
68
+ img_idct = (img_idct / 255 - 0.5) / 0.5
69
+ # img_idct = img_idct[np.newaxis, ...]
70
+
71
+ # To tensor and normalize for fake and real images
72
+ image_trans = self.normalize(self.to_tensor(image_trans))
73
+ img_idct_trans = self.to_tensor(img_idct)
74
+ mask_trans = torch.from_numpy(mask_trans)
75
+ mask_trans = mask_trans.squeeze(2).permute(2, 0, 1)
76
+ mask_trans = torch.mean(mask_trans, dim=0, keepdim=True)
77
+ return image_trans, label, img_idct_trans, mask_trans
78
+
79
+ def __len__(self):
80
+ return len(self.image_list)
81
+
82
+
83
+ @staticmethod
84
+ def collate_fn(batch):
85
+ """
86
+ Collate a batch of data points.
87
+
88
+ Args:
89
+ batch (list): A list of tuples containing the image tensor and label tensor.
90
+
91
+ Returns:
92
+ A tuple containing the image tensor, the label tensor, the landmark tensor,
93
+ and the mask tensor.
94
+ """
95
+ global c
96
+ images, labels, img_idct_trans, masks = zip(*batch)
97
+ # Stack the image, label, landmark, and mask tensors
98
+ images = torch.stack(images, dim=0)
99
+ labels = torch.LongTensor(labels)
100
+ masks = torch.stack(masks, dim=0)
101
+ img_idct_trans = torch.stack(img_idct_trans, dim=0)
102
+
103
+ data_dict = {
104
+ 'image': images,
105
+ 'label': labels,
106
+ 'landmark': None,
107
+ 'idct': img_idct_trans,
108
+ 'mask': masks,
109
+ }
110
+ return data_dict
111
+
112
+
113
+
114
+ if __name__ == '__main__':
115
+ with open(r'H:\code\DeepfakeBench\training\config\detector\lrl_effnb4.yaml', 'r') as f:
116
+ config = yaml.safe_load(f)
117
+ with open(r'H:\code\DeepfakeBench\training\config\train_config.yaml', 'r') as f:
118
+ config2 = yaml.safe_load(f)
119
+ random.seed(config['manualSeed'])
120
+ torch.manual_seed(config['manualSeed'])
121
+ if config['cuda']:
122
+ torch.cuda.manual_seed_all(config['manualSeed'])
123
+ config2['data_manner'] = 'lmdb'
124
+ config['dataset_json_folder'] = 'preprocessing/dataset_json_v3'
125
+ config.update(config2)
126
+ train_set = LRLDataset(config=config, mode='train')
127
+ train_data_loader = \
128
+ torch.utils.data.DataLoader(
129
+ dataset=train_set,
130
+ batch_size=4,
131
+ shuffle=True,
132
+ num_workers=0,
133
+ collate_fn=train_set.collate_fn,
134
+ )
135
+ from tqdm import tqdm
136
+ for iteration, batch in enumerate(tqdm(train_data_loader)):
137
+ print(iteration)
138
+ if iteration > 10:
139
+ break
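For intuition, the following is a small, self-contained sketch of the DCT-domain high-pass split that `LRLDataset.multi_pass_filter` and `image2dct` above perform; it uses a random stand-in image and a float mask (instead of uint8) purely so it runs without any dataset on disk:

```python
# Sketch of the frequency split used by LRLDataset.image2dct:
# DCT the grayscale image, zero out the low-frequency corner, then invert.
import cv2
import numpy as np

img = (np.random.rand(256, 256, 3) * 255).astype(np.uint8)  # stand-in for a face crop
gray = np.float32(cv2.cvtColor(img, cv2.COLOR_RGB2GRAY))
img_dct = cv2.dct(gray)

rows, cols = img_dct.shape
x, y = np.ogrid[:rows, :cols]
high_mask = np.ones((rows, cols), np.float32)
high_mask[(cols / rows) * x + y < 0.33 * cols] = 0  # same cut-off as r1 = r2 = 0.33 above

img_idct = cv2.idct(img_dct * high_mask)  # high-frequency residual image
print(img_idct.shape, img_idct.dtype)     # (256, 256) float32
```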
training/dataset/lsda_dataset.py ADDED
@@ -0,0 +1,382 @@
 
1
+ import sys
2
+ sys.path.append('.')
3
+
4
+ import os
5
+ import sys
6
+ import json
7
+ import math
8
+ import yaml
9
+
10
+ import numpy as np
11
+ import cv2
12
+ import random
13
+ from PIL import Image
14
+
15
+ import torch
16
+ from torch.autograd import Variable
17
+ from torch.utils import data
18
+ from torchvision import transforms as T
19
+
20
+
21
+ import skimage.draw
22
+ import albumentations as alb
23
+ from albumentations import Compose, RandomBrightnessContrast, \
24
+ HorizontalFlip, FancyPCA, HueSaturationValue, OneOf, ToGray, \
25
+ ShiftScaleRotate, ImageCompression, PadIfNeeded, GaussNoise, GaussianBlur, RandomResizedCrop
26
+ from torch.utils.data.sampler import Sampler
27
+ from .abstract_dataset import DeepfakeAbstractBaseDataset
28
+
29
+
30
+ private_path_prefix = '/home/zhaokangran/cvpr24/training'
31
+
32
+ fake_dict = {
33
+ 'real': 0,
34
+ 'Deepfakes': 1,
35
+ 'Face2Face': 2,
36
+ 'FaceSwap': 3,
37
+ 'NeuralTextures': 4,
38
+ # 'Deepfakes_Face2Face': 5,
39
+ # 'Deepfakes_FaceSwap': 6,
40
+ # 'Deepfakes_NeuralTextures': 7,
41
+ # 'Deepfakes_real': 8,
42
+ # 'Face2Face_FaceSwap': 9,
43
+ # 'Face2Face_NeuralTextures': 10,
44
+ # 'Face2Face_real': 11,
45
+ # 'FaceSwap_NeuralTextures': 12,
46
+ # 'FaceSwap_real': 13,
47
+ # 'NeuralTextures_real': 14,
48
+ }
49
+
50
+
51
+
52
+ class RandomDownScale(alb.core.transforms_interface.ImageOnlyTransform):
53
+ def apply(self,img,**params):
54
+ return self.randomdownscale(img)
55
+
56
+ def randomdownscale(self,img):
57
+ keep_ratio=True
58
+ keep_input_shape=True
59
+ H,W,C=img.shape
60
+ ratio_list=[2,4]
61
+ r=ratio_list[np.random.randint(len(ratio_list))]
62
+ img_ds=cv2.resize(img,(int(W/r),int(H/r)),interpolation=cv2.INTER_NEAREST)
63
+ if keep_input_shape:
64
+ img_ds=cv2.resize(img_ds,(W,H),interpolation=cv2.INTER_LINEAR)
65
+
66
+ return img_ds
67
+
68
+
69
+ augmentation_methods = alb.Compose([
70
+ # alb.RandomBrightnessContrast(brightness_limit=(-0.1,0.1), contrast_limit=(-0.1,0.1), p=0.5),
71
+ # HorizontalFlip(p=0.5),
72
+ # RandomDownScale(p=0.5),
73
+ # alb.Sharpen(alpha=(0.2, 0.5), lightness=(0.5, 1.0), p=0.5),
74
+ alb.ImageCompression(quality_lower=40,quality_upper=100,p=0.5),
75
+ GaussianBlur(blur_limit=[3, 7], p=0.5)
76
+ ], p=1.)
77
+
78
+ augmentation_methods2 = alb.Compose([
79
+ alb.RandomBrightnessContrast(brightness_limit=(-0.1,0.1), contrast_limit=(-0.1,0.1), p=0.5),
80
+ HorizontalFlip(p=0.5),
81
+ RandomDownScale(p=0.5),
82
+ alb.Sharpen(alpha=(0.2, 0.5), lightness=(0.5, 1.0), p=0.5),
83
+ alb.ImageCompression(quality_lower=40,quality_upper=100,p=0.5),
84
+ ],
85
+ additional_targets={f'image1':'image', f'image2':'image', f'image3':'image', f'image4':'image'},
86
+ p=1.)
87
+
88
+ normalize = T.Normalize(mean=[0.5, 0.5, 0.5],
89
+ std =[0.5, 0.5, 0.5])
90
+ transforms1 = T.Compose([
91
+ T.ToTensor(),
92
+ normalize
93
+ ])
94
+
95
+ #==========================================
96
+
97
+ def load_rgb(file_path, size=256):
98
+ assert os.path.exists(file_path), f"{file_path} does not exist"
99
+ img = cv2.imread(file_path)
100
+ if img is None:
101
+ raise ValueError('Img is None: {}'.format(file_path))
102
+
103
+ img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
104
+ img = cv2.resize(img, (size, size), interpolation=cv2.INTER_CUBIC)
105
+
106
+ return Image.fromarray(np.array(img, dtype=np.uint8))
107
+
108
+
109
+ def load_mask(file_path, size=256):
110
+ mask = cv2.imread(file_path, 0)
111
+ if mask is None:
112
+ mask = np.zeros((size, size))
113
+
114
+ mask = cv2.resize(mask, (size, size))/255
115
+ mask = np.expand_dims(mask, axis=2)
116
+ return np.float32(mask)
117
+
118
+
119
+ def add_gaussian_noise(ins, mean=0, stddev=0.1):
120
+ noise = ins.data.new(ins.size()).normal_(mean, stddev)
121
+ return torch.clamp(ins + noise, -1, 1)
122
+
123
+
124
+ # class RandomBlur(object):
125
+ # """ Randomly blur an image
126
+ # """
127
+ # def __init__(self, ratio,)
128
+
129
+ # class RandomCompression(object):
130
+ # """ Randomly compress an image
131
+ # """
132
+
133
+ class CustomSampler(Sampler):
134
+ def __init__(self, num_groups=2*360, n_frame_per_vid=32, videos_per_group=5, batch_size=10):
135
+ self.num_groups = num_groups
136
+ self.n_frame_per_vid = n_frame_per_vid
137
+ self.videos_per_group = videos_per_group
138
+ self.batch_size = batch_size
139
+ assert self.batch_size % self.videos_per_group == 0, "Batch size should be a multiple of videos_per_group."
140
+ self.groups_per_batch = self.batch_size // self.videos_per_group
141
+
142
+ def __iter__(self):
143
+ group_indices = list(range(self.num_groups))
144
+ random.shuffle(group_indices)
145
+
146
+ # For each batch
147
+ for i in range(0, len(group_indices), self.groups_per_batch):
148
+ selected_groups = group_indices[i:i+self.groups_per_batch]
149
+
150
+ # For each group
151
+ for group in selected_groups:
152
+ frame_idx = random.randint(0, self.n_frame_per_vid - 1) # Random frame index for this group's videos
153
+
154
+ # Return the frame for each video in this group using the same frame_idx
155
+ for video_offset in range(self.videos_per_group):
156
+ yield group * self.videos_per_group * self.n_frame_per_vid + video_offset * self.n_frame_per_vid + frame_idx
157
+
158
+ def __len__(self):
159
+ return self.num_groups * self.videos_per_group # Total frames
160
+
161
+
162
+
163
+ class LSDADataset(DeepfakeAbstractBaseDataset):
164
+
165
+ on_3060 = "3060" in torch.cuda.get_device_name()
166
+ transfer_dict = {
167
+ 'youtube':'FF-real',
168
+ 'Deepfakes':'FF-DF',
169
+ 'Face2Face':'FF-F2F',
170
+ 'FaceSwap':'FF-FS',
171
+ 'NeuralTextures':'FF-NT'
172
+
173
+
174
+ }
175
+ if on_3060:
176
+ data_root = r'F:\Datasets\rgb\FaceForensics++'
177
+ else:
178
+ data_root = r'./datasets/FaceForensics++'
179
+ data_list = {
180
+ 'test': r'./datasets/FaceForensics++/test.json',
181
+ 'train': r'./datasets/FaceForensics++/train.json',
182
+ 'eval': r'./datasets/FaceForensics++/val.json'
183
+ }
184
+
185
+ def __init__(self, config=None, mode='train', with_dataset=['Deepfakes', 'Face2Face', 'FaceSwap', 'NeuralTextures']):
186
+ super().__init__(config, mode)
187
+ self.mode = mode
188
+ self.res = config['resolution']
189
+ self.fake_dict = fake_dict
190
+ # transform
191
+ self.normalize = T.Normalize(mean=config['mean'],
192
+ std =config['std'])
193
+ # data aug and transform
194
+ self.transforms1 = T.Compose([
195
+ T.ToTensor(),
196
+ self.normalize
197
+ ])
198
+ self.img_lines = []
199
+ self.config=config
200
+ with open(self.config['dataset_json_folder']+'/FaceForensics++.json', 'r') as fd:
201
+ self.img_json = json.load(fd)
202
+ with open(self.data_list[mode], 'r') as fd:
203
+ data = json.load(fd)
204
+ img_lines = []
205
+ for pair in data:
206
+ r1, r2 = pair
207
+ step = 1
208
+ # collect a group with 1+len(fakes) videos, each video has self.frames[mode] frames. Entries are stored in this fixed per-video order, so at read time an offset alone is enough to fetch the corresponding frame
209
+ # Note also that what is stored here is not a file path but a normalized record (name, frame index, label, mode).
210
+ for i in range(0, config['frame_num'][mode], step):
211
+ # collect real data here(r1)
212
+ img_lines.append(('{}/{}'.format('youtube', r1), i, 0, mode))
213
+
214
+ for fake_d in with_dataset:
215
+ # collect fake data here(r1_r2 * 4)
216
+ for i in range(0, config['frame_num'][mode], step):
217
+ img_lines.append(
218
+ ('{}/{}_{}'.format(fake_d, r1, r2), i, self.fake_dict[fake_d], mode))
219
+
220
+ for i in range(0, config['frame_num'][mode], step):
221
+ # collect real data here(r2)
222
+ img_lines.append(('{}/{}'.format('youtube', r2), i, 0, mode))
223
+
224
+ for fake_d in with_dataset:
225
+ # collect fake data here(r2_r1 * 4)
226
+ for i in range(0, config['frame_num'][mode], step):
227
+ img_lines.append(
228
+ ('{}/{}_{}'.format(fake_d, r2, r1), i, self.fake_dict[fake_d], mode))
229
+
230
+ # 2*360 (groups) * 1+len(with_dataset) (videos in each group) * self.frames[mode] (frames in each video)
231
+ assert len(img_lines) == 2*len(data) * (1 + len(with_dataset)) * config['frame_num'][mode], "to match our custom sampler, the length should be 2*360*(1+len(with_dataset))*frames[mode]"
232
+ self.img_lines.extend(img_lines)
233
+
234
+
235
+ def get_ids_from_path(self, path):
236
+ parts = path.split('/')
237
+ try:
238
+ if 'youtube' in path:
239
+ return [int(parts[-1])]
240
+ else:
241
+ return list(map(int, parts[-1].split('_')))
242
+ except:
243
+ raise ValueError("wrong path: {}".format(path))
244
+
245
+ def load_image(self, name, idx):
246
+ instance_type, video_name = name.split('/')
247
+ # The frame is not an exact one-to-one match; it is only guaranteed to lie within the target time range of the same video
248
+ all_frames = self.img_json[self.data_root.split(os.path.sep)[-1]][self.transfer_dict[instance_type]]['train']['c23'][video_name]['frames']
249
+ img_path = all_frames[idx]
250
+
251
+ impath = img_path
252
+ img = self.load_rgb(impath)
253
+ return img
254
+
255
+ def __getitem__(self, index):
256
+ name, idx, label, mode = self.img_lines[index] # the sampler is designed to avoid drawing frames from duplicate videos.
257
+ label = int(label) # specific fake label from 1-4
258
+
259
+ # Load the image; this is where the normalized img_lines record is converted into an actual file path.
260
+ try:
261
+ img = self.load_image(name, idx)
262
+ except Exception as e:
263
+ # The fallback below is not ideal: it may not fetch the expected video_id/fake_method, which affects the later LSDA step.
264
+ # random_idx = random.randint(0, len(self.img_lines)-1)
265
+ # print(f'Error loading image {name} at index {idx} due to the loading error. Try another one at index {random_idx}')
266
+ # return self.__getitem__(random_idx)
267
+
268
+ # Boundary-condition check; fall back to another frame of the same video.
269
+ if idx==0:
270
+ new_index = index+1
271
+ elif idx==31:
272
+ new_index = index-1
273
+ else:
274
+ new_index = index + random.choice([-1,1]) # randomize the step to avoid infinite recursion
275
+ print(f'Error loading image {name} at index {idx} due to the loading error. Try another one at index {new_index}')
276
+ return self.__getitem__(new_index)
277
+
278
+
279
+ if self.mode=='train':
280
+ # do augmentation
281
+ img = np.asarray(img) # convert PIL to numpy
282
+
283
+ img = augmentation_methods2(image=img)['image']
284
+ img = Image.fromarray(np.array(img, dtype=np.uint8)) # convert numpy to PIL
285
+
286
+ # transform with PIL as input
287
+ img = self.transforms1(img)
288
+ else:
289
+ raise ValueError("Not implemented yet")
290
+
291
+ return (img, label)
292
+
293
+
294
+
295
+ def __len__(self):
296
+ return len(self.img_lines)
297
+
298
+
299
+
300
+ @staticmethod
301
+ def collate_fn(batch):
302
+ # Unzip the batch into images and labels
303
+ images, labels = zip(*batch)
304
+
305
+ # images, labels = zip(batch['image'], batch['label'])
306
+
307
+ # image_list = []
308
+
309
+ # for i in range(len(images)//5):
310
+
311
+ # img = images[i*5:(i+1)*5]
312
+
313
+ # # do augmentation
314
+ # imgs_aug = augmentation_methods2(image=np.asarray(img[0]), image1=np.asarray(img[1]), image2=np.asarray(img[2]), image3=np.asarray(img[3]), image4=np.asarray(img[4]))
315
+ # for k in imgs_aug:
316
+
317
+ # img_aug = Image.fromarray(np.array(imgs_aug[k], dtype=np.uint8)) # convert numpy to PIL
318
+
319
+ # # transform with PIL as input
320
+ # img_aug = transforms1(img_aug)
321
+ # image_list.append(img_aug)
322
+
323
+ # Stack the images and labels
324
+ images = torch.stack(images, dim=0) # Shape: (batch_size, c, h, w)
325
+ labels = torch.tensor(labels, dtype=torch.long)
326
+
327
+ bs, c, h, w = images.shape
328
+
329
+ # Assume videos_per_group is 5 in our case
330
+ videos_per_group = 5
331
+ num_groups = bs // videos_per_group
332
+
333
+ # Reshape to get the group dimension: (num_groups, videos_per_group, c, h, w)
334
+ images_grouped = images.view(num_groups, videos_per_group, c, h, w)
335
+ labels_grouped = labels.view(num_groups, videos_per_group)
336
+
337
+ valid_indices = []
338
+ for i, group in enumerate(labels_grouped):
339
+ if set(group.numpy().tolist()) == {0, 1, 2, 3, 4}:
340
+ valid_indices.append(i)
341
+ # elif set(group.numpy().tolist()) == {0, 1, 2, 3}:
342
+ # valid_indices.append(i)
343
+ # elif set(group.numpy().tolist()) == {0, 1, 2, 3, 4, 5}:
344
+ # valid_indices.append(i)
345
+
346
+ images_grouped = images_grouped[valid_indices]
347
+ labels_grouped = labels_grouped[valid_indices]
348
+
349
+ if not valid_indices:
350
+ raise ValueError("No valid groups found in this batch.")
351
+
352
+ # # Shuffle the video order within each group
353
+ # for i in range(num_groups):
354
+ # perm = torch.randperm(videos_per_group)
355
+ # images_grouped[i] = images_grouped[i, perm]
356
+ # labels_grouped[i] = labels_grouped[i, perm]
357
+
358
+ # # Flatten back to original shape but with shuffled video order
359
+ # images_shuffled = images_grouped.view(num_groups, videos_per_group, c, h, w)
360
+ # labels_shuffled = labels_grouped.view(bs)
361
+
362
+ return {'image': images_grouped, 'label': labels_grouped, 'mask': None, 'landmark': None}
363
+
364
+
365
+ if __name__ == '__main__':
366
+ with open('/data/home/zhiyuanyan/DeepfakeBench/training/config/detector/lsda.yaml', 'r') as f:
367
+ config = yaml.safe_load(f)
368
+ train_set = LSDADataset(config=config, mode='train')
369
+ custom_sampler = CustomSampler(num_groups=2*360, n_frame_per_vid=config['frame_num']['train'], batch_size=config['train_batchSize'], videos_per_group=5)
370
+ train_data_loader = \
371
+ torch.utils.data.DataLoader(
372
+ dataset=train_set,
373
+ batch_size=config['train_batchSize'],
374
+ num_workers=0,
375
+ sampler=custom_sampler,
376
+ collate_fn=train_set.collate_fn,
377
+ )
378
+ from tqdm import tqdm
379
+ for iteration, batch in enumerate(tqdm(train_data_loader)):
380
+ print(iteration)
381
+ if iteration > 10:
382
+ break
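A minimal illustrative sketch (synthetic labels, assuming videos_per_group=5 as above) of the group filter inside LSDADataset.collate_fn: a group survives only if its five labels cover exactly {0, 1, 2, 3, 4}, i.e. one real video plus the four FF++ manipulations:

import torch

labels_grouped = torch.tensor([[0, 1, 2, 3, 4],   # complete group -> kept
                               [0, 1, 1, 3, 4]])  # duplicated method -> dropped
valid_indices = [i for i, group in enumerate(labels_grouped)
                 if set(group.numpy().tolist()) == {0, 1, 2, 3, 4}]
assert valid_indices == [0]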
training/dataset/pair_dataset.py ADDED
@@ -0,0 +1,150 @@
1
+ '''
2
+ # author: Zhiyuan Yan
3
+ # email: [email protected]
4
+ # date: 2023-03-30
5
+
6
+ The code is designed for scenarios such as disentanglement-based methods where it is necessary to ensure an equal number of positive and negative samples.
7
+ '''
8
+
9
+ import torch
10
+ import random
11
+ import numpy as np
12
+ from dataset.abstract_dataset import DeepfakeAbstractBaseDataset
13
+
14
+
15
+ class pairDataset(DeepfakeAbstractBaseDataset):
16
+ def __init__(self, config=None, mode='train'):
17
+ super().__init__(config, mode)
18
+
19
+ # Get real and fake image lists
20
+ # Fix the label of real images to be 0 and fake images to be 1
21
+ self.fake_imglist = [(img, label, 1) for img, label in zip(self.image_list, self.label_list) if label != 0]
22
+ self.real_imglist = [(img, label, 0) for img, label in zip(self.image_list, self.label_list) if label == 0]
23
+
24
+ def __getitem__(self, index, norm=True):
25
+ # Get the fake and real image paths and labels
26
+ fake_image_path, fake_spe_label, fake_label = self.fake_imglist[index]
27
+ real_index = random.randint(0, len(self.real_imglist) - 1) # Randomly select a real image
28
+ real_image_path, real_spe_label, real_label = self.real_imglist[real_index]
29
+
30
+ # Get the mask and landmark paths for fake and real images
31
+ fake_mask_path = fake_image_path.replace('frames', 'masks')
32
+ fake_landmark_path = fake_image_path.replace('frames', 'landmarks').replace('.png', '.npy')
33
+
34
+ real_mask_path = real_image_path.replace('frames', 'masks')
35
+ real_landmark_path = real_image_path.replace('frames', 'landmarks').replace('.png', '.npy')
36
+
37
+ # Load the fake and real images
38
+ fake_image = self.load_rgb(fake_image_path)
39
+ real_image = self.load_rgb(real_image_path)
40
+
41
+ fake_image = np.array(fake_image) # Convert to numpy array for data augmentation
42
+ real_image = np.array(real_image) # Convert to numpy array for data augmentation
43
+
44
+ # Load mask and landmark (if needed) for fake and real images
45
+ if self.config['with_mask']:
46
+ fake_mask = self.load_mask(fake_mask_path)
47
+ real_mask = self.load_mask(real_mask_path)
48
+ else:
49
+ fake_mask, real_mask = None, None
50
+
51
+ if self.config['with_landmark']:
52
+ fake_landmarks = self.load_landmark(fake_landmark_path)
53
+ real_landmarks = self.load_landmark(real_landmark_path)
54
+ else:
55
+ fake_landmarks, real_landmarks = None, None
56
+
57
+ # Do transforms for fake and real images
58
+ fake_image_trans, fake_landmarks_trans, fake_mask_trans = self.data_aug(fake_image, fake_landmarks, fake_mask)
59
+ real_image_trans, real_landmarks_trans, real_mask_trans = self.data_aug(real_image, real_landmarks, real_mask)
60
+
61
+ if not norm:
62
+ return {"fake": (fake_image_trans, fake_label),
63
+ "real": (real_image_trans, real_label)}
64
+
65
+ # To tensor and normalize for fake and real images
66
+ fake_image_trans = self.normalize(self.to_tensor(fake_image_trans))
67
+ real_image_trans = self.normalize(self.to_tensor(real_image_trans))
68
+
69
+ # Convert landmarks and masks to tensors if they exist
70
+ if self.config['with_landmark']:
71
+ fake_landmarks_trans = torch.from_numpy(fake_landmarks_trans)
72
+ real_landmarks_trans = torch.from_numpy(real_landmarks_trans)
73
+ if self.config['with_mask']:
74
+ fake_mask_trans = torch.from_numpy(fake_mask_trans)
75
+ real_mask_trans = torch.from_numpy(real_mask_trans)
76
+
77
+ return {"fake": (fake_image_trans, fake_label, fake_spe_label, fake_landmarks_trans, fake_mask_trans),
78
+ "real": (real_image_trans, real_label, real_spe_label, real_landmarks_trans, real_mask_trans)}
79
+
80
+ def __len__(self):
81
+ return len(self.fake_imglist)
82
+
83
+ @staticmethod
84
+ def collate_fn(batch):
85
+ """
86
+ Collate a batch of data points.
87
+
88
+ Args:
89
+ batch (list): A list of tuples containing the image tensor, the label tensor,
90
+ the landmark tensor, and the mask tensor.
91
+
92
+ Returns:
93
+ A tuple containing the image tensor, the label tensor, the landmark tensor,
94
+ and the mask tensor.
95
+ """
96
+ # Separate the image, label, landmark, and mask tensors for fake and real data
97
+ fake_images, fake_labels, fake_spe_labels, fake_landmarks, fake_masks = zip(*[data["fake"] for data in batch])
98
+ real_images, real_labels, real_spe_labels, real_landmarks, real_masks = zip(*[data["real"] for data in batch])
99
+
100
+ # Stack the image, label, landmark, and mask tensors for fake and real data
101
+ fake_images = torch.stack(fake_images, dim=0)
102
+ fake_labels = torch.LongTensor(fake_labels)
103
+ fake_spe_labels = torch.LongTensor(fake_spe_labels)
104
+ real_images = torch.stack(real_images, dim=0)
105
+ real_labels = torch.LongTensor(real_labels)
106
+ real_spe_labels = torch.LongTensor(real_spe_labels)
107
+
108
+ # Special case for landmarks and masks if they are None
109
+ if fake_landmarks[0] is not None:
110
+ fake_landmarks = torch.stack(fake_landmarks, dim=0)
111
+ else:
112
+ fake_landmarks = None
113
+ if real_landmarks[0] is not None:
114
+ real_landmarks = torch.stack(real_landmarks, dim=0)
115
+ else:
116
+ real_landmarks = None
117
+
118
+ if fake_masks[0] is not None:
119
+ fake_masks = torch.stack(fake_masks, dim=0)
120
+ else:
121
+ fake_masks = None
122
+ if real_masks[0] is not None:
123
+ real_masks = torch.stack(real_masks, dim=0)
124
+ else:
125
+ real_masks = None
126
+
127
+ # Combine the fake and real tensors and create a dictionary of the tensors
128
+ images = torch.cat([real_images, fake_images], dim=0)
129
+ labels = torch.cat([real_labels, fake_labels], dim=0)
130
+ spe_labels = torch.cat([real_spe_labels, fake_spe_labels], dim=0)
131
+
132
+ if fake_landmarks is not None and real_landmarks is not None:
133
+ landmarks = torch.cat([real_landmarks, fake_landmarks], dim=0)
134
+ else:
135
+ landmarks = None
136
+
137
+ if fake_masks is not None and real_masks is not None:
138
+ masks = torch.cat([real_masks, fake_masks], dim=0)
139
+ else:
140
+ masks = None
141
+
142
+ data_dict = {
143
+ 'image': images,
144
+ 'label': labels,
145
+ 'label_spe': spe_labels,
146
+ 'landmark': landmarks,
147
+ 'mask': masks
148
+ }
149
+ return data_dict
150
+
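A minimal usage sketch for pairDataset (the config path and its contents are assumptions; any valid benchmark config would do). What follows directly from collate_fn above is the batch layout: real samples fill the first half, fake samples the second, and label_spe keeps the per-method label:

import yaml
from torch.utils.data import DataLoader

with open('training/config/detector/ucf.yaml', 'r') as f:   # assumed config file
    config = yaml.safe_load(f)
dataset = pairDataset(config=config, mode='train')
loader = DataLoader(dataset, batch_size=8, shuffle=True, collate_fn=pairDataset.collate_fn)
batch = next(iter(loader))
# 8 real images followed by 8 fake ones; binary labels come out as zeros then ones
print(batch['image'].shape, batch['label'].tolist(), batch['label_spe'].tolist())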
training/dataset/sbi_api.py ADDED
@@ -0,0 +1,371 @@
1
+ # Created by: Kaede Shiohara
2
+ # Yamasaki Lab at The University of Tokyo
3
4
+ # Copyright (c) 2021
5
+ # 3rd party softwares' licenses are noticed at https://github.com/mapooon/SelfBlendedImages/blob/master/LICENSE
6
+
7
+ import torch
8
+ from torchvision import datasets,transforms,utils
9
+ from torch.utils.data import Dataset,IterableDataset
10
+ from glob import glob
11
+ import os
12
+ import numpy as np
13
+ from PIL import Image
14
+ import random
15
+ import cv2
16
+ from torch import nn
17
+ import sys
18
+ import scipy as sp
19
+ from skimage.measure import label, regionprops
20
+ from training.dataset.library.bi_online_generation import random_get_hull
21
+ import albumentations as alb
22
+
23
+ import warnings
24
+ warnings.filterwarnings('ignore')
25
+
26
+
27
+ def alpha_blend(source,target,mask):
28
+ mask_blured = get_blend_mask(mask)
29
+ img_blended=(mask_blured * source + (1 - mask_blured) * target)
30
+ return img_blended,mask_blured
31
+
32
+
33
+ def dynamic_blend(source,target,mask):
34
+ mask_blured = get_blend_mask(mask)
35
+ blend_list=[0.25,0.5,0.75,1,1,1]
36
+ blend_ratio = blend_list[np.random.randint(len(blend_list))]
37
+ mask_blured*=blend_ratio
38
+ img_blended=(mask_blured * source + (1 - mask_blured) * target)
39
+ return img_blended,mask_blured
40
+
41
+
42
+ def get_blend_mask(mask):
43
+ H,W=mask.shape
44
+ size_h=np.random.randint(192,257)
45
+ size_w=np.random.randint(192,257)
46
+ mask=cv2.resize(mask,(size_w,size_h))
47
+ kernel_1=random.randrange(5,26,2)
48
+ kernel_1=(kernel_1,kernel_1)
49
+ kernel_2=random.randrange(5,26,2)
50
+ kernel_2=(kernel_2,kernel_2)
51
+
52
+ mask_blured = cv2.GaussianBlur(mask, kernel_1, 0)
53
+ mask_blured = mask_blured/(mask_blured.max())
54
+ mask_blured[mask_blured<1]=0
55
+
56
+ mask_blured = cv2.GaussianBlur(mask_blured, kernel_2, np.random.randint(5,46))
57
+ mask_blured = mask_blured/(mask_blured.max())
58
+ mask_blured = cv2.resize(mask_blured,(W,H))
59
+ return mask_blured.reshape((mask_blured.shape+(1,)))
60
+
61
+
62
+ def get_alpha_blend_mask(mask):
63
+ kernel_list=[(11,11),(9,9),(7,7),(5,5),(3,3)]
64
+ blend_list=[0.25,0.5,0.75]
65
+ kernel_idxs=random.choices(range(len(kernel_list)), k=2)
66
+ blend_ratio = blend_list[random.sample(range(len(blend_list)), 1)[0]]
67
+ mask_blured = cv2.GaussianBlur(mask, kernel_list[0], 0)
68
+ # print(mask_blured.max())
69
+ mask_blured[mask_blured<mask_blured.max()]=0
70
+ mask_blured[mask_blured>0]=1
71
+ # mask_blured = mask
72
+ mask_blured = cv2.GaussianBlur(mask_blured, kernel_list[kernel_idxs[1]], 0)
73
+ mask_blured = mask_blured/(mask_blured.max())
74
+ return mask_blured.reshape((mask_blured.shape+(1,)))
75
+
76
+
77
+ class RandomDownScale(alb.core.transforms_interface.ImageOnlyTransform):
78
+ def apply(self,img,**params):
79
+ return self.randomdownscale(img)
80
+
81
+ def randomdownscale(self,img):
82
+ keep_ratio=True
83
+ keep_input_shape=True
84
+ H,W,C=img.shape
85
+ ratio_list=[2,4]
86
+ r=ratio_list[np.random.randint(len(ratio_list))]
87
+ img_ds=cv2.resize(img,(int(W/r),int(H/r)),interpolation=cv2.INTER_NEAREST)
88
+ if keep_input_shape:
89
+ img_ds=cv2.resize(img_ds,(W,H),interpolation=cv2.INTER_LINEAR)
90
+
91
+ return img_ds
92
+
93
+
94
+
95
+ def get_boundary(mask, apply_dilation=True, apply_motion_blur=True):
96
+ if len(mask.shape) == 3:
97
+ mask = mask[:, :, 0]
98
+
99
+ mask = cv2.GaussianBlur(mask, (3, 3), 0)
100
+ if mask.max() > 1:
101
+ boundary = mask / 255.
102
+ else:
103
+ boundary = mask
104
+ boundary = 4 * boundary * (1. - boundary)
105
+
106
+ boundary = boundary * 255
107
+ boundary = random_dilate(boundary)
108
+
109
+ if apply_motion_blur:
110
+ boundary = random_motion_blur(boundary)
111
+ boundary = boundary / 255.
112
+ return boundary
113
+
114
+ def random_dilate(mask, max_kernel_size=5):
115
+ kernel_size = random.randint(1, max_kernel_size)
116
+ kernel = np.ones((kernel_size, kernel_size), np.uint8)
117
+ dilated_mask = cv2.dilate(mask, kernel, iterations=1)
118
+ return dilated_mask
119
+
120
+ def random_motion_blur(mask, max_kernel_size=5):
121
+ kernel_size = random.randint(1, max_kernel_size)
122
+ kernel = np.zeros((kernel_size, kernel_size))
123
+ anchor = random.randint(0, kernel_size - 1)
124
+ kernel[:, anchor] = 1 / kernel_size
125
+ motion_blurred_mask = cv2.filter2D(mask, -1, kernel)
126
+ return motion_blurred_mask
127
+
128
+
129
+
130
+ class SBI_API:
131
+ def __init__(self,phase='train',image_size=256):
132
+
133
+ assert phase == 'train', f"Current SBI API only supports the train phase, but got {phase}"
134
+
135
+ self.image_size=(image_size,image_size)
136
+ self.phase=phase
137
+
138
+ self.transforms=self.get_transforms()
139
+ self.source_transforms = self.get_source_transforms()
140
+ self.bob_transforms = self.get_source_transforms_for_bob()
141
+
142
+
143
+ def __call__(self,img,landmark=None):
144
+ try:
145
+ assert landmark is not None, "landmark of the facial image should not be None."
146
+ # img_r,img_f,mask_f=self.self_blending(img.copy(),landmark.copy())
147
+
148
+ if random.random() < 1.0:
149
+ # apply sbi
150
+ img_r,img_f,mask_f=self.self_blending(img.copy(),landmark.copy())
151
+ else:
152
+ # apply boundary motion blur (bob)
153
+ img_r,img_f,mask_f=self.bob(img.copy(),landmark.copy())
154
+
155
+ if self.phase=='train':
156
+ transformed=self.transforms(image=img_f.astype('uint8'),image1=img_r.astype('uint8'))
157
+ img_f=transformed['image']
158
+ img_r=transformed['image1']
159
+ return img_f,img_r
160
+ except Exception as e:
161
+ print(e)
162
+ return None,None
163
+
164
+
165
+ def get_source_transforms(self):
166
+ return alb.Compose([
167
+ alb.Compose([
168
+ alb.RGBShift((-20,20),(-20,20),(-20,20),p=0.3),
169
+ alb.HueSaturationValue(hue_shift_limit=(-0.3,0.3), sat_shift_limit=(-0.3,0.3), val_shift_limit=(-0.3,0.3), p=1),
170
+ alb.RandomBrightnessContrast(brightness_limit=(-0.1,0.1), contrast_limit=(-0.1,0.1), p=1),
171
+ ],p=1),
172
+
173
+ alb.OneOf([
174
+ RandomDownScale(p=1),
175
+ alb.Sharpen(alpha=(0.2, 0.5), lightness=(0.5, 1.0), p=1),
176
+ ],p=1),
177
+
178
+ ], p=1.)
179
+
180
+
181
+ def get_transforms(self):
182
+ return alb.Compose([
183
+
184
+ alb.RGBShift((-20,20),(-20,20),(-20,20),p=0.3),
185
+ alb.HueSaturationValue(hue_shift_limit=(-0.3,0.3), sat_shift_limit=(-0.3,0.3), val_shift_limit=(-0.3,0.3), p=0.3),
186
+ alb.RandomBrightnessContrast(brightness_limit=(-0.3,0.3), contrast_limit=(-0.3,0.3), p=0.3),
187
+ alb.ImageCompression(quality_lower=40,quality_upper=100,p=0.5),
188
+
189
+ ],
190
+ additional_targets={f'image1': 'image'},
191
+ p=1.)
192
+
193
+
194
+ def randaffine(self,img,mask):
195
+ f=alb.Affine(
196
+ translate_percent={'x':(-0.03,0.03),'y':(-0.015,0.015)},
197
+ scale=[0.95,1/0.95],
198
+ fit_output=False,
199
+ p=1)
200
+
201
+ g=alb.ElasticTransform(
202
+ alpha=50,
203
+ sigma=7,
204
+ alpha_affine=0,
205
+ p=1,
206
+ )
207
+
208
+ transformed=f(image=img,mask=mask)
209
+ img=transformed['image']
210
+
211
+ mask=transformed['mask']
212
+ transformed=g(image=img,mask=mask)
213
+ mask=transformed['mask']
214
+ return img,mask
215
+
216
+
217
+ def get_source_transforms_for_bob(self):
218
+ return alb.Compose([
219
+ alb.Compose([
220
+ alb.ImageCompression(quality_lower=40,quality_upper=100,p=1),
221
+ ],p=1),
222
+
223
+ alb.OneOf([
224
+ RandomDownScale(p=1),
225
+ alb.Sharpen(alpha=(0.2, 0.5), lightness=(0.5, 1.0), p=1),
226
+ ],p=1),
227
+
228
+ ], p=1.)
229
+
230
+ def bob(self,img,landmark):
231
+ H,W=len(img),len(img[0])
232
+ if np.random.rand()<0.25:
233
+ landmark=landmark[:68]
234
+ # mask=np.zeros_like(img[:,:,0])
235
+ # cv2.fillConvexPoly(mask, cv2.convexHull(landmark), 1.)
236
+ hull_type = random.choice([0, 1, 2, 3])
237
+ mask=random_get_hull(landmark,img,hull_type)[:,:,0]
238
+
239
+ source = img.copy()
240
+ source = self.bob_transforms(image=source.astype(np.uint8))['image']
241
+ source, mask = self.randaffine(source,mask)
242
+ mask = get_blend_mask(mask)
243
+
244
+ # get boundary with motion blur
245
+ boundary = get_boundary(mask)
246
+
247
+ blend_list = [0.25,0.5,0.75,1,1,1]
248
+ blend_ratio = blend_list[np.random.randint(len(blend_list))]
249
+ boundary *= blend_ratio
250
+ boundary = np.repeat(boundary[:, :, np.newaxis], 3, axis=2)
251
+ img_blended = (boundary * source + (1 - boundary) * img)
252
+
253
+ img_blended = img_blended.astype(np.uint8)
254
+ img = img.astype(np.uint8)
255
+
256
+ return img,img_blended,boundary.squeeze()
257
+
258
+
259
+ def self_blending(self,img,landmark):
260
+ H,W=len(img),len(img[0])
261
+ if np.random.rand()<0.25:
262
+ landmark=landmark[:68]
263
+ # mask=np.zeros_like(img[:,:,0])
264
+ # cv2.fillConvexPoly(mask, cv2.convexHull(landmark), 1.)
265
+ hull_type = random.choice([0, 1, 2, 3])
266
+ mask=random_get_hull(landmark,img,hull_type)[:,:,0]
267
+
268
+ source = img.copy()
269
+ if np.random.rand()<0.5:
270
+ source = self.source_transforms(image=source.astype(np.uint8))['image']
271
+ else:
272
+ img = self.source_transforms(image=img.astype(np.uint8))['image']
273
+
274
+ source, mask = self.randaffine(source,mask)
275
+
276
+ img_blended,mask=dynamic_blend(source,img,mask)
277
+ img_blended = img_blended.astype(np.uint8)
278
+ img = img.astype(np.uint8)
279
+
280
+ return img,img_blended,mask
281
+
282
+
283
+ def reorder_landmark(self,landmark):
284
+ landmark_add=np.zeros((13,2))
285
+ for idx,idx_l in enumerate([77,75,76,68,69,70,71,80,72,73,79,74,78]):
286
+ landmark_add[idx]=landmark[idx_l]
287
+ landmark[68:]=landmark_add
288
+ return landmark
289
+
290
+
291
+ def hflip(self,img,mask=None,landmark=None,bbox=None):
292
+ H,W=img.shape[:2]
293
+ landmark=landmark.copy()
294
+ if bbox is not None:
295
+ bbox=bbox.copy()
296
+
297
+ if landmark is not None:
298
+ landmark_new=np.zeros_like(landmark)
299
+
300
+
301
+ landmark_new[:17]=landmark[:17][::-1]
302
+ landmark_new[17:27]=landmark[17:27][::-1]
303
+
304
+ landmark_new[27:31]=landmark[27:31]
305
+ landmark_new[31:36]=landmark[31:36][::-1]
306
+
307
+ landmark_new[36:40]=landmark[42:46][::-1]
308
+ landmark_new[40:42]=landmark[46:48][::-1]
309
+
310
+ landmark_new[42:46]=landmark[36:40][::-1]
311
+ landmark_new[46:48]=landmark[40:42][::-1]
312
+
313
+ landmark_new[48:55]=landmark[48:55][::-1]
314
+ landmark_new[55:60]=landmark[55:60][::-1]
315
+
316
+ landmark_new[60:65]=landmark[60:65][::-1]
317
+ landmark_new[65:68]=landmark[65:68][::-1]
318
+ if len(landmark)==68:
319
+ pass
320
+ elif len(landmark)==81:
321
+ landmark_new[68:81]=landmark[68:81][::-1]
322
+ else:
323
+ raise NotImplementedError
324
+ landmark_new[:,0]=W-landmark_new[:,0]
325
+
326
+ else:
327
+ landmark_new=None
328
+
329
+ if bbox is not None:
330
+ bbox_new=np.zeros_like(bbox)
331
+ bbox_new[0,0]=bbox[1,0]
332
+ bbox_new[1,0]=bbox[0,0]
333
+ bbox_new[:,0]=W-bbox_new[:,0]
334
+ bbox_new[:,1]=bbox[:,1].copy()
335
+ if len(bbox)>2:
336
+ bbox_new[2,0]=W-bbox[3,0]
337
+ bbox_new[2,1]=bbox[3,1]
338
+ bbox_new[3,0]=W-bbox[2,0]
339
+ bbox_new[3,1]=bbox[2,1]
340
+ bbox_new[4,0]=W-bbox[4,0]
341
+ bbox_new[4,1]=bbox[4,1]
342
+ bbox_new[5,0]=W-bbox[6,0]
343
+ bbox_new[5,1]=bbox[6,1]
344
+ bbox_new[6,0]=W-bbox[5,0]
345
+ bbox_new[6,1]=bbox[5,1]
346
+ else:
347
+ bbox_new=None
348
+
349
+ if mask is not None:
350
+ mask=mask[:,::-1]
351
+ else:
352
+ mask=None
353
+ img=img[:,::-1].copy()
354
+ return img,mask,landmark_new,bbox_new
355
+
356
+
357
+ if __name__=='__main__':
358
+ seed=10
359
+ random.seed(seed)
360
+ torch.manual_seed(seed)
361
+ np.random.seed(seed)
362
+ torch.cuda.manual_seed(seed)
363
+ torch.backends.cudnn.deterministic = True
364
+ torch.backends.cudnn.benchmark = False
365
+ api=SBI_API(phase='train',image_size=256)
366
+
367
+ img_path = 'FaceForensics++/original_sequences/youtube/c23/frames/000/000.png'
368
+ img = cv2.imread(img_path)
369
+ landmark_path = img_path.replace('frames', 'landmarks').replace('png', 'npy')
370
+ landmark = np.load(landmark_path)
371
+ sbi_img, ori_img = api(img, landmark)
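As a sanity check of the convex-combination blending used by dynamic_blend above (blended = m*source + (1-m)*target, with the blurred mask scaled by a random ratio), a minimal sketch on synthetic arrays:

import numpy as np

source = np.full((4, 4, 3), 200.0)          # synthetic donor face region
target = np.full((4, 4, 3), 50.0)           # synthetic background face
mask = np.zeros((4, 4, 1)); mask[1:3, 1:3] = 1.0
blend_ratio = 0.5
blended = (mask * blend_ratio) * source + (1 - mask * blend_ratio) * target
# halfway between source and target inside the mask, untouched outside
assert blended[2, 2, 0] == 125.0 and blended[0, 0, 0] == 50.0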
training/dataset/sbi_dataset.py ADDED
@@ -0,0 +1,139 @@
1
+ '''
2
+ # author: Zhiyuan Yan
3
+ # email: [email protected]
4
+ # date: 2024-01-26
5
+
6
+ The code is designed for the self-blending method (SBI, CVPR 2022).
7
+ '''
8
+
9
+ import sys
10
+ sys.path.append('.')
11
+
12
+ import cv2
13
+ import yaml
14
+ import torch
15
+ import numpy as np
16
+ from copy import deepcopy
17
+ import albumentations as A
18
+ from training.dataset.albu import IsotropicResize
19
+ from training.dataset.abstract_dataset import DeepfakeAbstractBaseDataset
20
+ from training.dataset.sbi_api import SBI_API
21
+
22
+
23
+ class SBIDataset(DeepfakeAbstractBaseDataset):
24
+ def __init__(self, config=None, mode='train'):
25
+ super().__init__(config, mode)
26
+
27
+ # Get real lists
28
+ # Fix the label of real images to be 0
29
+ self.real_imglist = [(img, label) for img, label in zip(self.image_list, self.label_list) if label == 0]
30
+
31
+ # Init SBI
32
+ self.sbi = SBI_API(phase=mode,image_size=config['resolution'])
33
+
34
+ # Init data augmentation method
35
+ self.transform = self.init_data_aug_method()
36
+
37
+ def __getitem__(self, index):
38
+ # Get the real image paths and labels
39
+ real_image_path, real_label = self.real_imglist[index]
40
+
41
+ # Get the landmark paths for real images
42
+ real_landmark_path = real_image_path.replace('frames', 'landmarks').replace('.png', '.npy')
43
+ landmark = self.load_landmark(real_landmark_path).astype(np.int32)
44
+
45
+ # Load the real images
46
+ real_image = self.load_rgb(real_image_path)
47
+ real_image = np.array(real_image) # Convert to numpy array
48
+
49
+ # Generate the corresponding SBI sample
50
+ fake_image, real_image = self.sbi(real_image, landmark)
51
+ if fake_image is None:
52
+ fake_image = deepcopy(real_image)
53
+ fake_label = 0
54
+ else:
55
+ fake_label = 1
56
+
57
+ # To tensor and normalize for fake and real images
58
+ fake_image_trans = self.normalize(self.to_tensor(fake_image))
59
+ real_image_trans = self.normalize(self.to_tensor(real_image))
60
+
61
+ return {"fake": (fake_image_trans, fake_label),
62
+ "real": (real_image_trans, real_label)}
63
+
64
+ def __len__(self):
65
+ return len(self.real_imglist)
66
+
67
+ @staticmethod
68
+ def collate_fn(batch):
69
+ """
70
+ Collate a batch of data points.
71
+
72
+ Args:
73
+ batch (list): A list of tuples containing the image tensor and label tensor.
74
+
75
+ Returns:
76
+ A tuple containing the image tensor, the label tensor, the landmark tensor,
77
+ and the mask tensor.
78
+ """
79
+ # Separate the image, label, landmark, and mask tensors for fake and real data
80
+ fake_images, fake_labels = zip(*[data["fake"] for data in batch])
81
+ real_images, real_labels = zip(*[data["real"] for data in batch])
82
+
83
+ # Stack the image, label, landmark, and mask tensors for fake and real data
84
+ fake_images = torch.stack(fake_images, dim=0)
85
+ fake_labels = torch.LongTensor(fake_labels)
86
+ real_images = torch.stack(real_images, dim=0)
87
+ real_labels = torch.LongTensor(real_labels)
88
+
89
+ # Combine the fake and real tensors and create a dictionary of the tensors
90
+ images = torch.cat([real_images, fake_images], dim=0)
91
+ labels = torch.cat([real_labels, fake_labels], dim=0)
92
+
93
+ data_dict = {
94
+ 'image': images,
95
+ 'label': labels,
96
+ 'landmark': None,
97
+ 'mask': None,
98
+ }
99
+ return data_dict
100
+
101
+ def init_data_aug_method(self):
102
+ trans = A.Compose([
103
+ A.HorizontalFlip(p=self.config['data_aug']['flip_prob']),
104
+ A.Rotate(limit=self.config['data_aug']['rotate_limit'], p=self.config['data_aug']['rotate_prob']),
105
+ A.GaussianBlur(blur_limit=self.config['data_aug']['blur_limit'], p=self.config['data_aug']['blur_prob']),
106
+ A.OneOf([
107
+ IsotropicResize(max_side=self.config['resolution'], interpolation_down=cv2.INTER_AREA, interpolation_up=cv2.INTER_CUBIC),
108
+ IsotropicResize(max_side=self.config['resolution'], interpolation_down=cv2.INTER_AREA, interpolation_up=cv2.INTER_LINEAR),
109
+ IsotropicResize(max_side=self.config['resolution'], interpolation_down=cv2.INTER_LINEAR, interpolation_up=cv2.INTER_LINEAR),
110
+ ], p = 0 if self.config['with_landmark'] else 1),
111
+ A.OneOf([
112
+ A.RandomBrightnessContrast(brightness_limit=self.config['data_aug']['brightness_limit'], contrast_limit=self.config['data_aug']['contrast_limit']),
113
+ A.FancyPCA(),
114
+ A.HueSaturationValue()
115
+ ], p=0.5),
116
+ A.ImageCompression(quality_lower=self.config['data_aug']['quality_lower'], quality_upper=self.config['data_aug']['quality_upper'], p=0.5)
117
+ ],
118
+ additional_targets={'real': 'sbi'},
119
+ )
120
+ return trans
121
+
122
+
123
+ if __name__ == '__main__':
124
+ with open('/data/home/zhiyuanyan/DeepfakeBench/training/config/detector/sbi.yaml', 'r') as f:
125
+ config = yaml.safe_load(f)
126
+ train_set = SBIDataset(config=config, mode='train')
127
+ train_data_loader = \
128
+ torch.utils.data.DataLoader(
129
+ dataset=train_set,
130
+ batch_size=config['train_batchSize'],
131
+ shuffle=True,
132
+ num_workers=0,
133
+ collate_fn=train_set.collate_fn,
134
+ )
135
+ from tqdm import tqdm
136
+ for iteration, batch in enumerate(tqdm(train_data_loader)):
137
+ print(iteration)
138
+ if iteration > 10:
139
+ break
training/dataset/tall_dataset.py ADDED
@@ -0,0 +1,183 @@
1
+ # author: Zhiyuan Yan
2
+ # email: [email protected]
3
+ # date: 2023-03-30
4
+ # description: Dataset class for the TALL video-level detector.
5
+
6
+ import sys
7
+
8
+ from torch import nn
9
+
10
+ sys.path.append('.')
11
+
12
+ import yaml
13
+ import numpy as np
14
+ from copy import deepcopy
15
+ import random
16
+ import torch
17
+ from torch.utils import data
18
+ from torchvision.utils import save_image
19
+ from training.dataset import DeepfakeAbstractBaseDataset
20
+ from einops import rearrange
21
+
22
+ FFpp_pool = ['FaceForensics++', 'FaceShifter', 'DeepFakeDetection', 'FF-DF', 'FF-F2F', 'FF-FS', 'FF-NT'] #
23
+
24
+
25
+ def all_in_pool(inputs, pool):
26
+ for each in inputs:
27
+ if each not in pool:
28
+ return False
29
+ return True
30
+
31
+
32
+ class TALLDataset(DeepfakeAbstractBaseDataset):
33
+ def __init__(self, config=None, mode='train'):
34
+ """Initializes the dataset object.
35
+
36
+ Args:
37
+ config (dict): A dictionary containing configuration parameters.
38
+ mode (str): A string indicating the mode (train or test).
39
+
40
+ Raises:
41
+ NotImplementedError: If mode is not train or test.
42
+ """
43
+ super().__init__(config, mode)
44
+
45
+ assert self.video_level, "TALL is a video-based method"
46
+ assert int(self.clip_size ** 0.5) ** 2 == self.clip_size, 'clip_size must be square of an integer, e.g., 4'
47
+
48
+ def __getitem__(self, index, no_norm=False):
49
+ """
50
+ Returns the data point at the given index.
51
+
52
+ Args:
53
+ index (int): The index of the data point.
54
+
55
+ Returns:
56
+ A tuple containing the image tensor, the label tensor, the landmark tensor,
57
+ and the mask tensor.
58
+ """
59
+ # Get the image paths and label
60
+ image_paths = self.data_dict['image'][index]
61
+ label = self.data_dict['label'][index]
62
+
63
+ if not isinstance(image_paths, list):
64
+ image_paths = [image_paths] # for the image-level IO, only one frame is used
65
+
66
+ image_tensors = []
67
+ landmark_tensors = []
68
+ mask_tensors = []
69
+ augmentation_seed = None
70
+
71
+ for image_path in image_paths:
72
+ # Initialize a new seed for data augmentation at the start of each video
73
+ if self.video_level and image_path == image_paths[0]:
74
+ augmentation_seed = random.randint(0, 2 ** 32 - 1)
75
+
76
+ # Get the mask and landmark paths
77
+ mask_path = image_path.replace('frames', 'masks') # Use .png for mask
78
+ landmark_path = image_path.replace('frames', 'landmarks').replace('.png', '.npy') # Use .npy for landmark
79
+
80
+ # Load the image
81
+ try:
82
+ image = self.load_rgb(image_path)
83
+ except Exception as e:
84
+ # Skip this image and return the first one
85
+ print(f"Error loading image at index {index}: {e}")
86
+ return self.__getitem__(0)
87
+ image = np.array(image) # Convert to numpy array for data augmentation
88
+
89
+ # Load mask and landmark (if needed)
90
+ if self.config['with_mask']:
91
+ mask = self.load_mask(mask_path)
92
+ else:
93
+ mask = None
94
+ if self.config['with_landmark']:
95
+ landmarks = self.load_landmark(landmark_path)
96
+ else:
97
+ landmarks = None
98
+
99
+ # Do Data Augmentation
100
+ if self.mode == 'train' and self.config['use_data_augmentation']:
101
+ image_trans, landmarks_trans, mask_trans = self.data_aug(image, landmarks, mask, augmentation_seed)
102
+ else:
103
+ image_trans, landmarks_trans, mask_trans = deepcopy(image), deepcopy(landmarks), deepcopy(mask)
104
+
105
+ # To tensor and normalize
106
+ if not no_norm:
107
+ image_trans = self.normalize(self.to_tensor(image_trans))
108
+ if self.config['with_landmark']:
109
+ landmarks_trans = torch.from_numpy(landmarks)
110
+ if self.config['with_mask']:
111
+ mask_trans = torch.from_numpy(mask_trans)
112
+
113
+ image_tensors.append(image_trans)
114
+ landmark_tensors.append(landmarks_trans)
115
+ mask_tensors.append(mask_trans)
116
+
117
+ if self.video_level:
118
+
119
+ # Stack image tensors along a new dimension (time)
120
+ image_tensors = torch.stack(image_tensors, dim=0)
121
+
122
+ # cut out 16x16 patch
123
+ F, C, H, W = image_tensors.shape
124
+ x, y = np.random.randint(W), np.random.randint(H)
125
+ x1 = np.clip(x - self.config['mask_grid_size'] // 2, 0, W)
126
+ x2 = np.clip(x + self.config['mask_grid_size'] // 2, 0, W)
127
+ y1 = np.clip(y - self.config['mask_grid_size'] // 2, 0, H)
128
+ y2 = np.clip(y + self.config['mask_grid_size'] // 2, 0, H)
129
+ image_tensors[:, :, y1:y2, x1:x2] = -1
130
+
131
+ # # concatenate sub-images and resize to 224x224
132
+ # image_tensors = image_tensors.reshape(-1, H, W)
133
+ # image_tensors = rearrange(image_tensors, '(rh rw c) h w -> c (rh h) (rw w)', rh=2, c=C)
134
+ # image_tensors = nn.functional.interpolate(image_tensors.unsqueeze(0),
135
+ # size=(self.config['resolution'], self.config['resolution']),
136
+ # mode='bilinear', align_corners=False).squeeze(0)
137
+ # Stack landmark and mask tensors along a new dimension (time)
138
+ if not any(landmark is None or (isinstance(landmark, list) and None in landmark) for landmark in
139
+ landmark_tensors):
140
+ landmark_tensors = torch.stack(landmark_tensors, dim=0)
141
+ if not any(m is None or (isinstance(m, list) and None in m) for m in mask_tensors):
142
+ mask_tensors = torch.stack(mask_tensors, dim=0)
143
+ else:
144
+ # Get the first image tensor
145
+ image_tensors = image_tensors[0]
146
+ # Get the first landmark and mask tensors
147
+ if not any(landmark is None or (isinstance(landmark, list) and None in landmark) for landmark in
148
+ landmark_tensors):
149
+ landmark_tensors = landmark_tensors[0]
150
+ if not any(m is None or (isinstance(m, list) and None in m) for m in mask_tensors):
151
+ mask_tensors = mask_tensors[0]
152
+
153
+ return image_tensors, label, landmark_tensors, mask_tensors
154
+
155
+
156
+ if __name__ == "__main__":
157
+ with open('training/config/detector/tall.yaml', 'r') as f:
158
+ config = yaml.safe_load(f)
159
+ train_set = TALLDataset(
160
+ config=config,
161
+ mode='train',
162
+ )
163
+ train_data_loader = \
164
+ torch.utils.data.DataLoader(
165
+ dataset=train_set,
166
+ batch_size=config['train_batchSize'],
167
+ shuffle=True,
168
+ num_workers=0,
169
+ collate_fn=train_set.collate_fn,
170
+ )
171
+ from tqdm import tqdm
172
+
173
+ for iteration, batch in enumerate(tqdm(train_data_loader)):
174
+ print(batch['image'].shape)
175
+ print(batch['label'])
176
+ b, f, c, h, w = batch['image'].shape
177
+ for i in range(f):
178
+ img_tensor = batch['image'][0][i]
179
+ img_tensor = img_tensor * torch.tensor([0.5, 0.5, 0.5]).reshape(-1, 1, 1) + torch.tensor(
180
+ [0.5, 0.5, 0.5]).reshape(-1, 1, 1)
181
+ save_image(img_tensor, f'{i}.png')
182
+
183
+ break
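A minimal illustrative sketch (synthetic clip, assuming mask_grid_size=16 and a 224x224 resolution) of the cut-out step in TALLDataset.__getitem__ above: one random grid cell is clamped to the frame bounds and filled with -1 in every frame of the clip:

import numpy as np
import torch

frames = torch.zeros(4, 3, 224, 224)        # (F, C, H, W) synthetic clip
grid = 16
x, y = 220, 5                                # deliberately near the border
x1, x2 = np.clip(x - grid // 2, 0, 224), np.clip(x + grid // 2, 0, 224)
y1, y2 = np.clip(y - grid // 2, 0, 224), np.clip(y + grid // 2, 0, 224)
frames[:, :, y1:y2, x1:x2] = -1              # the same cell is masked across all frames
assert (frames == -1).sum() == 4 * 3 * (y2 - y1) * (x2 - x1)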
training/dataset/utils/DeepFakeMask.py ADDED
@@ -0,0 +1,402 @@
1
+ #!/usr/bin/python
2
+ # -*- coding: UTF-8 -*-
3
+ # Created by: algohunt
4
+ # Microsoft Research & Peking University
5
6
+ # Copyright (c) 2019
7
+
8
+ #!/usr/bin/env python3
9
+ """ Masks functions for faceswap.py """
10
+
11
+ import inspect
12
+ import logging
13
+ import sys
14
+
15
+ import cv2
16
+ import numpy as np
17
+ import random
18
+ from math import ceil, floor
19
+ logger = logging.getLogger(__name__) # pylint: disable=invalid-name
20
+
21
+ def landmarks_to_bbox(landmarks: np.ndarray) -> np.ndarray:
22
+ if not isinstance(landmarks, np.ndarray):
23
+ landmarks = np.array(landmarks)
24
+ assert landmarks.shape[1] == 2
25
+ x0, y0 = np.min(landmarks, axis=0) # per-axis minimum of the landmarks, e.g. [264, 97]
26
+ x1, y1 = np.max(landmarks, axis=0) # per-axis maximum of the landmarks, e.g. [370, 236]
27
+ bbox = np.array([x0, y0, x1, y1])
28
+ return bbox
29
+
30
+ def mask_from_points(image: np.ndarray, points: np.ndarray) -> np.ndarray:
31
+ """8 (or omitted) - 8-connected line.
32
+ 4 - 4-connected line.
33
+ LINE_AA - antialiased line."""
34
+ h, w = image.shape[:2]
35
+ points = points.astype(int)
36
+ assert points.shape[1] == 2, f"points.shape: {points.shape}"
37
+ out = np.zeros((h, w), dtype=np.uint8)
38
+ hull = cv2.convexHull(points.astype(int))
39
+ cv2.fillConvexPoly(out, hull, 255, lineType=4) # cv2.LINE_AA
40
+ return out
41
+
42
+ def get_available_masks():
43
+ """ Return a list of the available masks for cli """
44
+ masks = sorted([name for name, obj in inspect.getmembers(sys.modules[__name__])
45
+ if inspect.isclass(obj) and name != "Mask"])
46
+ masks.append("none")
47
+ # logger.debug(masks)
48
+ return masks
49
+
50
+ def landmarks_68_symmetries():
51
+ # 68 landmarks symmetry
52
+ #
53
+ sym_ids = [9, 58, 67, 63, 52, 34, 31, 30, 29, 28]
54
+ sym = {
55
+ 1: 17,
56
+ 2: 16,
57
+ 3: 15,
58
+ 4: 14,
59
+ 5: 13,
60
+ 6: 12,
61
+ 7: 11,
62
+ 8: 10,
63
+ #
64
+ 51: 53,
65
+ 50: 54,
66
+ 49: 55,
67
+ 60: 56,
68
+ 59: 57,
69
+ #
70
+ 62: 64,
71
+ 61: 65,
72
+ 68: 66,
73
+ #
74
+ 33: 35,
75
+ 32: 36,
76
+ #
77
+ 37: 46,
78
+ 38: 45,
79
+ 39: 44,
80
+ 40: 43,
81
+ 41: 48,
82
+ 42: 47,
83
+ #
84
+ 18: 27,
85
+ 19: 26,
86
+ 20: 25,
87
+ 21: 24,
88
+ 22: 23,
89
+ #
90
+ # id
91
+ 9: 9,
92
+ 58: 58,
93
+ 67: 67,
94
+ 63: 63,
95
+ 52: 52,
96
+ 34: 34,
97
+ 31: 31,
98
+ 30: 30,
99
+ 29: 29,
100
+ 28: 28,
101
+ }
102
+ return sym, sym_ids
103
+
104
+
105
+
106
+ def get_default_mask():
107
+ """ Set the default mask for cli """
108
+ masks = get_available_masks()
109
+ default = "dfl_full"
110
+ default = default if default in masks else masks[0]
111
+ # logger.debug(default)
112
+ return default
113
+
114
+
115
+ class Mask():
116
+ """ Parent class for masks
117
+ the output mask will be <mask_type>.mask
118
+ channels: 1, 3 or 4:
119
+ 1 - Returns a single channel mask
120
+ 3 - Returns a 3 channel mask
121
+ 4 - Returns the original image with the mask in the alpha channel """
122
+
123
+ def __init__(self, landmarks, face, channels=4, idx = 0):
124
+ # logger.info("Initializing %s: (face_shape: %s, channels: %s, landmarks: %s)",
125
+ # self.__class__.__name__, face.shape, channels, landmarks)
126
+ self.landmarks = landmarks
127
+ self.face = face
128
+ self.channels = channels
129
+ self.cols = 4 # grid mask
130
+ self.rows = 4 # grid mask
131
+ self.idx = idx # grid mask
132
+
133
+ mask = self.build_mask()
134
+ self.mask = self.merge_mask(mask)
135
+ # logger.info("Initialized %s", self.__class__.__name__)
136
+
137
+ def build_mask(self):
138
+ """ Override to build the mask """
139
+ raise NotImplementedError
140
+
141
+ def merge_mask(self, mask):
142
+ """ Return the mask in requested shape """
143
+ # logger.info("mask_shape: %s", mask.shape)
144
+ assert self.channels in (1, 3, 4), "Channels should be 1, 3 or 4"
145
+ assert mask.shape[2] == 1 and mask.ndim == 3, "Input mask must be 3-dimensional with 1 channel"
146
+
147
+ if self.channels == 3:
148
+ retval = np.tile(mask, 3)
149
+ elif self.channels == 4:
150
+ retval = np.concatenate((self.face, mask), -1)
151
+ else:
152
+ retval = mask
153
+
154
+ # logger.info("Final mask shape: %s", retval.shape)
155
+ return retval
156
+
157
+
158
+ class dfl_full(Mask): # pylint: disable=invalid-name
159
+ """ DFL facial mask """
160
+ def build_mask(self):
161
+ mask = np.zeros(self.face.shape[0:2] + (1, ), dtype=np.float32)
162
+
163
+ nose_ridge = (self.landmarks[27:31], self.landmarks[33:34])
164
+ jaw = (self.landmarks[0:17],
165
+ self.landmarks[48:68],
166
+ self.landmarks[0:1],
167
+ self.landmarks[8:9],
168
+ self.landmarks[16:17])
169
+ eyes = (self.landmarks[17:27],
170
+ self.landmarks[0:1],
171
+ self.landmarks[27:28],
172
+ self.landmarks[16:17],
173
+ self.landmarks[33:34])
174
+ parts = [jaw, nose_ridge, eyes]
175
+
176
+ for item in parts:
177
+ merged = np.concatenate(item)
178
+ cv2.fillConvexPoly(mask, cv2.convexHull(merged), 255.) # pylint: disable=no-member
179
+ return mask
180
+
181
+
182
+ class components(Mask): # pylint: disable=invalid-name
183
+ """ Component model mask """
184
+ def build_mask(self):
185
+ mask = np.zeros(self.face.shape[0:2] + (1, ), dtype=np.float32)
186
+
187
+ r_jaw = (self.landmarks[0:9], self.landmarks[17:18])
188
+ l_jaw = (self.landmarks[8:17], self.landmarks[26:27])
189
+ r_cheek = (self.landmarks[17:20], self.landmarks[8:9])
190
+ l_cheek = (self.landmarks[24:27], self.landmarks[8:9])
191
+ nose_ridge = (self.landmarks[19:25], self.landmarks[8:9],)
192
+ r_eye = (self.landmarks[17:22],
193
+ self.landmarks[27:28],
194
+ self.landmarks[31:36],
195
+ self.landmarks[8:9])
196
+ l_eye = (self.landmarks[22:27],
197
+ self.landmarks[27:28],
198
+ self.landmarks[31:36],
199
+ self.landmarks[8:9])
200
+ nose = (self.landmarks[27:31], self.landmarks[31:36])
201
+ parts = [r_jaw, l_jaw, r_cheek, l_cheek, nose_ridge, r_eye, l_eye, nose]
202
+
203
+ # ---change 0531 random select parts ---
204
+ # r_face = (self.landmarks[0:9], self.landmarks[17:18],self.landmarks[17:20], self.landmarks[8:9])
205
+ # l_face = (self.landmarks[8:17], self.landmarks[26:27],self.landmarks[24:27], self.landmarks[8:9])
206
+ # nose_final = (self.landmarks[19:25], self.landmarks[8:9],self.landmarks[27:31], self.landmarks[31:36])
207
+ # parts = [r_face,l_face,nose_final,r_eye,l_eye]
208
+ # num_to_select = random.randint(1, len(parts))
209
+ # parts = random.sample(parts, num_to_select)
210
+ # print(len(parts), parts[0])
211
+ # ---change 0531 random select parts ---
212
+
213
+ for item in parts:
214
+ merged = np.concatenate(item)
215
+ cv2.fillConvexPoly(mask, cv2.convexHull(merged), 255.) # pylint: disable=no-member
216
+ return mask
217
+
218
+
219
+ class extended(Mask): # pylint: disable=invalid-name
220
+ """ Extended mask
221
+ Based on components mask. Attempts to extend the eyebrow points up the forehead
222
+ """
223
+ def build_mask(self):
224
+ mask = np.zeros(self.face.shape[0:2] + (1, ), dtype=np.float32)
225
+
226
+ landmarks = self.landmarks.copy()
227
+ # mid points between the side of face and eye point
228
+ ml_pnt = (landmarks[36] + landmarks[0]) // 2
229
+ mr_pnt = (landmarks[16] + landmarks[45]) // 2
230
+
231
+ # mid points between the mid points and eye
232
+ ql_pnt = (landmarks[36] + ml_pnt) // 2
233
+ qr_pnt = (landmarks[45] + mr_pnt) // 2
234
+
235
+ # Top of the eye arrays
236
+ bot_l = np.array((ql_pnt, landmarks[36], landmarks[37], landmarks[38], landmarks[39]))
237
+ bot_r = np.array((landmarks[42], landmarks[43], landmarks[44], landmarks[45], qr_pnt))
238
+
239
+ # Eyebrow arrays
240
+ top_l = landmarks[17:22]
241
+ top_r = landmarks[22:27]
242
+
243
+ # Adjust eyebrow arrays
244
+ landmarks[17:22] = top_l + ((top_l - bot_l) // 2)
245
+ landmarks[22:27] = top_r + ((top_r - bot_r) // 2)
246
+
247
+ r_jaw = (landmarks[0:9], landmarks[17:18])
248
+ l_jaw = (landmarks[8:17], landmarks[26:27])
249
+ r_cheek = (landmarks[17:20], landmarks[8:9])
250
+ l_cheek = (landmarks[24:27], landmarks[8:9])
251
+ nose_ridge = (landmarks[19:25], landmarks[8:9],)
252
+ r_eye = (landmarks[17:22], landmarks[27:28], landmarks[31:36], landmarks[8:9])
253
+ l_eye = (landmarks[22:27], landmarks[27:28], landmarks[31:36], landmarks[8:9])
254
+ nose = (landmarks[27:31], landmarks[31:36])
255
+ parts = [r_jaw, l_jaw, r_cheek, l_cheek, nose_ridge, r_eye, l_eye, nose]
256
+
257
+ for item in parts:
258
+ merged = np.concatenate(item)
259
+ cv2.fillConvexPoly(mask, cv2.convexHull(merged), 255.) # pylint: disable=no-member
260
+ return mask
261
+
262
+
263
+ class facehull(Mask): # pylint: disable=invalid-name
264
+ """ Basic face hull mask """
265
+ def build_mask(self):
266
+ mask = np.zeros(self.face.shape[0:2] + (1, ), dtype=np.float32)
267
+ hull = cv2.convexHull( # pylint: disable=no-member
268
+ np.array(self.landmarks).reshape((-1, 2)))
269
+ cv2.fillConvexPoly(mask, hull, 255.0, lineType=cv2.LINE_AA) # pylint: disable=no-member
270
+ return mask
271
+ # mask = np.zeros(img.shape[0:2] + (1, ), dtype=np.float32)
272
+ # hull = cv2.convexHull(np.array(landmark).reshape((-1, 2)))
273
+
274
+ class facehull2(Mask): # pylint: disable=invalid-name
275
+ """ Basic face hull mask """
276
+ def build_mask(self):
277
+ mask = np.zeros(self.face.shape[0:2] + (1, ), dtype=np.uint8)
278
+ hull = cv2.convexHull( # pylint: disable=no-member
279
+ np.array(self.landmarks).reshape((-1, 2)))
280
+ cv2.fillConvexPoly(mask, hull, 1.0, lineType=cv2.LINE_AA)
281
+ return mask
282
+
283
+
284
+
285
+ class gridMasking(Mask):
286
+
287
+ def build_mask(self):
288
+ h, w = self.face.shape[:2]
289
+ landmarks = self.landmarks[:68]
290
+ # if idx is None:
291
+ # idx = np.random.randint(0, self.total)
292
+ r, c = divmod(self.idx, self.cols) # quotient and remainder: this idx maps to row r, column c of the grid
293
+
294
+ # pixel related
295
+ xmin, ymin, xmax, ymax = landmarks_to_bbox(landmarks)
296
+ dx = ceil((xmax - xmin) / self.cols)
297
+ dy = ceil((ymax - ymin) / self.rows)
298
+
299
+ mask = np.zeros((h, w), dtype=np.uint8)
300
+
301
+ # fill the cell mask
302
+ x0, y0 = floor(xmin + dx * c), floor(ymin + dy * r)
303
+ x1, y1 = floor(x0 + dx), floor(y0 + dy)
304
+ cv2.rectangle(mask, (x0, y0), (x1, y1), 255, -1)
305
+
306
+ # merge the cell mask with the convex hull
307
+ ch = mask_from_points(self.face, landmarks)
308
+ # ch = cv2.cvtColor(ch, cv2.COLOR_BGR2GRAY)
309
+ # mask = (mask & ch) / 255.0
310
+ mask = cv2.bitwise_and(mask, mask, mask=ch)
311
+ mask = mask.reshape([mask.shape[0],mask.shape[1], 1])
312
+ # cv2.bitwise_or(img, d_3c_i)
313
+
314
+ return mask
315
+
316
+ class MeshgridMasking(Mask):
317
+ areas = [
318
+ [1, 2, 3, 4, 5, 6, 7, 49, 32, 40, 41, 42, 37, 18],
319
+ [37, 38, 39, 40, 41, 42], # left eye
320
+ [18, 19, 20, 21, 22, 28, 40, 39, 38, 37],
321
+ [28, 29, 30, 31, 32, 40],
322
+ ]
323
+ areas_asym = [
324
+ [20, 21, 22, 28, 23, 24, 25], # old [22, 23, 28],
325
+ [31, 32, 33, 34, 35, 36],
326
+ [32, 33, 34, 35, 36, 55, 54, 53, 52, 51, 50, 49],
327
+ [49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60],
328
+ [7, 8, 9, 10, 11, 55, 56, 57, 58, 59, 60, 49],
329
+ ]
330
+
331
+ def init(self, **kwargs):
332
+ # super().__init__(**kwargs)
333
+
334
+ sym, _ = landmarks_68_symmetries()
335
+ # construct list of points paths
336
+ paths = []
337
+ paths += self.areas_asym # asymmetrical areas
338
+ paths += self.areas # left
339
+ paths += [[sym[ld68_id] for ld68_id in area] for area in self.areas] # right
340
+ assert len(paths) == self.total
341
+ self.paths = paths
342
+
343
+ @property
344
+ def total(self) -> int:
345
+ total = len(self.areas_asym) + len(self.areas) * 2
346
+ return total
347
+
348
+ def transform_landmarks(self, landmarks):
349
+ """Transform landmarks to extend the eyebrow points up the forehead"""
350
+ new_landmarks = landmarks.copy()
351
+ # mid points between the side of face and eye point
352
+ ml_pnt = (new_landmarks[36] + new_landmarks[0]) // 2
353
+ mr_pnt = (new_landmarks[16] + new_landmarks[45]) // 2
354
+
355
+ # mid points between the mid points and eye
356
+ ql_pnt = (new_landmarks[36] + ml_pnt) // 2
357
+ qr_pnt = (new_landmarks[45] + mr_pnt) // 2
358
+
359
+ # Top of the eye arrays
360
+ bot_l = np.array(
361
+ (
362
+ ql_pnt,
363
+ new_landmarks[36],
364
+ new_landmarks[37],
365
+ new_landmarks[38],
366
+ new_landmarks[39],
367
+ )
368
+ )
369
+ bot_r = np.array(
370
+ (
371
+ new_landmarks[42],
372
+ new_landmarks[43],
373
+ new_landmarks[44],
374
+ new_landmarks[45],
375
+ qr_pnt,
376
+ )
377
+ )
378
+
379
+ # Eyebrow arrays
380
+ top_l = new_landmarks[17:22]
381
+ top_r = new_landmarks[22:27]
382
+
383
+ # Adjust eyebrow arrays
384
+ new_landmarks[17:22] = top_l + ((top_l - bot_l) // 2)
385
+ new_landmarks[22:27] = top_r + ((top_r - bot_r) // 2)
386
+
387
+ return new_landmarks
388
+
389
+ def build_mask(self) -> np.ndarray:
390
+ self.init()
391
+ h, w = self.face.shape[:2]
392
+
393
+ path = self.paths[self.idx]
394
+ new_landmarks = self.transform_landmarks(self.landmarks)
395
+ points = [new_landmarks[ld68_id - 1] for ld68_id in path]
396
+ points = np.array(points, dtype=np.int32)
397
+
398
+ # cv2.fillConvexPoly(out, points, 255, lineType=4)
399
+ mask = np.zeros((h, w), dtype=np.uint8)
400
+ cv2.fillPoly(mask, [points], 255)
401
+ mask = mask.reshape([mask.shape[0],mask.shape[1], 1])
402
+ return mask
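A minimal illustrative sketch (synthetic numbers) of the cell arithmetic behind gridMasking.build_mask above: the landmark bounding box is split into a rows x cols grid and idx selects one cell via divmod:

from math import ceil, floor

rows = cols = 4
xmin, ymin, xmax, ymax = 40, 60, 200, 220    # synthetic landmark bbox
dx, dy = ceil((xmax - xmin) / cols), ceil((ymax - ymin) / rows)
idx = 6                                      # -> row 1, column 2
r, c = divmod(idx, cols)
x0, y0 = floor(xmin + dx * c), floor(ymin + dy * r)
x1, y1 = x0 + dx, y0 + dy
assert (r, c) == (1, 2) and (x0, y0, x1, y1) == (120, 100, 160, 140)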
training/dataset/utils/SLADD.py ADDED
@@ -0,0 +1,163 @@
1
+ from enum import Enum
2
+ from functools import reduce
3
+
4
+ import cv2
5
+ import numpy as np
6
+ from scipy.ndimage import binary_dilation
7
+
8
+ from .DeepFakeMask import Mask
9
+
10
+
11
+ def dist(a, b):
12
+ x1, y1 = a
13
+ x2, y2 = b
14
+ return np.sqrt((x1 - x2) ** 2 + (y1 - y2) ** 2)
15
+ # return np.linalg.norm(a-b)
16
+
17
+
18
+ def get_five_key(landmarks_68):
19
+ # get the five key points by using the landmarks
20
+ leye_center = (landmarks_68[36] + landmarks_68[39]) * 0.5
21
+ reye_center = (landmarks_68[42] + landmarks_68[45]) * 0.5
22
+ nose = landmarks_68[33]
23
+ lmouth = landmarks_68[48]
24
+ rmouth = landmarks_68[54]
25
+ leye_left = landmarks_68[36]
26
+ leye_right = landmarks_68[39]
27
+ reye_left = landmarks_68[42]
28
+ reye_right = landmarks_68[45]
29
+ out = [
30
+ tuple(x.astype("int32"))
31
+ for x in [
32
+ leye_center,
33
+ reye_center,
34
+ nose,
35
+ lmouth,
36
+ rmouth,
37
+ leye_left,
38
+ leye_right,
39
+ reye_left,
40
+ reye_right,
41
+ ]
42
+ ]
43
+ return out
44
+
45
+
46
+ def remove_eyes(image, landmarks, opt):
47
+ ##l: left eye; r: right eye, b: both eye
48
+ if opt == "l":
49
+ (x1, y1), (x2, y2) = landmarks[5:7]
50
+ elif opt == "r":
51
+ (x1, y1), (x2, y2) = landmarks[7:9]
52
+ elif opt == "b":
53
+ (x1, y1), (x2, y2) = landmarks[:2]
54
+ else:
55
+ print("wrong region")
56
+ mask = np.zeros_like(image[..., 0])
57
+ line = cv2.line(mask, (x1, y1), (x2, y2), color=(1), thickness=2)
58
+ w = dist((x1, y1), (x2, y2))
59
+ dilation = int(w // 4)
60
+ if opt != "b":
61
+ dilation *= 4
62
+ line = binary_dilation(line, iterations=dilation)
63
+ return line
64
+
65
+
66
+ def remove_nose(image, landmarks):
67
+ (x1, y1), (x2, y2) = landmarks[:2]
68
+ x3, y3 = landmarks[2]
69
+ mask = np.zeros_like(image[..., 0])
70
+ x4 = int((x1 + x2) / 2)
71
+ y4 = int((y1 + y2) / 2)
72
+ line = cv2.line(mask, (x3, y3), (x4, y4), color=(1), thickness=2)
73
+ w = dist((x1, y1), (x2, y2))
74
+ dilation = int(w // 4)
75
+ line = binary_dilation(line, iterations=dilation)
76
+ return line
77
+
78
+
79
+ def remove_mouth(image, landmarks):
80
+ (x1, y1), (x2, y2) = landmarks[3:5]
81
+ mask = np.zeros_like(image[..., 0])
82
+ line = cv2.line(mask, (x1, y1), (x2, y2), color=(1), thickness=2)
83
+ w = dist((x1, y1), (x2, y2))
84
+ dilation = int(w // 3)
85
+ line = binary_dilation(line, iterations=dilation)
86
+ return line
87
+
88
+
89
+ class SladdRegion(Enum):
90
+ left_eye = 0
91
+ right_eye = 1
92
+ nose = 2
93
+ mouth = 3
94
+ # composition
95
+ both_eyes = left_eye + right_eye # 4
96
+
97
+
98
+ class SladdMasking(Mask):
99
+
100
+ # [0, 1, 2, 3, (0, 1), (0, 2), (1, 2), (2, 3), (0, 1, 2), (0, 1, 2, 3)]
101
+ # left-eye, right-eye, nose, mouth, ...
102
+ ALL_REGIONS = [
103
+ SladdRegion.left_eye,
104
+ SladdRegion.right_eye,
105
+ SladdRegion.nose,
106
+ SladdRegion.mouth,
107
+ ]
108
+ REGIONS = [
109
+ [SladdRegion.left_eye],
110
+ [SladdRegion.right_eye],
111
+ [SladdRegion.nose],
112
+ [SladdRegion.mouth],
113
+ [SladdRegion.left_eye, SladdRegion.right_eye],
114
+ [SladdRegion.left_eye, SladdRegion.nose],
115
+ [SladdRegion.right_eye, SladdRegion.nose],
116
+ [SladdRegion.nose, SladdRegion.mouth],
117
+ [SladdRegion.left_eye, SladdRegion.right_eye, SladdRegion.nose],
118
+ ALL_REGIONS,
119
+ ]
120
+
121
+ def init(self, compose: bool = False, single: bool = True, **kwargs):
122
+ # super().__init__(**kwargs)
123
+ self.compose = compose
124
+ if compose:
125
+ self.regions = SladdMasking.REGIONS
126
+ else:
127
+ self.regions = [reg for reg in SladdMasking.REGIONS if len(reg) == 1]
128
+ if single:
129
+ self.regions = [self.ALL_REGIONS]
130
+
131
+ @property
132
+ def total(self) -> int:
133
+ return len(self.regions)
134
+
135
+ @staticmethod
136
+ def parse(img, reg, landmarks) -> np.ndarray:
137
+ five_key = get_five_key(landmarks)
138
+ if reg is SladdRegion.left_eye:
139
+ mask = remove_eyes(img, five_key, "l")
140
+ elif reg is SladdRegion.right_eye:
141
+ mask = remove_eyes(img, five_key, "r")
142
+ elif reg is SladdRegion.nose:
143
+ mask = remove_nose(img, five_key)
144
+ elif reg is SladdRegion.mouth:
145
+ mask = remove_mouth(img, five_key)
146
+ else:
147
+ raise ValueError("Invalid region")
148
+ # elif reg == SladdRegion4:
149
+ # mask = remove_eyes(img, five_key, "b")
150
+ return mask
151
+
152
+ def build_mask(self) -> np.ndarray:
153
+ self.init()
154
+ h, w = self.face.shape[:2]
155
+ # print(len(self.regions))
156
+ regs = [self.regions[0][self.idx]]
157
+ # if isinstance(reg, int):
158
+ # mask = parse(img, reg, landmarks)
159
+ masks = [SladdMasking.parse(self.face, reg, self.landmarks) for reg in regs]
160
+ mask = reduce(np.maximum, masks)
161
+ mask = mask.reshape([mask.shape[0],mask.shape[1], 1])
162
+
163
+ return mask
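The region helpers above (remove_eyes, remove_nose, remove_mouth) all share one recipe: draw a thin line between two landmark points, then grow it with binary dilation scaled by the distance between the points. A minimal, self-contained sketch of that recipe on hand-picked points (illustrative only, not part of the committed file):

import cv2
import numpy as np
from scipy.ndimage import binary_dilation

h, w = 256, 256
p1, p2 = (80, 100), (175, 100)  # e.g. the two eye centers

mask = np.zeros((h, w), dtype=np.uint8)
cv2.line(mask, p1, p2, color=1, thickness=2)

# grow the line by a fraction of the landmark distance, as remove_eyes does
span = np.sqrt((p1[0] - p2[0]) ** 2 + (p1[1] - p2[1]) ** 2)
region = binary_dilation(mask, iterations=int(span // 4))
print(region.shape, region.sum())  # boolean region mask around the eye line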
training/dataset/utils/attribution_mask.py ADDED
@@ -0,0 +1,55 @@
1
+
2
+
3
+ import cv2
4
+ import math
5
+ import numpy as np
6
+ from scipy.ndimage import binary_erosion, binary_dilation
7
+ def dist(p1, p2):
8
+ return math.sqrt((p1[0] - p2[0]) ** 2 + (p1[1] - p2[1]) ** 2)
9
+
10
+ def remove_mouth(image, landmarks):
11
+ (x1, y1), (x2, y2) = landmarks[3:5]
12
+ mask = np.zeros_like(image[..., 0])
13
+ line = cv2.line(mask, (x1, y1), (x2, y2), color=(1), thickness=2)
14
+ w = dist((x1, y1), (x2, y2))
15
+ dilation = int(w // 3)
16
+ line = binary_dilation(line, iterations=dilation)
17
+ return line
18
+
19
+ def remove_eyes(image, landmarks, opt='b'):
20
+ ## l: left eye; r: right eye; b: both eyes
21
+ if opt == 'l':
22
+ (x1, y1), (x2, y2) = landmarks[36],landmarks[39]
23
+ elif opt == 'r':
24
+ (x1, y1), (x2, y2) = landmarks[42],landmarks[46]
25
+ elif opt == 'b':
26
+ (x1, y1), (x2, y2) = landmarks[36],landmarks[46]
27
+ else:
28
+ print('wrong region')
29
+ mask = np.zeros_like(image[..., 0])
30
+ line = cv2.line(np.array(mask, dtype=np.uint8), (int(x1), int(y1)), (int(x2), int(y2)), color=(1), thickness=2)
31
+ w = dist((x1, y1), (x2, y2))
32
+ dilation = int(w // 4)
33
+ if opt != 'b':
34
+ dilation *= 4
35
+ line = binary_dilation(line, iterations=dilation)
36
+ return line
37
+
38
+ def remove_nose(image, landmarks):
39
+ ## nose: bridge line (landmarks 27-30) plus base line (landmarks 31-35)
40
+
41
+ (x1, y1), (x2, y2) = landmarks[27], landmarks[30]
42
+ mask = np.zeros_like(image[..., 0])
43
+ line = cv2.line(np.array(mask, dtype=np.uint8), (int(x1), int(y1)), (int(x2), int(y2)), color=(1), thickness=2)
44
+ w = dist((x1, y1), (x2, y2))
45
+ dilation = int(w // 3)
46
+ line1 = binary_dilation(line, iterations=dilation)
47
+
48
+ (x1, y1), (x2, y2) = landmarks[31], landmarks[35]
49
+ mask = np.zeros_like(image[..., 0])
50
+ line = cv2.line(np.array(mask, dtype=np.uint8), (int(x1), int(y1)), (int(x2), int(y2)), color=(1), thickness=2)
51
+ w = dist((x1, y1), (x2, y2))
52
+ dilation = int(w //4 )
53
+ line2 = binary_dilation(line, iterations=dilation)
54
+
55
+ return line1+line2
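A hedged usage sketch of how these three helpers could be combined into one attribution mask. The import path and the landmark coordinates are assumptions for illustration, and note that remove_mouth expects the five-key-point list (mouth corners at indices 3 and 4) rather than the 68-point array:

import numpy as np
# import path is an assumption; adjust it to wherever this module lives in your setup
from training.dataset.utils.attribution_mask import remove_eyes, remove_nose, remove_mouth

image = np.zeros((256, 256, 3), dtype=np.uint8)

# crude synthetic 68-point landmarks; only the indices used by the helpers are filled in
landmarks = np.zeros((68, 2), dtype=np.int32)
landmarks[36], landmarks[39] = (80, 100), (105, 100)                    # left eye corners
landmarks[42], landmarks[45], landmarks[46] = (150, 100), (175, 100), (170, 105)
landmarks[27], landmarks[30] = (128, 100), (128, 150)                   # nose bridge
landmarks[31], landmarks[35] = (110, 160), (146, 160)                   # nose base
five_key = [(93, 100), (163, 100), (128, 150), (100, 200), (156, 200)]  # indices 3/4 = mouth corners

attribution = np.clip(
    remove_eyes(image, landmarks, opt='b').astype(np.uint8)
    + remove_nose(image, landmarks).astype(np.uint8)
    + remove_mouth(image, five_key).astype(np.uint8),
    0, 1,
)
print(attribution.shape, attribution.sum())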
training/dataset/utils/bi_online_generation.py ADDED
@@ -0,0 +1,289 @@
1
+ import dlib
2
+ from skimage import io
3
+ from skimage import transform as sktransform
4
+ import numpy as np
5
+ from matplotlib import pyplot as plt
6
+ import json
7
+ import os
8
+ import random
9
+ from PIL import Image
10
+ from imgaug import augmenters as iaa
11
+ from dataset.library.DeepFakeMask import dfl_full,facehull,components,extended
12
+ from dataset.utils.attribution_mask import *
13
+ import cv2
14
+ import tqdm
15
+
16
+ '''
17
+ from PIL import ImageDraw
18
+ # create an object that can draw on the image
19
+ img_pil=Image.fromarray(img)
20
+ draw = ImageDraw.Draw(img_pil)
21
+
22
+ # draw the landmark points on the image
23
+ for i, point in enumerate(landmark):
24
+ x, y = point
25
+ radius = 1 # point radius
26
+ draw.ellipse((x-radius, y-radius, x+radius, y+radius), fill="red")
27
+ draw.text((x+radius+2, y-radius), str(i), fill="black") # add an index label next to the point
28
+ img_pil.show()
29
+ '''
30
+
31
+
32
+ def name_resolve(path):
33
+ name = os.path.splitext(os.path.basename(path))[0]
34
+ vid_id, frame_id = name.split('_')[0:2]
35
+ return vid_id, frame_id
36
+
37
+ def total_euclidean_distance(a,b):
38
+ assert len(a.shape) == 2
39
+ return np.sum(np.linalg.norm(a-b,axis=1))
40
+
41
+ def get_five_key(landmarks_68):
42
+ # get the five key points by using the landmarks
43
+ leye_center = (landmarks_68[36] + landmarks_68[39])*0.5
44
+ reye_center = (landmarks_68[42] + landmarks_68[45])*0.5
45
+ nose = landmarks_68[33]
46
+ lmouth = landmarks_68[48]
47
+ rmouth = landmarks_68[54]
48
+ leye_left = landmarks_68[36]
49
+ leye_right = landmarks_68[39]
50
+ reye_left = landmarks_68[42]
51
+ reye_right = landmarks_68[45]
52
+ out = [ tuple(x.astype('int32')) for x in [
53
+ leye_center,reye_center,nose,lmouth,rmouth,leye_left,leye_right,reye_left,reye_right
54
+ ]]
55
+ return out
56
+
57
+ def random_get_hull(landmark,img1,hull_type=None):
58
+ if hull_type==None:
59
+ hull_type = random.choice([0,1,2,3])
60
+ if hull_type == 0:
61
+ mask = dfl_full(landmarks=landmark.astype('int32'),face=img1, channels=3).mask
62
+ return mask[:,:,0]/255
63
+ elif hull_type == 1:
64
+ mask = extended(landmarks=landmark.astype('int32'),face=img1, channels=3).mask
65
+ return mask[:,:,0]/255
66
+ elif hull_type == 2:
67
+ mask = components(landmarks=landmark.astype('int32'),face=img1, channels=3).mask
68
+ return mask[:,:,0]/255
69
+ elif hull_type == 3:
70
+ mask = facehull(landmarks=landmark.astype('int32'),face=img1, channels=3).mask
71
+ return mask[:,:,0]/255
72
+ elif hull_type == 4:
73
+ mask = remove_mouth(img1,get_five_key(landmark))
74
+ return mask.astype(np.float32)
75
+ elif hull_type == 5:
76
+ mask = remove_eyes(img1,landmark)
77
+ return mask.astype(np.float32)
78
+ elif hull_type == 6:
79
+ mask = remove_nose(img1,landmark)
80
+ return mask.astype(np.float32)
81
+ elif hull_type == 7:
82
+ mask = remove_nose(img1,landmark) + remove_eyes(img1,landmark) + remove_mouth(img1,get_five_key(landmark))
83
+ return mask.astype(np.float32)
84
+
85
+
86
+ def random_erode_dilate(mask, ksize=None):
87
+ if random.random()>0.5:
88
+ if ksize is None:
89
+ ksize = random.randint(1,21)
90
+ if ksize % 2 == 0:
91
+ ksize += 1
92
+ mask = np.array(mask).astype(np.uint8)*255
93
+ kernel = np.ones((ksize,ksize),np.uint8)
94
+ mask = cv2.erode(mask,kernel,1)/255
95
+ else:
96
+ if ksize is None:
97
+ ksize = random.randint(1,5)
98
+ if ksize % 2 == 0:
99
+ ksize += 1
100
+ mask = np.array(mask).astype(np.uint8)*255
101
+ kernel = np.ones((ksize,ksize),np.uint8)
102
+ mask = cv2.dilate(mask,kernel,1)/255
103
+ return mask
104
+
105
+
106
+ # borrow from https://github.com/MarekKowalski/FaceSwap
107
+ def blendImages(src, dst, mask, featherAmount=0.2):
108
+
109
+ maskIndices = np.where(mask != 0)
110
+
111
+ src_mask = np.ones_like(mask)
112
+ dst_mask = np.zeros_like(mask)
113
+
114
+ maskPts = np.hstack((maskIndices[1][:, np.newaxis], maskIndices[0][:, np.newaxis]))
115
+ faceSize = np.max(maskPts, axis=0) - np.min(maskPts, axis=0)
116
+ featherAmount = featherAmount * np.max(faceSize)
117
+
118
+ hull = cv2.convexHull(maskPts)
119
+ dists = np.zeros(maskPts.shape[0])
120
+ for i in range(maskPts.shape[0]):
121
+ dists[i] = cv2.pointPolygonTest(hull, (maskPts[i, 0], maskPts[i, 1]), True)
122
+
123
+ weights = np.clip(dists / featherAmount, 0, 1)
124
+
125
+ composedImg = np.copy(dst)
126
+ composedImg[maskIndices[0], maskIndices[1]] = weights[:, np.newaxis] * src[maskIndices[0], maskIndices[1]] + (1 - weights[:, np.newaxis]) * dst[maskIndices[0], maskIndices[1]]
127
+
128
+ composedMask = np.copy(dst_mask)
129
+ composedMask[maskIndices[0], maskIndices[1]] = weights[:, np.newaxis] * src_mask[maskIndices[0], maskIndices[1]] + (
130
+ 1 - weights[:, np.newaxis]) * dst_mask[maskIndices[0], maskIndices[1]]
131
+
132
+ return composedImg, composedMask
133
+
134
+
135
+ # borrow from https://github.com/MarekKowalski/FaceSwap
136
+ def colorTransfer(src, dst, mask):
137
+ transferredDst = np.copy(dst)
138
+
139
+ maskIndices = np.where(mask != 0)
140
+
141
+
142
+ maskedSrc = src[maskIndices[0], maskIndices[1]].astype(np.int32)
143
+ maskedDst = dst[maskIndices[0], maskIndices[1]].astype(np.int32)
144
+
145
+ meanSrc = np.mean(maskedSrc, axis=0)
146
+ meanDst = np.mean(maskedDst, axis=0)
147
+
148
+ maskedDst = maskedDst - meanDst
149
+ maskedDst = maskedDst + meanSrc
150
+ maskedDst = np.clip(maskedDst, 0, 255)
151
+
152
+ transferredDst[maskIndices[0], maskIndices[1]] = maskedDst
153
+
154
+ return transferredDst
155
+
156
+ class BIOnlineGeneration():
157
+ def __init__(self):
158
+ with open('precomuted_landmarks.json', 'r') as f:
159
+ self.landmarks_record = json.load(f)
160
+ for k,v in self.landmarks_record.items():
161
+ self.landmarks_record[k] = np.array(v)
162
+ # all frames extracted from all videos, named {videoid}_{frameid}
163
+ self.data_list = [
164
+ '000_0000.png',
165
+ '001_0000.png'
166
+ ] * 10000
167
+
168
+ # predefine mask distortion
169
+ self.distortion = iaa.Sequential([iaa.PiecewiseAffine(scale=(0.01, 0.15))])
170
+
171
+ def gen_one_datapoint(self):
172
+ background_face_path = random.choice(self.data_list)
173
+ data_type = 'real' if random.randint(0,1) else 'fake'
174
+ if data_type == 'fake' :
175
+ face_img,mask = self.get_blended_face(background_face_path)
176
+ mask = ( 1 - mask ) * mask * 4
177
+ else:
178
+ face_img = io.imread(background_face_path)
179
+ mask = np.zeros((317, 317, 1))
180
+
181
+ # randomly downsample after BI pipeline
182
+ if random.randint(0,1):
183
+ aug_size = random.randint(64, 317)
184
+ face_img = Image.fromarray(face_img)
185
+ if random.randint(0,1):
186
+ face_img = face_img.resize((aug_size, aug_size), Image.BILINEAR)
187
+ else:
188
+ face_img = face_img.resize((aug_size, aug_size), Image.NEAREST)
189
+ face_img = face_img.resize((317, 317),Image.BILINEAR)
190
+ face_img = np.array(face_img)
191
+
192
+ # random jpeg compression after BI pipeline
193
+ if random.randint(0,1):
194
+ quality = random.randint(60, 100)
195
+ encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), quality]
196
+ face_img_encode = cv2.imencode('.jpg', face_img, encode_param)[1]
197
+ face_img = cv2.imdecode(face_img_encode, cv2.IMREAD_COLOR)
198
+
199
+ face_img = face_img[60:317,30:287,:]
200
+ mask = mask[60:317,30:287,:]
201
+
202
+ # random flip
203
+ if random.randint(0,1):
204
+ face_img = np.flip(face_img,1)
205
+ mask = np.flip(mask,1)
206
+
207
+ return face_img,mask,data_type
208
+
209
+ def get_blended_face(self,background_face_path):
210
+ background_face = io.imread(background_face_path)
211
+ background_landmark = self.landmarks_record[background_face_path]
212
+
213
+ foreground_face_path = self.search_similar_face(background_landmark,background_face_path)
214
+ foreground_face = io.imread(foreground_face_path)
215
+
216
+ # down sample before blending
217
+ aug_size = random.randint(128,317)
218
+ background_landmark = background_landmark * (aug_size/317)
219
+ foreground_face = sktransform.resize(foreground_face,(aug_size,aug_size),preserve_range=True).astype(np.uint8)
220
+ background_face = sktransform.resize(background_face,(aug_size,aug_size),preserve_range=True).astype(np.uint8)
221
+
222
+ # get random type of initial blending mask
223
+ mask = random_get_hull(background_landmark, background_face)
224
+
225
+ # random deform mask
226
+ mask = self.distortion.augment_image(mask)
227
+ mask = random_erode_dilate(mask)
228
+
229
+ # filter out empty masks after deformation
230
+ if np.sum(mask) == 0 :
231
+ raise NotImplementedError
232
+
233
+ # apply color transfer
234
+ foreground_face = colorTransfer(background_face, foreground_face, mask*255)
235
+
236
+ # blend two face
237
+ blended_face, mask = blendImages(foreground_face, background_face, mask*255)
238
+ blended_face = blended_face.astype(np.uint8)
239
+
240
+ # resize back to default resolution
241
+ blended_face = sktransform.resize(blended_face,(317,317),preserve_range=True).astype(np.uint8)
242
+ mask = sktransform.resize(mask,(317,317),preserve_range=True)
243
+ mask = mask[:,:,0:1]
244
+ return blended_face,mask
245
+
246
+ def search_similar_face(self,this_landmark,background_face_path):
247
+ vid_id, frame_id = name_resolve(background_face_path)
248
+ min_dist = 99999999
249
+
250
+ # randomly sample 5000 frames from all frames:
251
+ all_candidate_path = random.sample( self.data_list, k=5000)
252
+
253
+ # filter out frames that come from the same video as the background face
254
+ all_candidate_path = filter(lambda k:name_resolve(k)[0] != vid_id, all_candidate_path)
255
+ all_candidate_path = list(all_candidate_path)
256
+
257
+ # loop through all candidate frames to get the best match
258
+ for candidate_path in all_candidate_path:
259
+ candidate_landmark = self.landmarks_record[candidate_path].astype(np.float32)
260
+ candidate_distance = total_euclidean_distance(candidate_landmark, this_landmark)
261
+ if candidate_distance < min_dist:
262
+ min_dist = candidate_distance
263
+ min_path = candidate_path
264
+
265
+ return min_path
266
+
267
+ if __name__ == '__main__':
268
+ ds = BIOnlineGeneration()
269
+ from tqdm import tqdm
270
+ all_imgs = []
271
+ for _ in tqdm(range(50)):
272
+ img,mask,label = ds.gen_one_datapoint()
273
+ mask = np.repeat(mask,3,2)
274
+ mask = (mask*255).astype(np.uint8)
275
+ img_cat = np.concatenate([img,mask],1)
276
+ all_imgs.append(img_cat)
277
+ all_in_one = Image.new('RGB', (2570,2570))
278
+
279
+ for x in range(5):
280
+ for y in range(10):
281
+ idx = x*10+y
282
+ im = Image.fromarray(all_imgs[idx])
283
+
284
+ dx = x*514
285
+ dy = y*257
286
+
287
+ all_in_one.paste(im, (dx,dy))
288
+
289
+ all_in_one.save("all_in_one.jpg")
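One detail worth calling out from gen_one_datapoint: for fake samples the blending mask is turned into a boundary-style target with mask = (1 - mask) * mask * 4, which is zero where a pixel is purely background or purely foreground and peaks at 1 where the blend is half-and-half. A tiny sketch of that transform (illustrative only):

import numpy as np

alpha = np.linspace(0.0, 1.0, 11)      # blending weights from pure background to pure face
boundary = (1 - alpha) * alpha * 4     # same transform applied to the mask above
print(np.round(boundary, 2))           # 0.0 at both ends, 1.0 at alpha = 0.5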
training/dataset/utils/bi_online_generation_yzy.py ADDED
@@ -0,0 +1,268 @@
1
+ import dlib
2
+ from skimage import io
3
+ from skimage import transform as sktransform
4
+ import numpy as np
5
+ from matplotlib import pyplot as plt
6
+ import json
7
+ import os
8
+ import random
9
+ from PIL import Image
10
+ from imgaug import augmenters as iaa
11
+ from .DeepFakeMask import dfl_full,facehull,components,extended,gridMasking,MeshgridMasking, facehull2
12
+ from .SLADD import SladdMasking
13
+ import cv2
14
+ import torch
15
+ import torch.nn as nn
16
+ import tqdm
17
+ import pdb
18
+
19
+
20
+ def name_resolve(path):
21
+ name = os.path.splitext(os.path.basename(path))[0]
22
+ vid_id, frame_id = name.split('_')[0:2]
23
+ return vid_id, frame_id
24
+
25
+ def total_euclidean_distance(a,b):
26
+ assert len(a.shape) == 2
27
+ return np.sum(np.linalg.norm(a-b,axis=1))
28
+
29
+ def random_get_hull(landmark,img1,hull_type0, idx=0):
30
+ # print("in bi online generation----------",hull_type0)
31
+ if hull_type0 == -1:
32
+ hull_type = random.choice([0,1,2,3])
33
+ else:
34
+ # hull_type = int(random.choice(hull_type0))
35
+ hull_type = hull_type0
36
+ # print(hull_type)
37
+ if hull_type == 0:
38
+ # print("here")
39
+ mask = dfl_full(landmarks=landmark.astype('int32'),face=img1, channels=3).mask
40
+ return mask/255, idx
41
+ elif hull_type == 1:
42
+ mask = extended(landmarks=landmark.astype('int32'),face=img1, channels=3).mask
43
+ return mask/255, idx
44
+ elif hull_type == 2:
45
+ mask = components(landmarks=landmark.astype('int32'),face=img1, channels=3).mask
46
+ return mask/255, idx
47
+ elif hull_type == 3:
48
+ mask = facehull(landmarks=landmark.astype('int32'),face=img1, channels=3).mask
49
+ return mask/255, idx # --change0628-- mask/255
50
+
51
+ # elif hull_type == 4: # SLADD
52
+ # mask = SladdMasking(landmarks=landmark.astype('int32'),face=img1, channels=3, idx=0).mask
53
+ # return mask/1., idx
54
+ # elif hull_type == 5: # SLADD
55
+ # mask = SladdMasking(landmarks=landmark.astype('int32'),face=img1, channels=3, idx=1).mask
56
+ # return mask/1., idx
57
+ # elif hull_type == 6: # SLADD
58
+ # mask = SladdMasking(landmarks=landmark.astype('int32'),face=img1, channels=3, idx=2).mask
59
+ # return mask/1., idx
60
+ elif hull_type == 6: # SLADD/mouth
61
+ mask = SladdMasking(landmarks=landmark.astype('int32'),face=img1, channels=3, idx=3).mask
62
+ return mask/1., idx
63
+
64
+
65
+ def random_erode_dilate(mask, ksize=None):
66
+ if random.random()>0.5:
67
+ if ksize is None:
68
+ ksize = random.randint(1,21)
69
+ if ksize % 2 == 0:
70
+ ksize += 1
71
+ mask = np.array(mask).astype(np.uint8)*255
72
+ kernel = np.ones((ksize,ksize),np.uint8)
73
+ mask = cv2.erode(mask,kernel,1)/255
74
+ else:
75
+ if ksize is None:
76
+ ksize = random.randint(1,5)
77
+ if ksize % 2 == 0:
78
+ ksize += 1
79
+ mask = np.array(mask).astype(np.uint8)*255
80
+ kernel = np.ones((ksize,ksize),np.uint8)
81
+ mask = cv2.dilate(mask,kernel,1)/255
82
+ return mask
83
+
84
+
85
+ # borrow from https://github.com/MarekKowalski/FaceSwap
86
+ def blendImages(src, dst, mask, featherAmount=0.2):
87
+
88
+ maskIndices = np.where(mask != 0)
89
+
90
+ src_mask = np.ones_like(mask)
91
+ dst_mask = np.zeros_like(mask)
92
+
93
+ maskPts = np.hstack((maskIndices[1][:, np.newaxis], maskIndices[0][:, np.newaxis]))
94
+ faceSize = np.max(maskPts, axis=0) - np.min(maskPts, axis=0)
95
+ featherAmount = featherAmount * np.max(faceSize)
96
+
97
+ hull = cv2.convexHull(maskPts)
98
+ dists = np.zeros(maskPts.shape[0])
99
+ for i in range(maskPts.shape[0]):
100
+ dists[i] = cv2.pointPolygonTest(hull, (maskPts[i, 0], maskPts[i, 1]), True)
101
+
102
+ weights = np.clip(dists / featherAmount, 0, 1)
103
+
104
+ composedImg = np.copy(dst)
105
+ composedImg[maskIndices[0], maskIndices[1]] = weights[:, np.newaxis] * src[maskIndices[0], maskIndices[1]] + (1 - weights[:, np.newaxis]) * dst[maskIndices[0], maskIndices[1]]
106
+
107
+ composedMask = np.copy(dst_mask)
108
+ composedMask[maskIndices[0], maskIndices[1]] = weights[:, np.newaxis] * src_mask[maskIndices[0], maskIndices[1]] + (
109
+ 1 - weights[:, np.newaxis]) * dst_mask[maskIndices[0], maskIndices[1]]
110
+
111
+ return composedImg, composedMask
112
+
113
+
114
+ # borrow from https://github.com/MarekKowalski/FaceSwap
115
+ def colorTransfer(src, dst, mask):
116
+ transferredDst = np.copy(dst)
117
+
118
+ maskIndices = np.where(mask != 0)
119
+
120
+
121
+ maskedSrc = src[maskIndices[0], maskIndices[1]].astype(np.int32)
122
+ maskedDst = dst[maskIndices[0], maskIndices[1]].astype(np.int32)
123
+
124
+ meanSrc = np.mean(maskedSrc, axis=0)
125
+ meanDst = np.mean(maskedDst, axis=0)
126
+
127
+ maskedDst = maskedDst - meanDst
128
+ maskedDst = maskedDst + meanSrc
129
+ maskedDst = np.clip(maskedDst, 0, 255)
130
+
131
+ transferredDst[maskIndices[0], maskIndices[1]] = maskedDst
132
+
133
+ return transferredDst
134
+
135
+ class BIOnlineGeneration():
136
+ def __init__(self):
137
+ with open('precomuted_landmarks.json', 'r') as f:
138
+ self.landmarks_record = json.load(f)
139
+ for k,v in self.landmarks_record.items():
140
+ self.landmarks_record[k] = np.array(v)
141
+ # all frames extracted from all videos, named {videoid}_{frameid}
142
+ self.data_list = [
143
+ '000_0000.png',
144
+ '001_0000.png'
145
+ ] * 10000
146
+
147
+ # predefine mask distortion
148
+ self.distortion = iaa.Sequential([iaa.PiecewiseAffine(scale=(0.01, 0.15))])
149
+
150
+ def gen_one_datapoint(self):
151
+ background_face_path = random.choice(self.data_list)
152
+ data_type = 'real' if random.randint(0,1) else 'fake'
153
+ if data_type == 'fake' :
154
+ face_img,mask = self.get_blended_face(background_face_path)
155
+ mask = ( 1 - mask ) * mask * 4
156
+ else:
157
+ face_img = io.imread(background_face_path)
158
+ mask = np.zeros((317, 317, 1))
159
+
160
+ # randomly downsample after BI pipeline
161
+ if random.randint(0,1):
162
+ aug_size = random.randint(64, 317)
163
+ face_img = Image.fromarray(face_img)
164
+ if random.randint(0,1):
165
+ face_img = face_img.resize((aug_size, aug_size), Image.BILINEAR)
166
+ else:
167
+ face_img = face_img.resize((aug_size, aug_size), Image.NEAREST)
168
+ face_img = face_img.resize((317, 317),Image.BILINEAR)
169
+ face_img = np.array(face_img)
170
+
171
+ # random jpeg compression after BI pipeline
172
+ if random.randint(0,1):
173
+ quality = random.randint(60, 100)
174
+ encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), quality]
175
+ face_img_encode = cv2.imencode('.jpg', face_img, encode_param)[1]
176
+ face_img = cv2.imdecode(face_img_encode, cv2.IMREAD_COLOR)
177
+
178
+ face_img = face_img[60:317,30:287,:]
179
+ mask = mask[60:317,30:287,:]
180
+
181
+ # random flip
182
+ if random.randint(0,1):
183
+ face_img = np.flip(face_img,1)
184
+ mask = np.flip(mask,1)
185
+
186
+ return face_img,mask,data_type
187
+
188
+ def get_blended_face(self,background_face_path):
189
+ background_face = io.imread(background_face_path)
190
+ background_landmark = self.landmarks_record[background_face_path]
191
+
192
+ foreground_face_path = self.search_similar_face(background_landmark,background_face_path)
193
+ foreground_face = io.imread(foreground_face_path)
194
+
195
+ # down sample before blending
196
+ aug_size = random.randint(128,317)
197
+ background_landmark = background_landmark * (aug_size/317)
198
+ foreground_face = sktransform.resize(foreground_face,(aug_size,aug_size),preserve_range=True).astype(np.uint8)
199
+ background_face = sktransform.resize(background_face,(aug_size,aug_size),preserve_range=True).astype(np.uint8)
200
+
201
+ # get random type of initial blending mask
202
+ mask, idx = random_get_hull(background_landmark, background_face)
203
+
204
+ # random deform mask
205
+ mask = self.distortion.augment_image(mask)
206
+ mask = random_erode_dilate(mask)
207
+
208
+ # filter out empty masks after deformation
209
+ if np.sum(mask) == 0 :
210
+ raise NotImplementedError
211
+
212
+ # apply color transfer
213
+ foreground_face = colorTransfer(background_face, foreground_face, mask*255)
214
+
215
+ # blend two face
216
+ blended_face, mask = blendImages(foreground_face, background_face, mask*255)
217
+ blended_face = blended_face.astype(np.uint8)
218
+
219
+ # resize back to default resolution
220
+ blended_face = sktransform.resize(blended_face,(317,317),preserve_range=True).astype(np.uint8)
221
+ mask = sktransform.resize(mask,(317,317),preserve_range=True)
222
+ mask = mask[:,:,0:1]
223
+ return blended_face,mask
224
+
225
+ def search_similar_face(self,this_landmark,background_face_path):
226
+ vid_id, frame_id = name_resolve(background_face_path)
227
+ min_dist = 99999999
228
+
229
+ # randomly sample 5000 frames from all frames:
230
+ all_candidate_path = random.sample( self.data_list, k=5000)
231
+
232
+ # filter out frames that come from the same video as the background face
233
+ all_candidate_path = filter(lambda k:name_resolve(k)[0] != vid_id, all_candidate_path)
234
+ all_candidate_path = list(all_candidate_path)
235
+
236
+ # loop through all candidate frames to get the best match
237
+ for candidate_path in all_candidate_path:
238
+ candidate_landmark = self.landmarks_record[candidate_path].astype(np.float32)
239
+ candidate_distance = total_euclidean_distance(candidate_landmark, this_landmark)
240
+ if candidate_distance < min_dist:
241
+ min_dist = candidate_distance
242
+ min_path = candidate_path
243
+
244
+ return min_path
245
+
246
+ if __name__ == '__main__':
247
+ ds = BIOnlineGeneration()
248
+ from tqdm import tqdm
249
+ all_imgs = []
250
+ for _ in tqdm(range(50)):
251
+ img,mask,label = ds.gen_one_datapoint()
252
+ mask = np.repeat(mask,3,2)
253
+ mask = (mask*255).astype(np.uint8)
254
+ img_cat = np.concatenate([img,mask],1)
255
+ all_imgs.append(img_cat)
256
+ all_in_one = Image.new('RGB', (2570,2570))
257
+
258
+ for x in range(5):
259
+ for y in range(10):
260
+ idx = x*10+y
261
+ im = Image.fromarray(all_imgs[idx])
262
+
263
+ dx = x*514
264
+ dy = y*257
265
+
266
+ all_in_one.paste(im, (dx,dy))
267
+
268
+ all_in_one.save("all_in_one.jpg")
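The "# random deform mask" step in both BI generators pairs imgaug's PiecewiseAffine warp with the random erode/dilate morphology defined above. A standalone sketch of that deformation on a synthetic circular mask (illustrative; the scale range and the odd kernel size mirror the values used here):

import cv2
import numpy as np
from imgaug import augmenters as iaa

mask = np.zeros((317, 317), dtype=np.uint8)
cv2.circle(mask, (158, 158), 80, 255, -1)            # synthetic blending region

distortion = iaa.Sequential([iaa.PiecewiseAffine(scale=(0.01, 0.15))])
warped = distortion.augment_image(mask)              # locally warp the mask boundary

kernel = np.ones((11, 11), np.uint8)                 # odd kernel size, as in random_erode_dilate
deformed = cv2.erode(warped, kernel) / 255.0         # or cv2.dilate, chosen at random in the dataset code
print(deformed.shape, float(deformed.min()), float(deformed.max()))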
training/dataset/utils/color_transfer.py ADDED
@@ -0,0 +1,516 @@
1
+ import cv2
2
+ import numpy as np
3
+ from numpy import linalg as npla
4
+
5
+ import scipy as sp
6
+ import scipy.sparse
7
+ from scipy.sparse.linalg import spsolve
8
+
9
+
10
+ def color_transfer_sot(src, trg, steps=10, batch_size=5, reg_sigmaXY=16.0, reg_sigmaV=5.0):
11
+ """
12
+ Color Transform via Sliced Optimal Transfer
13
+ ported by @iperov from https://github.com/dcoeurjo/OTColorTransfer
14
+
15
+ src - any float range any channel image
16
+ dst - any float range any channel image, same shape as src
17
+ steps - number of solver steps
18
+ batch_size - solver batch size
19
+ reg_sigmaXY - apply regularization and sigmaXY of filter, otherwise set to 0.0
20
+ reg_sigmaV - sigmaV of filter
21
+
22
+ return value - clip it manually
23
+ """
24
+ if not np.issubdtype(src.dtype, np.floating):
25
+ raise ValueError("src value must be float")
26
+ if not np.issubdtype(trg.dtype, np.floating):
27
+ raise ValueError("trg value must be float")
28
+
29
+ if len(src.shape) != 3:
30
+ raise ValueError("src shape must have rank 3 (h,w,c)")
31
+
32
+ if src.shape != trg.shape:
33
+ raise ValueError("src and trg shapes must be equal")
34
+
35
+ src_dtype = src.dtype
36
+ h, w, c = src.shape
37
+ new_src = src.copy()
38
+
39
+ for step in range(steps):
40
+ advect = np.zeros((h*w, c), dtype=src_dtype)
41
+ for batch in range(batch_size):
42
+ dir = np.random.normal(size=c).astype(src_dtype)
43
+ dir /= npla.norm(dir)
44
+
45
+ projsource = np.sum(new_src*dir, axis=-1).reshape((h*w))
46
+ projtarget = np.sum(trg*dir, axis=-1).reshape((h*w))
47
+
48
+ idSource = np.argsort(projsource)
49
+ idTarget = np.argsort(projtarget)
50
+
51
+ a = projtarget[idTarget]-projsource[idSource]
52
+ for i_c in range(c):
53
+ advect[idSource, i_c] += a * dir[i_c]
54
+ new_src += advect.reshape((h, w, c)) / batch_size
55
+
56
+ if reg_sigmaXY != 0.0:
57
+ src_diff = new_src-src
58
+ src_diff_filt = cv2.bilateralFilter(
59
+ src_diff, 0, reg_sigmaV, reg_sigmaXY)
60
+ if len(src_diff_filt.shape) == 2:
61
+ src_diff_filt = src_diff_filt[..., None]
62
+ new_src = src + src_diff_filt
63
+ return new_src
64
+
65
+
66
+ def color_transfer_mkl(x0, x1):
67
+ eps = np.finfo(float).eps
68
+
69
+ h, w, c = x0.shape
70
+ h1, w1, c1 = x1.shape
71
+
72
+ x0 = x0.reshape((h*w, c))
73
+ x1 = x1.reshape((h1*w1, c1))
74
+
75
+ a = np.cov(x0.T)
76
+ b = np.cov(x1.T)
77
+
78
+ Da2, Ua = np.linalg.eig(a)
79
+ Da = np.diag(np.sqrt(Da2.clip(eps, None)))
80
+
81
+ C = np.dot(np.dot(np.dot(np.dot(Da, Ua.T), b), Ua), Da)
82
+
83
+ Dc2, Uc = np.linalg.eig(C)
84
+ Dc = np.diag(np.sqrt(Dc2.clip(eps, None)))
85
+
86
+ Da_inv = np.diag(1./(np.diag(Da)))
87
+
88
+ t = np.dot(
89
+ np.dot(np.dot(np.dot(np.dot(np.dot(Ua, Da_inv), Uc), Dc), Uc.T), Da_inv), Ua.T)
90
+
91
+ mx0 = np.mean(x0, axis=0)
92
+ mx1 = np.mean(x1, axis=0)
93
+
94
+ result = np.dot(x0-mx0, t) + mx1
95
+ return np.clip(result.reshape((h, w, c)).astype(x0.dtype), 0, 1)
96
+
97
+
98
+ def color_transfer_idt(i0, i1, bins=256, n_rot=20):
99
+ relaxation = 1 / n_rot
100
+ h, w, c = i0.shape
101
+ h1, w1, c1 = i1.shape
102
+
103
+ i0 = i0.reshape((h*w, c))
104
+ i1 = i1.reshape((h1*w1, c1))
105
+
106
+ n_dims = c
107
+
108
+ d0 = i0.T
109
+ d1 = i1.T
110
+
111
+ for i in range(n_rot):
112
+
113
+ r = sp.stats.special_ortho_group.rvs(n_dims).astype(np.float32)
114
+
115
+ d0r = np.dot(r, d0)
116
+ d1r = np.dot(r, d1)
117
+ d_r = np.empty_like(d0)
118
+
119
+ for j in range(n_dims):
120
+
121
+ lo = min(d0r[j].min(), d1r[j].min())
122
+ hi = max(d0r[j].max(), d1r[j].max())
123
+
124
+ p0r, edges = np.histogram(d0r[j], bins=bins, range=[lo, hi])
125
+ p1r, _ = np.histogram(d1r[j], bins=bins, range=[lo, hi])
126
+
127
+ cp0r = p0r.cumsum().astype(np.float32)
128
+ cp0r /= cp0r[-1]
129
+
130
+ cp1r = p1r.cumsum().astype(np.float32)
131
+ cp1r /= cp1r[-1]
132
+
133
+ f = np.interp(cp0r, cp1r, edges[1:])
134
+
135
+ d_r[j] = np.interp(d0r[j], edges[1:], f, left=0, right=bins)
136
+
137
+ d0 = relaxation * np.linalg.solve(r, (d_r - d0r)) + d0
138
+
139
+ return np.clip(d0.T.reshape((h, w, c)).astype(i0.dtype), 0, 1)
140
+
141
+
142
+ def laplacian_matrix(n, m):
143
+ mat_D = scipy.sparse.lil_matrix((m, m))
144
+ mat_D.setdiag(-1, -1)
145
+ mat_D.setdiag(4)
146
+ mat_D.setdiag(-1, 1)
147
+ mat_A = scipy.sparse.block_diag([mat_D] * n).tolil()
148
+ mat_A.setdiag(-1, 1*m)
149
+ mat_A.setdiag(-1, -1*m)
150
+ return mat_A
151
+
152
+
153
+ def seamless_clone(source, target, mask):
154
+ h, w, c = target.shape
155
+ result = []
156
+
157
+ mat_A = laplacian_matrix(h, w)
158
+ laplacian = mat_A.tocsc()
159
+
160
+ mask[0, :] = 1
161
+ mask[-1, :] = 1
162
+ mask[:, 0] = 1
163
+ mask[:, -1] = 1
164
+ q = np.argwhere(mask == 0)
165
+
166
+ k = q[:, 1]+q[:, 0]*w
167
+ mat_A[k, k] = 1
168
+ mat_A[k, k + 1] = 0
169
+ mat_A[k, k - 1] = 0
170
+ mat_A[k, k + w] = 0
171
+ mat_A[k, k - w] = 0
172
+
173
+ mat_A = mat_A.tocsc()
174
+ mask_flat = mask.flatten()
175
+ for channel in range(c):
176
+
177
+ source_flat = source[:, :, channel].flatten()
178
+ target_flat = target[:, :, channel].flatten()
179
+
180
+ mat_b = laplacian.dot(source_flat)*0.75
181
+ mat_b[mask_flat == 0] = target_flat[mask_flat == 0]
182
+
183
+ x = spsolve(mat_A, mat_b).reshape((h, w))
184
+ result.append(x)
185
+
186
+ return np.clip(np.dstack(result), 0, 1)
187
+
188
+
189
+ def reinhard_color_transfer(target, source, clip=False, preserve_paper=False, source_mask=None, target_mask=None):
190
+ """
191
+ Transfers the color distribution from the source to the target
192
+ image using the mean and standard deviations of the L*a*b*
193
+ color space.
194
+
195
+ This implementation is (loosely) based on the "Color Transfer
196
+ between Images" paper by Reinhard et al., 2001.
197
+
198
+ Parameters:
199
+ -------
200
+ source: NumPy array
201
+ OpenCV image in BGR color space (the source image)
202
+ target: NumPy array
203
+ OpenCV image in BGR color space (the target image)
204
+ clip: Should components of L*a*b* image be scaled by np.clip before
205
+ converting back to BGR color space?
206
+ If False then components will be min-max scaled appropriately.
207
+ Clipping will keep target image brightness truer to the input.
208
+ Scaling will adjust image brightness to avoid washed out portions
209
+ in the resulting color transfer that can be caused by clipping.
210
+ preserve_paper: Should color transfer strictly follow methodology
211
+ laid out in the original paper? The method does not always produce
212
+ aesthetically pleasing results.
213
+ If False then L*a*b* components will be scaled using the reciprocal of
214
+ the scaling factor proposed in the paper. This method seems to produce
215
+ more consistently aesthetically pleasing results
216
+
217
+ Returns:
218
+ -------
219
+ transfer: NumPy array
220
+ OpenCV image (w, h, 3) NumPy array (uint8)
221
+ """
222
+
223
+ # convert the images from the RGB to L*a*b* color space, being
224
+ # sure to utilize the floating point data type (note: OpenCV
225
+ # expects floats to be 32-bit, so use that instead of 64-bit)
226
+ source = cv2.cvtColor(source, cv2.COLOR_BGR2LAB).astype(np.float32)
227
+ target = cv2.cvtColor(target, cv2.COLOR_BGR2LAB).astype(np.float32)
228
+
229
+ # compute color statistics for the source and target images
230
+ src_input = source if source_mask is None else source*source_mask
231
+ tgt_input = target if target_mask is None else target*target_mask
232
+ (lMeanSrc, lStdSrc, aMeanSrc, aStdSrc,
233
+ bMeanSrc, bStdSrc) = lab_image_stats(src_input)
234
+ (lMeanTar, lStdTar, aMeanTar, aStdTar,
235
+ bMeanTar, bStdTar) = lab_image_stats(tgt_input)
236
+
237
+ # subtract the means from the target image
238
+ (l, a, b) = cv2.split(target)
239
+ l -= lMeanTar
240
+ a -= aMeanTar
241
+ b -= bMeanTar
242
+
243
+ if preserve_paper:
244
+ # scale by the standard deviations using paper proposed factor
245
+ l = (lStdTar / lStdSrc) * l
246
+ a = (aStdTar / aStdSrc) * a
247
+ b = (bStdTar / bStdSrc) * b
248
+ else:
249
+ # scale by the standard deviations using reciprocal of paper proposed factor
250
+ l = (lStdSrc / lStdTar) * l
251
+ a = (aStdSrc / aStdTar) * a
252
+ b = (bStdSrc / bStdTar) * b
253
+
254
+ # add in the source mean
255
+ l += lMeanSrc
256
+ a += aMeanSrc
257
+ b += bMeanSrc
258
+
259
+ # clip/scale the pixel intensities to [0, 255] if they fall
260
+ # outside this range
261
+ l = _scale_array(l, clip=clip)
262
+ a = _scale_array(a, clip=clip)
263
+ b = _scale_array(b, clip=clip)
264
+
265
+ # merge the channels together and convert back to the RGB color
266
+ # space, being sure to utilize the 8-bit unsigned integer data
267
+ # type
268
+ transfer = cv2.merge([l, a, b])
269
+ transfer = cv2.cvtColor(transfer.astype(np.uint8), cv2.COLOR_LAB2BGR)
270
+
271
+ # return the color transferred image
272
+ return transfer
273
+
274
+
275
+ def linear_color_transfer(target_img, source_img, mode='pca', eps=1e-5):
276
+ '''
277
+ Matches the colour distribution of the target image to that of the source image
278
+ using a linear transform.
279
+ Images are expected to be of form (w,h,c) and float in [0,1].
280
+ Modes are chol, pca or sym for different choices of basis.
281
+ '''
282
+ mu_t = target_img.mean(0).mean(0)
283
+ t = target_img - mu_t
284
+ t = t.transpose(2, 0, 1).reshape(t.shape[-1], -1)
285
+ Ct = t.dot(t.T) / t.shape[1] + eps * np.eye(t.shape[0])
286
+ mu_s = source_img.mean(0).mean(0)
287
+ s = source_img - mu_s
288
+ s = s.transpose(2, 0, 1).reshape(s.shape[-1], -1)
289
+ Cs = s.dot(s.T) / s.shape[1] + eps * np.eye(s.shape[0])
290
+ if mode == 'chol':
291
+ chol_t = np.linalg.cholesky(Ct)
292
+ chol_s = np.linalg.cholesky(Cs)
293
+ ts = chol_s.dot(np.linalg.inv(chol_t)).dot(t)
294
+ if mode == 'pca':
295
+ eva_t, eve_t = np.linalg.eigh(Ct)
296
+ Qt = eve_t.dot(np.sqrt(np.diag(eva_t))).dot(eve_t.T)
297
+ eva_s, eve_s = np.linalg.eigh(Cs)
298
+ Qs = eve_s.dot(np.sqrt(np.diag(eva_s))).dot(eve_s.T)
299
+ ts = Qs.dot(np.linalg.inv(Qt)).dot(t)
300
+ if mode == 'sym':
301
+ eva_t, eve_t = np.linalg.eigh(Ct)
302
+ Qt = eve_t.dot(np.sqrt(np.diag(eva_t))).dot(eve_t.T)
303
+ Qt_Cs_Qt = Qt.dot(Cs).dot(Qt)
304
+ eva_QtCsQt, eve_QtCsQt = np.linalg.eigh(Qt_Cs_Qt)
305
+ QtCsQt = eve_QtCsQt.dot(np.sqrt(np.diag(eva_QtCsQt))).dot(eve_QtCsQt.T)
306
+ ts = np.linalg.inv(Qt).dot(QtCsQt).dot(np.linalg.inv(Qt)).dot(t)
307
+ matched_img = ts.reshape(
308
+ *target_img.transpose(2, 0, 1).shape).transpose(1, 2, 0)
309
+ matched_img += mu_s
310
+ matched_img[matched_img > 1] = 1
311
+ matched_img[matched_img < 0] = 0
312
+ return np.clip(matched_img.astype(source_img.dtype), 0, 1)
313
+
314
+
315
+ def lab_image_stats(image):
316
+ # compute the mean and standard deviation of each channel
317
+ (l, a, b) = cv2.split(image)
318
+ (lMean, lStd) = (l.mean(), l.std())
319
+ (aMean, aStd) = (a.mean(), a.std())
320
+ (bMean, bStd) = (b.mean(), b.std())
321
+
322
+ # return the color statistics
323
+ return (lMean, lStd, aMean, aStd, bMean, bStd)
324
+
325
+
326
+ def _scale_array(arr, clip=True):
327
+ if clip:
328
+ return np.clip(arr, 0, 255)
329
+
330
+ mn = arr.min()
331
+ mx = arr.max()
332
+ scale_range = (max([mn, 0]), min([mx, 255]))
333
+
334
+ if mn < scale_range[0] or mx > scale_range[1]:
335
+ return (scale_range[1] - scale_range[0]) * (arr - mn) / (mx - mn) + scale_range[0]
336
+
337
+ return arr
338
+
339
+
340
+ def channel_hist_match(source, template, hist_match_threshold=255, mask=None):
341
+ # Code borrowed from:
342
+ # https://stackoverflow.com/questions/32655686/histogram-matching-of-two-images-in-python-2-x
343
+ masked_source = source
344
+ masked_template = template
345
+
346
+ if mask is not None:
347
+ masked_source = source * mask
348
+ masked_template = template * mask
349
+
350
+ oldshape = source.shape
351
+ source = source.ravel()
352
+ template = template.ravel()
353
+ masked_source = masked_source.ravel()
354
+ masked_template = masked_template.ravel()
355
+ s_values, bin_idx, s_counts = np.unique(source, return_inverse=True,
356
+ return_counts=True)
357
+ t_values, t_counts = np.unique(template, return_counts=True)
358
+
359
+ s_quantiles = np.cumsum(s_counts).astype(np.float64)
360
+ s_quantiles = hist_match_threshold * s_quantiles / s_quantiles[-1]
361
+ t_quantiles = np.cumsum(t_counts).astype(np.float64)
362
+ t_quantiles = 255 * t_quantiles / t_quantiles[-1]
363
+ interp_t_values = np.interp(s_quantiles, t_quantiles, t_values)
364
+
365
+ return interp_t_values[bin_idx].reshape(oldshape)
366
+
367
+
368
+ def color_hist_match(src_im, tar_im, hist_match_threshold=255, mask=None):
369
+ h, w, c = src_im.shape
370
+ matched_R = channel_hist_match(
371
+ src_im[:, :, 0], tar_im[:, :, 0], hist_match_threshold, mask)
372
+ matched_G = channel_hist_match(
373
+ src_im[:, :, 1], tar_im[:, :, 1], hist_match_threshold, mask)
374
+ matched_B = channel_hist_match(
375
+ src_im[:, :, 2], tar_im[:, :, 2], hist_match_threshold, mask)
376
+
377
+ to_stack = (matched_R, matched_G, matched_B)
378
+ for i in range(3, c):
379
+ to_stack += (src_im[:, :, i],)
380
+
381
+ matched = np.stack(to_stack, axis=-1).astype(src_im.dtype)
382
+ return matched
383
+
384
+
385
+ def color_transfer_mix(img_src, img_trg):
386
+ img_src = np.clip(img_src*255.0, 0, 255).astype(np.uint8)
387
+ img_trg = np.clip(img_trg*255.0, 0, 255).astype(np.uint8)
388
+
389
+ img_src_lab = cv2.cvtColor(img_src, cv2.COLOR_BGR2LAB)
390
+ img_trg_lab = cv2.cvtColor(img_trg, cv2.COLOR_BGR2LAB)
391
+
392
+ rct_light = np.clip(linear_color_transfer(img_src_lab[..., 0:1].astype(np.float32)/255.0,
393
+ img_trg_lab[..., 0:1].astype(np.float32)/255.0)[..., 0]*255.0,
394
+ 0, 255).astype(np.uint8)
395
+
396
+ img_src_lab[..., 0] = (np.ones_like(rct_light)*100).astype(np.uint8)
397
+ img_src_lab = cv2.cvtColor(img_src_lab, cv2.COLOR_LAB2BGR)
398
+
399
+ img_trg_lab[..., 0] = (np.ones_like(rct_light)*100).astype(np.uint8)
400
+ img_trg_lab = cv2.cvtColor(img_trg_lab, cv2.COLOR_LAB2BGR)
401
+
402
+ img_rct = color_transfer_sot(img_src_lab.astype(
403
+ np.float32), img_trg_lab.astype(np.float32))
404
+ img_rct = np.clip(img_rct, 0, 255).astype(np.uint8)
405
+
406
+ img_rct = cv2.cvtColor(img_rct, cv2.COLOR_BGR2LAB)
407
+ img_rct[..., 0] = rct_light
408
+ img_rct = cv2.cvtColor(img_rct, cv2.COLOR_LAB2BGR)
409
+
410
+ return (img_rct / 255.0).astype(np.float32)
411
+
412
+
413
+ def colorTransfer_fs(src_, dst_, mask):
414
+ src = dst_
415
+ dst = src_
416
+ transferredDst = np.copy(dst)
417
+ # indices of the non-black (non-zero) mask pixels
418
+ maskIndices = np.where(mask != 0)
419
+ # src[maskIndices[0], maskIndices[1]] returns the pixels inside the non-black mask region
420
+
421
+ maskedSrc = src[maskIndices[0], maskIndices[1]].astype(np.int32)
422
+ maskedDst = dst[maskIndices[0], maskIndices[1]].astype(np.int32)
423
+
424
+ meanSrc = np.mean(maskedSrc, axis=0)
425
+ meanDst = np.mean(maskedDst, axis=0)
426
+
427
+ maskedDst = maskedDst - meanDst
428
+ maskedDst = maskedDst + meanSrc
429
+ maskedDst = np.clip(maskedDst, 0, 255)
430
+
431
+ transferredDst[maskIndices[0], maskIndices[1]] = maskedDst
432
+ return transferredDst
433
+
434
+ def colorTransfer_avg(img_src, img_tgt, mask=None):
435
+ img_new = img_src.copy()
436
+ img_old = img_tgt.copy()
437
+ # print(mask)
438
+ if mask is not None:
439
+ img_new = (img_new*mask)#.astype(np.uint8)
440
+ img_old = (img_old*mask)#.astype(np.uint8)
441
+ # cv2.imshow('tgt', img_old)
442
+ w,h,c = img_new.shape
443
+ for i in range(img_new.shape[2]):
444
+ old_avg = img_old[:, :, i].mean()
445
+ new_avg = img_new[:, :, i].mean()
446
+ diff_int = old_avg - new_avg
447
+ # print(diff_int)
448
+ for m in range(img_new.shape[0]):
449
+ for n in range(img_new.shape[1]):
450
+ temp = img_new[m,n,i] + diff_int
451
+ temp = max(0., temp)
452
+ temp = min(1., temp)
453
+ # print(img_new[m,n,i], temp)
454
+ img_new[m,n,i] = temp
455
+
456
+ return img_new
457
+
458
+
459
+
460
+ def color_transfer(ct_mode, img_src, img_trg, mask):
461
+ """
462
+ color transfer for uint8 [0,255] inputs; returns a uint8 image
463
+ """
464
+ img_src = img_src.astype(dtype=np.float32) / 255.0
465
+ img_trg = img_trg.astype(dtype=np.float32) / 255.0
466
+
467
+ if ct_mode == 'lct':
468
+ out = linear_color_transfer(img_src, img_trg)
469
+ elif ct_mode == 'rct':
470
+ out = reinhard_color_transfer(np.clip(img_src*255, 0, 255).astype(np.uint8),
471
+ np.clip(img_trg*255, 0,
472
+ 255).astype(np.uint8),
473
+ preserve_paper=np.random.rand() < 0.5,
474
+ clip=np.random.rand() < 0.5)
475
+ out = np.clip(out.astype(np.float32) / 255.0, 0.0, 1.0)
476
+ elif ct_mode == 'rct-m':
477
+ out = reinhard_color_transfer(np.clip(img_src*255, 0, 255).astype(np.uint8),
478
+ np.clip(img_trg*255, 0,
479
+ 255).astype(np.uint8),
480
+ source_mask=mask, target_mask=mask)
481
+ #preserve_paper=np.random.rand() < 0.5,
482
+ #clip=np.random.rand() < 0.5)
483
+ out = np.clip(out.astype(np.float32) / 255.0, 0.0, 1.0)
484
+ elif ct_mode == 'rct-fs':
485
+ out = colorTransfer_fs(np.clip(img_src*255, 0, 255).astype(np.uint8),
486
+ np.clip(img_trg*255, 0, 255).astype(np.uint8), mask)
487
+ out = np.clip(out.astype(np.float32) / 255.0, 0.0, 1.0)
488
+ elif ct_mode == 'mkl':
489
+ out = color_transfer_mkl(img_src, img_trg)
490
+ elif ct_mode == 'mkl-m':
491
+ out = color_transfer_mkl(img_src*mask, img_trg*mask)
492
+ elif ct_mode == 'idt':
493
+ out = color_transfer_idt(img_src, img_trg)
494
+ elif ct_mode == 'idt-m':
495
+ out = color_transfer_idt(img_src*mask, img_trg*mask)
496
+ elif ct_mode == 'sot':
497
+ out = color_transfer_sot(img_src, img_trg)
498
+ out = np.clip(out, 0.0, 1.0)
499
+ elif ct_mode == 'sot-m':
500
+ out = color_transfer_sot(
501
+ (img_src*mask).astype(np.float32), (img_trg*mask).astype(np.float32))
502
+ out = np.clip(out, 0.0, 1.0)
503
+ elif ct_mode == 'mix-m':
504
+ out = color_transfer_mix(img_src*mask, img_trg*mask)
505
+ elif ct_mode == 'seamless-hist-match':
506
+ out = color_hist_match(img_src, img_trg)
507
+ elif ct_mode == 'seamless-hist-match-m':
508
+ out = color_hist_match(img_src, img_trg, mask=mask)
509
+ elif ct_mode == 'avg-align':
510
+ out = colorTransfer_avg(img_src, img_trg, mask=mask)
511
+ out = np.clip(out, 0.0, 1.0)
512
+ else:
513
+ raise ValueError(f"unknown ct_mode {ct_mode}")
514
+
515
+ out = np.clip(out*255, 0, 255).astype(np.uint8)
516
+ return out
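A hedged usage sketch of the color_transfer dispatcher on synthetic images (the import path is an assumption; 'lct' is chosen because it ignores the mask argument that the masked modes such as 'rct-m' rely on):

import numpy as np
# import path is an assumption; adjust it to wherever this module lives in your setup
from training.dataset.utils.color_transfer import color_transfer

rng = np.random.default_rng(0)
src = rng.integers(0, 256, size=(64, 64, 3), dtype=np.uint8)  # face crop to recolor
trg = rng.integers(0, 256, size=(64, 64, 3), dtype=np.uint8)  # target color distribution

out = color_transfer('lct', src, trg, mask=None)              # linear (PCA-based) color transfer
print(out.shape, out.dtype)                                   # (64, 64, 3) uint8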