mrbear1024 committed on
Commit
7d0b3e4
verified
1 Parent(s): a93730e

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50)
  1. .gitattributes +3 -0
  2. Dockerfile +0 -0
  3. README.md +169 -0
  4. app.py +3 -0
  5. checkpoints/checkpoints/240112_icl_audio2secc_vox2_cmlr/config.yaml +86 -0
  6. checkpoints/checkpoints/240112_icl_audio2secc_vox2_cmlr/model_ckpt_steps_1856000.ckpt +3 -0
  7. checkpoints/checkpoints/mimictalk_orig/os_secc2plane_torso/config.yaml +152 -0
  8. checkpoints/checkpoints/mimictalk_orig/os_secc2plane_torso/model_ckpt_steps_100000.ckpt +3 -0
  9. checkpoints/checkpoints/pretrained_ckpts/mit_b0.pth +3 -0
  10. checkpoints/checkpoints_mimictalk/German_20s/config.yaml +155 -0
  11. checkpoints/checkpoints_mimictalk/German_20s/model_ckpt_steps_10000.ckpt +3 -0
  12. deep_3drecon/BFM/.gitkeep +0 -0
  13. deep_3drecon/BFM/01_MorphableModel.mat +3 -0
  14. deep_3drecon/BFM/BFM_exp_idx.mat +0 -0
  15. deep_3drecon/BFM/BFM_front_idx.mat +0 -0
  16. deep_3drecon/BFM/BFM_model_front.mat +3 -0
  17. deep_3drecon/BFM/Exp_Pca.bin +3 -0
  18. deep_3drecon/BFM/basel_53201.txt +0 -0
  19. deep_3drecon/BFM/facemodel_info.mat +3 -0
  20. deep_3drecon/BFM/index_mp468_from_mesh35709.npy +3 -0
  21. deep_3drecon/BFM/index_mp468_from_mesh35709_v1.npy +3 -0
  22. deep_3drecon/BFM/index_mp468_from_mesh35709_v2.npy +3 -0
  23. deep_3drecon/BFM/index_mp468_from_mesh35709_v3.1.npy +3 -0
  24. deep_3drecon/BFM/index_mp468_from_mesh35709_v3.npy +3 -0
  25. deep_3drecon/BFM/select_vertex_id.mat +0 -0
  26. deep_3drecon/BFM/similarity_Lm3D_all.mat +0 -0
  27. deep_3drecon/BFM/std_exp.txt +1 -0
  28. deep_3drecon/__init__.py +1 -0
  29. deep_3drecon/bfm_left_eye_faces.npy +3 -0
  30. deep_3drecon/bfm_right_eye_faces.npy +3 -0
  31. deep_3drecon/data_preparation.py +45 -0
  32. deep_3drecon/deep_3drecon_models/__init__.py +67 -0
  33. deep_3drecon/deep_3drecon_models/arcface_torch/README.md +218 -0
  34. deep_3drecon/deep_3drecon_models/arcface_torch/backbones/__init__.py +85 -0
  35. deep_3drecon/deep_3drecon_models/arcface_torch/backbones/iresnet.py +194 -0
  36. deep_3drecon/deep_3drecon_models/arcface_torch/backbones/iresnet2060.py +176 -0
  37. deep_3drecon/deep_3drecon_models/arcface_torch/backbones/mobilefacenet.py +147 -0
  38. deep_3drecon/deep_3drecon_models/arcface_torch/backbones/vit.py +280 -0
  39. deep_3drecon/deep_3drecon_models/arcface_torch/configs/3millions.py +23 -0
  40. deep_3drecon/deep_3drecon_models/arcface_torch/configs/__init__.py +0 -0
  41. deep_3drecon/deep_3drecon_models/arcface_torch/configs/base.py +59 -0
  42. deep_3drecon/deep_3drecon_models/arcface_torch/configs/glint360k_mbf.py +27 -0
  43. deep_3drecon/deep_3drecon_models/arcface_torch/configs/glint360k_r100.py +27 -0
  44. deep_3drecon/deep_3drecon_models/arcface_torch/configs/glint360k_r50.py +27 -0
  45. deep_3drecon/deep_3drecon_models/arcface_torch/configs/ms1mv2_mbf.py +27 -0
  46. deep_3drecon/deep_3drecon_models/arcface_torch/configs/ms1mv2_r100.py +27 -0
  47. deep_3drecon/deep_3drecon_models/arcface_torch/configs/ms1mv2_r50.py +27 -0
  48. deep_3drecon/deep_3drecon_models/arcface_torch/configs/ms1mv3_mbf.py +27 -0
  49. deep_3drecon/deep_3drecon_models/arcface_torch/configs/ms1mv3_r100.py +27 -0
  50. deep_3drecon/deep_3drecon_models/arcface_torch/configs/ms1mv3_r50.py +27 -0
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ deep_3drecon/BFM/01_MorphableModel.mat filter=lfs diff=lfs merge=lfs -text
+ deep_3drecon/BFM/BFM_model_front.mat filter=lfs diff=lfs merge=lfs -text
+ deep_3drecon/BFM/facemodel_info.mat filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
File without changes
README.md ADDED
@@ -0,0 +1,169 @@
+ ---
+ title: Demo
+ emoji: 👀
+ colorFrom: blue
+ colorTo: pink
+ sdk: docker
+ pinned: false
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+
+
+ # MimicTalk: Mimicking a personalized and expressive 3D talking face in few minutes | NeurIPS 2024
+ [![arXiv](https://img.shields.io/badge/arXiv-Paper-%3CCOLOR%3E.svg)](https://arxiv.org/abs/2401.08503) | [![GitHub Stars](https://img.shields.io/github/stars/yerfor/MimicTalk)](https://github.com/yerfor/MimicTalk) | [English Readme](./README.md)
+
+ This repository is the official PyTorch implementation of MimicTalk, which synthesizes highly expressive talking-face videos for a specific speaker. The code builds on our previous work [Real3D-Portrait](https://github.com/yerfor/Real3DPortrait) (ICLR 2024), a NeRF-based one-shot talking-face system, which makes MimicTalk faster to train and stronger in quality. Visit our [project page](https://mimictalk.github.io/) to watch demo videos, and read our [paper](https://arxiv.org/abs/2410.06734) for technical details.
+
+ <p align="center">
+ <br>
+ <img src="assets/mimictalk.png" width="100%"/>
+ <br>
+ </p>
+
+ # Quick Start!
+ ## Set Up the Environment
+ Please follow the [environment setup guide](docs/prepare_env/install_guide-zh.md) to create the Conda environment `mimictalk`.
+ ## Download Pre-trained and Third-party Models
+ ### 3DMM BFM Model
+ Download the 3DMM BFM model: [Google Drive](https://drive.google.com/drive/folders/1o4t5YIw7w4cMUN4bgU9nPf6IyWVG1bEk?usp=sharing) or [BaiduYun Disk](https://pan.baidu.com/s/1aqv1z_qZ23Vp2VP4uxxblQ?pwd=m9q5) (extraction code: m9q5)
+
+ After downloading, place all the files in `deep_3drecon/BFM`; the file structure should look like this:
+ ```
+ deep_3drecon/BFM/
+ ├── 01_MorphableModel.mat
+ ├── BFM_exp_idx.mat
+ ├── BFM_front_idx.mat
+ ├── BFM_model_front.mat
+ ├── Exp_Pca.bin
+ ├── facemodel_info.mat
+ ├── index_mp468_from_mesh35709.npy
+ ├── mediapipe_in_bfm53201.npy
+ └── std_exp.txt
+ ```
+
+ ### Pre-trained Models
+ Download the pre-trained MimicTalk checkpoints: [Google Drive](https://drive.google.com/drive/folders/1Kc6ueDO9HFDN3BhtJCEKNCZtyKHSktaA?usp=sharing) or [BaiduYun Disk](https://pan.baidu.com/s/1nQKyGV5JB6rJtda7qsThUg?pwd=mimi) (extraction code: mimi)
+
+ After downloading, place all the files in `checkpoints` and `checkpoints_mimictalk` and unzip them; the file structure should look like this:
+ ```
+ checkpoints/
+ ├── mimictalk_orig
+ │   └── os_secc2plane_torso
+ │       ├── config.yaml
+ │       └── model_ckpt_steps_100000.ckpt
+ ├── 240112_icl_audio2secc_vox2_cmlr
+ │   ├── config.yaml
+ │   └── model_ckpt_steps_1856000.ckpt
+ └── pretrained_ckpts
+     └── mit_b0.pth
+
+ checkpoints_mimictalk/
+ └── German_20s
+     ├── config.yaml
+     └── model_ckpt_steps_10000.ckpt
+ ```
+
+ ## Minimal Commands for MimicTalk Training and Inference
+ ```
+ python inference/train_mimictalk_on_a_video.py # train the model; this may take 10 minutes for 2,000 steps
+ python inference/mimictalk_infer.py # run inference with the trained model
+ ```
+
+
+ # Training and Inference Details
+ We currently provide **command-line (CLI)** and **Gradio WebUI** inference. The portrait identity for audio-driven inference comes from `torso_ckpt`, so you must additionally provide at least a `driving audio`. Optionally, you can also provide a `style video` so that the model predicts talking motions consistent with that video's style.
+
+ First, switch to the project root directory and activate the Conda environment:
+ ```bash
+ cd <Real3DPortraitRoot>
+ conda activate mimictalk
+ export PYTHONPATH=./
+ export HF_ENDPOINT=https://hf-mirror.com
+ ```
+
+ ## Gradio WebUI Inference
+ Launch the Gradio WebUI, upload the materials as prompted, and click the `Training` button to train; once training is finished, click the `Generate` button to run inference:
+ ```bash
+ python inference/app_mimictalk.py
+ ```
+
+ ## CLI Training for a Specific Speaker
+
+ You must provide at least a `source video`. Training command:
+ ```bash
+ python inference/train_mimictalk_on_a_video.py \
+ --video_id <PATH_TO_SOURCE_VIDEO> \
+ --max_updates <UPDATES_NUMBER> \
+ --work_dir <PATH_TO_SAVING_CKPT>
+ ```
+
+ Notes on some optional arguments:
+
+ - `--torso_ckpt` the pre-trained Real3D-Portrait model
+ - `--max_updates` the number of training updates
+ - `--batch_size` the training batch size: `1` requires about 8 GB of VRAM; `2` requires about 15 GB
+ - `--lr_triplane` the learning rate of the triplane: it should be 0.1 for video input and 0.001 for image input
+ - `--work_dir` if not specified, checkpoints are saved to `checkpoints_mimictalk/` by default
+
+ Example command:
+ ```bash
+ python inference/train_mimictalk_on_a_video.py \
+ --video_id data/raw/videos/German_20s.mp4 \
+ --max_updates 2000 \
+ --work_dir checkpoints_mimictalk/German_20s
+ ```
+
+ ## CLI Inference
+
+ You must provide at least a `driving audio`, and may optionally provide a `driving style`. Inference command:
+ ```bash
+ python inference/mimictalk_infer.py \
+ --drv_aud <PATH_TO_AUDIO> \
+ --drv_style <PATH_TO_STYLE_VIDEO, OPTIONAL> \
+ --drv_pose <PATH_TO_POSE_VIDEO, OPTIONAL> \
+ --bg_img <PATH_TO_BACKGROUND_IMAGE, OPTIONAL> \
+ --out_name <PATH_TO_OUTPUT_VIDEO, OPTIONAL>
+ ```
+
+ Notes on some optional arguments:
+ - `--drv_pose` provides head-pose motion when specified; otherwise a static pose is used
+ - `--bg_img` provides the background when specified; otherwise the background is extracted from the source image
+ - `--mouth_amp` the mouth-amplitude parameter; larger values open the mouth wider
+ - `--map_to_init_pose` when set to `True`, the pose of the first frame is mapped to the source pose, and subsequent frames are transformed in the same way
+ - `--temperature` the sampling temperature of audio2motion; larger values give more diverse but less accurate results
+ - `--out_name` if not specified, results are saved in `infer_out/tmp/`
+ - `--out_mode` `final` outputs only the talking-face video; `concat_debug` additionally outputs some visualized intermediate results
+
+ Example inference command:
+ ```bash
+ python inference/mimictalk_infer.py \
+ --drv_aud data/raw/examples/Obama_5s.wav \
+ --drv_pose data/raw/examples/German_20s.mp4 \
+ --drv_style data/raw/examples/German_20s.mp4 \
+ --bg_img data/raw/examples/bg.png \
+ --out_name output.mp4 \
+ --out_mode final
+ ```
+
+ # Disclaimer
+ Without the person's consent, no organization or individual may use any of the techniques mentioned in this work to generate videos of someone speaking, including but not limited to government leaders, politicians, and celebrities. Failure to comply with this clause may violate copyright law.
+
+ # Citation
+ If this repository is helpful to you, please consider citing our work:
+ ```
+ @inproceedings{ye2024mimicktalk,
+     author    = {Ye, Zhenhui and Zhong, Tianyun and Ren, Yi and Yang, Jiaqi and Li, Weichuang and Huang, Jiangwei and Jiang, Ziyue and He, Jinzheng and Huang, Rongjie and Liu, Jinglin and Zhang, Chen and Yin, Xiang and Ma, Zejun and Zhao, Zhou},
+     title     = {MimicTalk: Mimicking a personalized and expressive 3D talking face in few minutes},
+     journal   = {NeurIPS},
+     year      = {2024},
+ }
+ @inproceedings{ye2024real3d,
+     title     = {Real3D-Portrait: One-shot Realistic 3D Talking Portrait Synthesis},
+     author    = {Ye, Zhenhui and Zhong, Tianyun and Ren, Yi and Yang, Jiaqi and Li, Weichuang and Huang, Jiawei and Jiang, Ziyue and He, Jinzheng and Huang, Rongjie and Liu, Jinglin and others},
+     journal   = {ICLR},
+     year      = {2024}
+ }
+ ```
app.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from transformers import AutoModel
2
+ model = AutoModel.from_pretrained("mrbear1024/mimictalk")
3
+ print(model)
checkpoints/checkpoints/240112_icl_audio2secc_vox2_cmlr/config.yaml ADDED
@@ -0,0 +1,86 @@
1
+ accumulate_grad_batches: 1
2
+ amp: false
3
+ audio_type: hubert
4
+ base_config:
5
+ - ./audio2secc_vae.yaml
6
+ batch_size: 4
7
+ binarization_args:
8
+ with_coeff: true
9
+ with_hubert: true
10
+ with_mel: true
11
+ binary_data_dir: data/binary/voxceleb2_audio2motion
12
+ blink_mode: blink_unit
13
+ clip_grad_norm: 0.5
14
+ clip_grad_value: 0
15
+ debug: false
16
+ ds_name: Concat_voxceleb2_CMLR
17
+ eval_max_batches: 10
18
+ gen_dir_name: ''
19
+ hidden_size: 256
20
+ icl_model_type: icl_flow_matching
21
+ infer_audio_source_name: ''
22
+ infer_ckpt_steps: 40000
23
+ infer_out_npy_name: ''
24
+ init_from_ckpt: ''
25
+ lambda_kl: 0.02
26
+ lambda_kl_t1: 2000
27
+ lambda_kl_t2: 2000
28
+ lambda_l2_reg_exp: 0.0
29
+ lambda_lap_exp: 0.0
30
+ lambda_lap_exp_x1: 0.1
31
+ lambda_mse_exp: 0.0
32
+ lambda_mse_exp_x1: 0.1
33
+ lambda_mse_icl: 1.0
34
+ lambda_mse_lm2d: 0.0
35
+ lambda_mse_lm3d: 0.0
36
+ lambda_mse_lm3d_x1: 0.1
37
+ lambda_sync_lm3d: 0.05
38
+ load_ckpt: ''
39
+ load_db_to_memory: false
40
+ lr: 0.0005
41
+ max_sentences_per_batch: 512
42
+ max_tokens_per_batch: 20000
43
+ max_updates: 4000000
44
+ motion_type: exp
45
+ num_ckpt_keep: 100
46
+ num_sanity_val_steps: 5
47
+ num_valid_plots: 1
48
+ num_workers: 8
49
+ optimizer_adam_beta1: 0.9
50
+ optimizer_adam_beta2: 0.999
51
+ print_nan_grads: false
52
+ process_id: 0
53
+ raw_data_dir: /home/tiger/datasets/raw/TH1KH_512
54
+ ref_id_mode: first_frame
55
+ resume_from_checkpoint: 0
56
+ sample_min_length: 32
57
+ save_best: false
58
+ save_codes:
59
+ - tasks
60
+ - modules
61
+ - egs
62
+ save_gt: true
63
+ scheduler: exponential
64
+ seed: 9999
65
+ smo_win_size: 5
66
+ split_seed: 999
67
+ syncnet_ckpt_dir: checkpoints/0904_syncnet/syncnet_hubert_vox2
68
+ task_cls: tasks.os_avatar.icl_audio2secc_task.Audio2SECCTask
69
+ tb_log_interval: 100
70
+ total_process: 1
71
+ use_aux_features: true
72
+ use_aux_loss_on_x1: true
73
+ use_eye_amp_embed: false
74
+ use_flow: true
75
+ use_fork: true
76
+ use_kv_dataset: true
77
+ use_mouth_amp_embed: true
78
+ use_pitch: true
79
+ val_check_interval: 2000
80
+ valid_infer_interval: 2000
81
+ valid_monitor_key: val_loss
82
+ valid_monitor_mode: min
83
+ warmup_updates: 1000
84
+ weight_decay: 0
85
+ work_dir: checkpoints/240112_audio2secc/icl_audio2secc_vox2_cmlr
86
+ x_multiply: 16
checkpoints/checkpoints/240112_icl_audio2secc_vox2_cmlr/model_ckpt_steps_1856000.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:367167db3b25fe07de9255871ce7813158551c4b00bfadd7334a671648924a2e
3
+ size 462941609
checkpoints/checkpoints/mimictalk_orig/os_secc2plane_torso/config.yaml ADDED
@@ -0,0 +1,152 @@
1
+ accumulate_grad_batches: 1
2
+ add_ffhq_singe_disc: false
3
+ also_update_decoder: false
4
+ amp: false
5
+ base_channel: 32768
6
+ base_config:
7
+ - ./secc_img2plane.yaml
8
+ batch_size: 1
9
+ binary_data_dir: data/binary/CelebV-HQ
10
+ blur_fade_kimg: 20
11
+ blur_init_sigma: 10
12
+ blur_raw_target: true
13
+ box_warp: 1
14
+ ckpt_milestone_interval: 50000
15
+ clip_grad_norm: 1.0
16
+ clip_grad_value: 0
17
+ cond_hid_dim: 32
18
+ cond_out_dim: 16
19
+ cond_type: idexp_lm3d_normalized
20
+ debug: false
21
+ density_reg_p_dist: 0.004
22
+ disable_highreso_at_stage1: true
23
+ disc_c_noise: 1.0
24
+ disc_cond_mode: none
25
+ ds_name: Concat_VFHQ_CelebVHQ_TH1KH_RAVDESS
26
+ ema_interval: 400
27
+ enable_rescale_plane_regulation: false
28
+ eval_max_batches: 100
29
+ ffhq_disc_inp_mode: eg3d_gen
30
+ final_resolution: 512
31
+ flipped_to_world_coord: true
32
+ fuse_with_deform_source: false
33
+ gen_cond_mode: none
34
+ generator_condition_on_pose: true
35
+ gpc_reg_fade_kimg: 1000
36
+ gpc_reg_prob: 0.8
37
+ group_size_for_mini_batch_std: 2
38
+ htbsr_head_threshold: 1.0
39
+ htbsr_head_weight_fuse_mode: v2
40
+ img2plane_backbone_mode: composite
41
+ img2plane_backbone_scale: standard
42
+ init_from_ckpt: checkpoints/240118_os_secc2planes/os_secc2plane_pertubeBlink0.05_pertubeSECC0.05
43
+ lam_occlusion_2_reg_l1: 0.0
44
+ lam_occlusion_reg_l1: 0.0
45
+ lam_occlusion_weights_entropy: 0.001
46
+ lambda_G_adversarial_adv: 1.0
47
+ lambda_G_supervise_adv: 1.0
48
+ lambda_G_supervise_mse: 1.0
49
+ lambda_G_supervise_mse_raw: 1.0
50
+ lambda_density_reg: 0.25
51
+ lambda_ffhq_mv_adv: 0.002
52
+ lambda_gradient_penalty: 1.0
53
+ lambda_mse: 1.0
54
+ lambda_mse_depth: 0.0
55
+ lambda_th1kh_mv_adv: 0.003
56
+ lambda_weights_entropy: 0.01
57
+ lambda_weights_l1: 0.1
58
+ load_ckpt: ''
59
+ lpips_mode: vgg19_v2
60
+ lr_d: 0.0002
61
+ lr_decay_interval: 5000
62
+ lr_decay_rate: 0.95
63
+ lr_g: 1.0e-05
64
+ lr_lambda_pertube_secc: 0.01
65
+ lr_mul_cano_img2plane: 1.0
66
+ mapping_network_depth: 2
67
+ max_channel: 512
68
+ max_updates: 100000
69
+ mimic_plane: false
70
+ min_rescale_factor: 0.25
71
+ motion_smo_win_size: 5
72
+ neural_rendering_resolution: 128
73
+ normalize_cond: false
74
+ normalize_radius: false
75
+ not_save_modules:
76
+ - criterion_lpips
77
+ - eg3d_model
78
+ num_ckpt_keep: 1
79
+ num_fp16_layers_in_discriminator: 4
80
+ num_fp16_layers_in_generator: 0
81
+ num_fp16_layers_in_super_resolution: 4
82
+ num_samples_coarse: 48
83
+ num_samples_fine: 48
84
+ num_sanity_val_steps: 1
85
+ num_valid_plots: 25
86
+ num_workers: 8
87
+ occlusion_fuse: true
88
+ ones_ws_for_sr: true
89
+ optimizer_adam_beta1_d: 0.0
90
+ optimizer_adam_beta1_g: 0.0
91
+ optimizer_adam_beta2_d: 0.99
92
+ optimizer_adam_beta2_g: 0.99
93
+ phase1_plane_fusion_mode: add
94
+ pncc_cond_mode: cano_src_tgt
95
+ pretrained_eg3d_ckpt: /mnt/bn/ailabrenyi/entries/yezhenhui/projects/GeneFace_private/checkpoints/0628_img2planes/eg3d_baseline_run2/model_ckpt_steps_100000.ckpt
96
+ print_nan_grads: false
97
+ process_id: 0
98
+ processed_data_dir: data/processed/videos
99
+ random_sample_pose: true
100
+ raw_data_dir: /home/tiger/datasets/raw/FFHQ
101
+ ray_far: auto
102
+ ray_near: auto
103
+ reg_interval_d: 16
104
+ reg_interval_g: 4
105
+ reg_interval_g_cond: 4
106
+ reload_head_ckpt: ''
107
+ resume_from_checkpoint: 0
108
+ save_best: true
109
+ save_codes:
110
+ - tasks
111
+ - modules
112
+ - egs
113
+ secc_pertube_mode: randn
114
+ secc_pertube_randn_scale: 0.01
115
+ secc_segformer_scale: b0
116
+ seed: 9999
117
+ seg_out_mode: head
118
+ smo_win_size: 5
119
+ split_seed: 999
120
+ sr_type: vanilla
121
+ start_adv_iters: 40000
122
+ target_pertube_blink_secc_loss: 0.05
123
+ target_pertube_secc_loss: 0.05
124
+ task_cls: tasks.os_avatar.secc_img2plane_torso_task.SECC_Img2PlaneEG3D_TorsoTask
125
+ tb_log_interval: 100
126
+ torch_compile: true
127
+ torso_kp_num: 4
128
+ torso_model_version: v2
129
+ torso_occlusion_reg_unmask_factor: 0.3
130
+ torso_ref_segout_mode: torso
131
+ total_process: 1
132
+ triplane_depth: 3
133
+ triplane_feature_type: trigrid_v2
134
+ triplane_hid_dim: 32
135
+ two_stage_training: true
136
+ update_on_th1kh_samples: false
137
+ update_src2src_interval: 4
138
+ use_kv_dataset: true
139
+ use_motion_smo_net: false
140
+ use_mse: false
141
+ use_th1kh_disc: false
142
+ use_th1kh_mv_adv: false
143
+ val_check_interval: 2000
144
+ valid_infer_interval: 2000
145
+ valid_monitor_key: val_loss
146
+ valid_monitor_mode: min
147
+ video_id: May
148
+ w_dim: 512
149
+ warmup_updates: 4000
150
+ weight_fuse: true
151
+ work_dir: checkpoints/240120_os_secc2planes_torso/os_secc2plane_torso_htbsrFusev2_htbsrThres1.0
152
+ z_dim: 512
checkpoints/checkpoints/mimictalk_orig/os_secc2plane_torso/model_ckpt_steps_100000.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:757c1b73d938da0ec3fba555a155a31ac803ddc8d343bba2a3c69845844b213a
3
+ size 1414788463
checkpoints/checkpoints/pretrained_ckpts/mit_b0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df468f7f13c4186f25bd3e2caf09e4f927b5b5ac0abccac84011dae747d4c49c
3
+ size 14331578
checkpoints/checkpoints_mimictalk/German_20s/config.yaml ADDED
@@ -0,0 +1,155 @@
1
+ accumulate_grad_batches: 1
2
+ add_ffhq_singe_disc: false
3
+ also_update_decoder: false
4
+ amp: false
5
+ base_channel: 32768
6
+ base_config:
7
+ - ./secc_img2plane.yaml
8
+ batch_size: 1
9
+ binary_data_dir: data/binary/CelebV-HQ
10
+ blur_fade_kimg: 20
11
+ blur_init_sigma: 10
12
+ blur_raw_target: true
13
+ box_warp: 1
14
+ ckpt_milestone_interval: 50000
15
+ clip_grad_norm: 1.0
16
+ clip_grad_value: 0
17
+ cond_hid_dim: 32
18
+ cond_out_dim: 16
19
+ cond_type: idexp_lm3d_normalized
20
+ debug: false
21
+ density_reg_p_dist: 0.004
22
+ disable_highreso_at_stage1: true
23
+ disc_c_noise: 1.0
24
+ disc_cond_mode: none
25
+ ds_name: Concat_VFHQ_CelebVHQ_TH1KH_RAVDESS
26
+ ema_interval: 400
27
+ enable_rescale_plane_regulation: false
28
+ eval_max_batches: 100
29
+ ffhq_disc_inp_mode: eg3d_gen
30
+ final_resolution: 512
31
+ flipped_to_world_coord: true
32
+ fuse_with_deform_source: false
33
+ gen_cond_mode: none
34
+ generator_condition_on_pose: true
35
+ gpc_reg_fade_kimg: 1000
36
+ gpc_reg_prob: 0.8
37
+ group_size_for_mini_batch_std: 2
38
+ htbsr_head_threshold: 1.0
39
+ htbsr_head_weight_fuse_mode: v2
40
+ img2plane_backbone_mode: composite
41
+ img2plane_backbone_scale: standard
42
+ init_from_ckpt: checkpoints/240118_os_secc2planes/os_secc2plane_pertubeBlink0.05_pertubeSECC0.05
43
+ lam_occlusion_2_reg_l1: 0.0
44
+ lam_occlusion_reg_l1: 0.0
45
+ lam_occlusion_weights_entropy: 0.001
46
+ lambda_G_adversarial_adv: 1.0
47
+ lambda_G_supervise_adv: 1.0
48
+ lambda_G_supervise_mse: 1.0
49
+ lambda_G_supervise_mse_raw: 1.0
50
+ lambda_density_reg: 0.25
51
+ lambda_ffhq_mv_adv: 0.002
52
+ lambda_gradient_penalty: 1.0
53
+ lambda_mse: 1.0
54
+ lambda_mse_depth: 0.0
55
+ lambda_th1kh_mv_adv: 0.003
56
+ lambda_weights_entropy: 0.01
57
+ lambda_weights_l1: 0.1
58
+ load_ckpt: ''
59
+ lpips_mode: vgg19_v2
60
+ lr_d: 0.0002
61
+ lr_decay_interval: 5000
62
+ lr_decay_rate: 0.95
63
+ lr_g: 1.0e-05
64
+ lr_lambda_pertube_secc: 0.01
65
+ lr_mul_cano_img2plane: 1.0
66
+ mapping_network_depth: 2
67
+ max_channel: 512
68
+ max_updates: 100000
69
+ mimic_plane: false
70
+ min_rescale_factor: 0.25
71
+ motion_smo_win_size: 5
72
+ neural_rendering_resolution: 128
73
+ normalize_cond: false
74
+ normalize_radius: false
75
+ not_save_modules:
76
+ - criterion_lpips
77
+ - eg3d_model
78
+ num_ckpt_keep: 1
79
+ num_fp16_layers_in_discriminator: 4
80
+ num_fp16_layers_in_generator: 0
81
+ num_fp16_layers_in_super_resolution: 4
82
+ num_samples_coarse: 48
83
+ num_samples_fine: 48
84
+ num_sanity_val_steps: 1
85
+ num_valid_plots: 25
86
+ num_workers: 8
87
+ occlusion_fuse: true
88
+ ones_ws_for_sr: true
89
+ optimizer_adam_beta1_d: 0.0
90
+ optimizer_adam_beta1_g: 0.0
91
+ optimizer_adam_beta2_d: 0.99
92
+ optimizer_adam_beta2_g: 0.99
93
+ phase1_plane_fusion_mode: add
94
+ pncc_cond_mode: cano_src_tgt
95
+ pretrained_eg3d_ckpt: /mnt/bn/ailabrenyi/entries/yezhenhui/projects/GeneFace_private/checkpoints/0628_img2planes/eg3d_baseline_run2/model_ckpt_steps_100000.ckpt
96
+ print_nan_grads: false
97
+ process_id: 0
98
+ processed_data_dir: data/processed/videos
99
+ random_sample_pose: true
100
+ raw_data_dir: /home/tiger/datasets/raw/FFHQ
101
+ ray_far: auto
102
+ ray_near: auto
103
+ reg_interval_d: 16
104
+ reg_interval_g: 4
105
+ reg_interval_g_cond: 4
106
+ reload_head_ckpt: ''
107
+ resume_from_checkpoint: 0
108
+ save_best: true
109
+ save_codes:
110
+ - tasks
111
+ - modules
112
+ - egs
113
+ secc_pertube_mode: randn
114
+ secc_pertube_randn_scale: 0.01
115
+ secc_segformer_scale: b0
116
+ seed: 9999
117
+ seg_out_mode: head
118
+ smo_win_size: 5
119
+ split_seed: 999
120
+ sr_type: vanilla
121
+ start_adv_iters: 40000
122
+ target_pertube_blink_secc_loss: 0.05
123
+ target_pertube_secc_loss: 0.05
124
+ task_cls: tasks.os_avatar.secc_img2plane_torso_task.SECC_Img2PlaneEG3D_TorsoTask
125
+ tb_log_interval: 100
126
+ torch_compile: true
127
+ torso_kp_num: 4
128
+ torso_model_version: v2
129
+ torso_occlusion_reg_unmask_factor: 0.3
130
+ torso_ref_segout_mode: torso
131
+ total_process: 1
132
+ triplane_depth: 3
133
+ triplane_feature_type: trigrid_v2
134
+ triplane_hid_dim: 32
135
+ two_stage_training: true
136
+ update_on_th1kh_samples: false
137
+ update_src2src_interval: 4
138
+ use_kv_dataset: true
139
+ use_motion_smo_net: false
140
+ use_mse: false
141
+ use_th1kh_disc: false
142
+ use_th1kh_mv_adv: false
143
+ val_check_interval: 2000
144
+ valid_infer_interval: 2000
145
+ valid_monitor_key: val_loss
146
+ valid_monitor_mode: min
147
+ video_id: May
148
+ w_dim: 512
149
+ warmup_updates: 4000
150
+ weight_fuse: true
151
+ work_dir: checkpoints/240120_os_secc2planes_torso/os_secc2plane_torso_htbsrFusev2_htbsrThres1.0
152
+ z_dim: 512
153
+
154
+ lora_r: 2
155
+ lora_mode: secc2plane_sr
checkpoints/checkpoints_mimictalk/German_20s/model_ckpt_steps_10000.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66f6e77b1c1f2fc548c3c94e6357800ad54819c70817a2ea726ff59b44ca8028
3
+ size 829401932
deep_3drecon/BFM/.gitkeep ADDED
File without changes
deep_3drecon/BFM/01_MorphableModel.mat ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37b1f0742db356a3b1568a8365a06f5b0fe0ab687ac1c3068c803666cbd4d8e2
3
+ size 240875364
deep_3drecon/BFM/BFM_exp_idx.mat ADDED
Binary file (91.9 kB). View file
 
deep_3drecon/BFM/BFM_front_idx.mat ADDED
Binary file (44.9 kB). View file
 
deep_3drecon/BFM/BFM_model_front.mat ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7655695ee8cf64a23db9a3cff730b4a0573160690805d1861b7d651f1596319
3
+ size 127170280
deep_3drecon/BFM/Exp_Pca.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7f31380e6cbdaf2aeec698db220bac4f221946e4d551d88c092d47ec49b1726
3
+ size 51086404
deep_3drecon/BFM/basel_53201.txt ADDED
The diff for this file is too large to render. See raw diff
 
deep_3drecon/BFM/facemodel_info.mat ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:529398f76619ae7e22f43c25dd60a2473bcc2bcc8c894fd9c613c68624ce1c04
3
+ size 738861
deep_3drecon/BFM/index_mp468_from_mesh35709.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:053b8cce8424b722db6ec5b068514eb007a23b4c5afd629449eb08746e643211
3
+ size 3872
deep_3drecon/BFM/index_mp468_from_mesh35709_v1.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d238a90df0c55075c9cea43dab76348421379a75c204931e34dbd2c11fb4b65
3
+ size 3872
deep_3drecon/BFM/index_mp468_from_mesh35709_v2.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe95e2bb10ac1e54804006184d7de3c5ccd0eb98a5f1bd28e00b9f3569f6ce5a
3
+ size 3872
deep_3drecon/BFM/index_mp468_from_mesh35709_v3.1.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:053b8cce8424b722db6ec5b068514eb007a23b4c5afd629449eb08746e643211
3
+ size 3872
deep_3drecon/BFM/index_mp468_from_mesh35709_v3.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b007b3619dd02892b38349ba3d4b10e32bc2eff201c265f25d6ed62f67dbd51
3
+ size 3872
deep_3drecon/BFM/select_vertex_id.mat ADDED
Binary file (62.3 kB). View file
 
deep_3drecon/BFM/similarity_Lm3D_all.mat ADDED
Binary file (994 Bytes). View file
 
deep_3drecon/BFM/std_exp.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ 453980 257264 263068 211890 135873 184721 47055.6 72732 62787.4 106226 56708.5 51439.8 34887.1 44378.7 51813.4 31030.7 23354.9 23128.1 19400 21827.6 22767.7 22057.4 19894.3 16172.8 17142.7 10035.3 14727.5 12972.5 10763.8 8953.93 8682.62 8941.81 6342.3 5205.3 7065.65 6083.35 6678.88 4666.63 5082.89 5134.76 4908.16 3964.93 3739.95 3180.09 2470.45 1866.62 1624.71 2423.74 1668.53 1471.65 1194.52 782.102 815.044 835.782 834.937 744.496 575.146 633.76 705.685 753.409 620.306 673.326 766.189 619.866 559.93 357.264 396.472 556.849 455.048 460.592 400.735 326.702 279.428 291.535 326.584 305.664 287.816 283.642 276.19
deep_3drecon/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .reconstructor import *
deep_3drecon/bfm_left_eye_faces.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9651756ea2c0fac069a1edf858ed1f125eddc358fa74c529a370c1e7b5730d28
3
+ size 4680
deep_3drecon/bfm_right_eye_faces.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28cb5bbacf578d30a3d5006ec28c617fe5a3ecaeeeb87d9433a884e0f0301a2e
3
+ size 4648
deep_3drecon/data_preparation.py ADDED
@@ -0,0 +1,45 @@
1
+ """This script is the data preparation script for Deep3DFaceRecon_pytorch
2
+ """
3
+
4
+ import os
5
+ import numpy as np
6
+ import argparse
7
+ from util.detect_lm68 import detect_68p,load_lm_graph
8
+ from util.skin_mask import get_skin_mask
9
+ from util.generate_list import check_list, write_list
10
+ import warnings
11
+ warnings.filterwarnings("ignore")
12
+
13
+ parser = argparse.ArgumentParser()
14
+ parser.add_argument('--data_root', type=str, default='datasets', help='root directory for training data')
15
+ parser.add_argument('--img_folder', nargs="+", required=True, help='folders of training images')
16
+ parser.add_argument('--mode', type=str, default='train', help='train or val')
17
+ opt = parser.parse_args()
18
+
19
+ os.environ['CUDA_VISIBLE_DEVICES'] = '0'
20
+
21
+ def data_prepare(folder_list,mode):
22
+
23
+ lm_sess,input_op,output_op = load_lm_graph('./checkpoints/lm_model/68lm_detector.pb') # load a tensorflow version 68-landmark detector
24
+
25
+ for img_folder in folder_list:
26
+ detect_68p(img_folder,lm_sess,input_op,output_op) # detect landmarks for images
27
+ get_skin_mask(img_folder) # generate skin attention mask for images
28
+
29
+ # create files that record path to all training data
30
+ msks_list = []
31
+ for img_folder in folder_list:
32
+ path = os.path.join(img_folder, 'mask')
33
+ msks_list += ['/'.join([img_folder, 'mask', i]) for i in sorted(os.listdir(path)) if 'jpg' in i or
34
+ 'png' in i or 'jpeg' in i or 'PNG' in i]
35
+
36
+ imgs_list = [i.replace('mask/', '') for i in msks_list]
37
+ lms_list = [i.replace('mask', 'landmarks') for i in msks_list]
38
+ lms_list = ['.'.join(i.split('.')[:-1]) + '.txt' for i in lms_list]
39
+
40
+ lms_list_final, imgs_list_final, msks_list_final = check_list(lms_list, imgs_list, msks_list) # check if the path is valid
41
+ write_list(lms_list_final, imgs_list_final, msks_list_final, mode=mode) # save files
42
+
43
+ if __name__ == '__main__':
44
+ print('Datasets:',opt.img_folder)
45
+ data_prepare([os.path.join(opt.data_root,folder) for folder in opt.img_folder],opt.mode)
deep_3drecon/deep_3drecon_models/__init__.py ADDED
@@ -0,0 +1,67 @@
1
+ """This package contains modules related to objective functions, optimizations, and network architectures.
2
+
3
+ To add a custom model class called 'dummy', you need to add a file called 'dummy_model.py' and define a subclass DummyModel inherited from BaseModel.
4
+ You need to implement the following five functions:
5
+ -- <__init__>: initialize the class; first call BaseModel.__init__(self, opt).
6
+ -- <set_input>: unpack data from dataset and apply preprocessing.
7
+ -- <forward>: produce intermediate results.
8
+ -- <optimize_parameters>: calculate loss, gradients, and update network weights.
9
+ -- <modify_commandline_options>: (optionally) add model-specific options and set default options.
10
+
11
+ In the function <__init__>, you need to define four lists:
12
+ -- self.loss_names (str list): specify the training losses that you want to plot and save.
13
+ -- self.model_names (str list): define networks used in our training.
14
+ -- self.visual_names (str list): specify the images that you want to display and save.
15
+ -- self.optimizers (optimizer list): define and initialize optimizers. You can define one optimizer for each network. If two networks are updated at the same time, you can use itertools.chain to group them. See cycle_gan_model.py for an example of its usage.
16
+
17
+ Now you can use the model class by specifying flag '--model dummy'.
18
+ See our template model class 'template_model.py' for more details.
19
+ """
20
+
21
+ import importlib
22
+ from .base_model import BaseModel
23
+
24
+
25
+ def find_model_using_name(model_name):
26
+ """Import the module "models/[model_name]_model.py".
27
+
28
+ In the file, the class called DatasetNameModel() will
29
+ be instantiated. It has to be a subclass of BaseModel,
30
+ and it is case-insensitive.
31
+ """
32
+ model_filename = "deep_3drecon_models." + model_name + "_model"
33
+ modellib = importlib.import_module(model_filename)
34
+ model = None
35
+ target_model_name = model_name.replace('_', '') + 'model'
36
+ for name, cls in modellib.__dict__.items():
37
+ if name.lower() == target_model_name.lower() \
38
+ and issubclass(cls, BaseModel):
39
+ model = cls
40
+
41
+ if model is None:
42
+ print("In %s.py, there should be a subclass of BaseModel with class name that matches %s in lowercase." % (model_filename, target_model_name))
43
+ exit(0)
44
+
45
+ return model
46
+
47
+
48
+ def get_option_setter(model_name):
49
+ """Return the static method <modify_commandline_options> of the model class."""
50
+ model_class = find_model_using_name(model_name)
51
+ return model_class.modify_commandline_options
52
+
53
+
54
+ def create_model(opt):
55
+ """Create a model given the option.
56
+
57
+ This function wraps the class CustomDatasetDataLoader.
58
+ This is the main interface between this package and 'train.py'/'test.py'
59
+
60
+ Example:
61
+ >>> from models import create_model
62
+ >>> model = create_model(opt)
63
+ """
64
+ model = find_model_using_name(opt.model)
65
+ instance = model(opt)
66
+ print("model [%s] was created" % type(instance).__name__)
67
+ return instance
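As a concrete illustration of the recipe in the package docstring above, here is a rough, untested sketch of what a custom 'dummy' model could look like. The file name `dummy_model.py`, the toy linear network, and the `opt.lr` field are assumptions made for this example, not code from the repository.

```python
# deep_3drecon_models/dummy_model.py  (hypothetical file, following the naming
# convention "<name>_model.py" so that find_model_using_name('dummy') can locate it)
import torch

from .base_model import BaseModel


class DummyModel(BaseModel):
    """Minimal skeleton of a custom model, selectable via '--model dummy'."""

    @staticmethod
    def modify_commandline_options(parser, is_train=True):
        # (optionally) add model-specific options here
        return parser

    def __init__(self, opt):
        BaseModel.__init__(self, opt)
        # the four lists the framework expects (see the package docstring)
        self.loss_names = ['dummy']
        self.model_names = ['Dummy']
        self.visual_names = ['output']
        self.netDummy = torch.nn.Linear(3, 3)
        self.optimizers = [torch.optim.Adam(self.netDummy.parameters(), lr=opt.lr)]

    def set_input(self, input):
        # unpack data from the dataloader and apply any preprocessing
        self.input = input['data']

    def forward(self):
        # produce intermediate results
        self.output = self.netDummy(self.input)

    def optimize_parameters(self):
        # calculate loss and gradients, then update network weights
        self.forward()
        self.loss_dummy = self.output.abs().mean()
        self.optimizers[0].zero_grad()
        self.loss_dummy.backward()
        self.optimizers[0].step()
```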
deep_3drecon/deep_3drecon_models/arcface_torch/README.md ADDED
@@ -0,0 +1,218 @@
1
+ # Distributed Arcface Training in Pytorch
2
+
3
+ The "arcface_torch" repository is the official implementation of the ArcFace algorithm. It supports distributed and sparse training with multiple distributed training examples, including several memory-saving techniques such as mixed precision training and gradient checkpointing. It also supports training for ViT models and datasets including WebFace42M and Glint360K, two of the largest open-source datasets. Additionally, the repository comes with a built-in tool for converting to ONNX format, making it easy to submit to MFR evaluation systems.
4
+
5
+ [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/killing-two-birds-with-one-stone-efficient/face-verification-on-ijb-c)](https://paperswithcode.com/sota/face-verification-on-ijb-c?p=killing-two-birds-with-one-stone-efficient)
6
+ [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/killing-two-birds-with-one-stone-efficient/face-verification-on-ijb-b)](https://paperswithcode.com/sota/face-verification-on-ijb-b?p=killing-two-birds-with-one-stone-efficient)
7
+ [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/killing-two-birds-with-one-stone-efficient/face-verification-on-agedb-30)](https://paperswithcode.com/sota/face-verification-on-agedb-30?p=killing-two-birds-with-one-stone-efficient)
8
+ [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/killing-two-birds-with-one-stone-efficient/face-verification-on-cfp-fp)](https://paperswithcode.com/sota/face-verification-on-cfp-fp?p=killing-two-birds-with-one-stone-efficient)
9
+
10
+ ## Requirements
11
+
12
+ To take advantage of the latest features of PyTorch, we have upgraded to version 1.12.0.
13
+
14
+ - Install [PyTorch](https://pytorch.org/get-started/previous-versions/) (torch>=1.12.0).
15
+ - (Optional) Install [DALI](https://docs.nvidia.com/deeplearning/dali/user-guide/docs/), our doc for [install_dali.md](docs/install_dali.md).
16
+ - `pip install -r requirement.txt`.
17
+
18
+ ## How to Train
19
+
20
+ To train a model, execute the `train.py` script with the path to the configuration files. The sample commands provided below demonstrate the process of conducting distributed training.
21
+
22
+ ### 1. To run on one GPU:
23
+
24
+ ```shell
25
+ python train_v2.py configs/ms1mv3_r50_onegpu
26
+ ```
27
+
28
+ Note:
29
+ It is not recommended to use a single GPU for training, as this may result in longer training times and suboptimal performance. For best results, we suggest using multiple GPUs or a GPU cluster.
30
+
31
+
32
+ ### 2. To run on a machine with 8 GPUs:
33
+
34
+ ```shell
35
+ torchrun --nproc_per_node=8 train.py configs/ms1mv3_r50
36
+ ```
37
+
38
+ ### 3. To run on 2 machines with 8 GPUs each:
39
+
40
+ Node 0:
41
+
42
+ ```shell
43
+ torchrun --nproc_per_node=8 --nnodes=2 --node_rank=0 --master_addr="ip1" --master_port=12581 train.py configs/wf42m_pfc02_16gpus_r100
44
+ ```
45
+
46
+ Node 1:
47
+
48
+ ```shell
49
+ torchrun --nproc_per_node=8 --nnodes=2 --node_rank=1 --master_addr="ip1" --master_port=12581 train.py configs/wf42m_pfc02_16gpus_r100
50
+ ```
51
+
52
+ ### 4. Run ViT-B on a machine with 24k batchsize:
53
+
54
+ ```shell
55
+ torchrun --nproc_per_node=8 train_v2.py configs/wf42m_pfc03_40epoch_8gpu_vit_b
56
+ ```
57
+
58
+
59
+ ## Download Datasets or Prepare Datasets
60
+ - [MS1MV2](https://github.com/deepinsight/insightface/tree/master/recognition/_datasets_#ms1m-arcface-85k-ids58m-images-57) (87k IDs, 5.8M images)
61
+ - [MS1MV3](https://github.com/deepinsight/insightface/tree/master/recognition/_datasets_#ms1m-retinaface) (93k IDs, 5.2M images)
62
+ - [Glint360K](https://github.com/deepinsight/insightface/tree/master/recognition/partial_fc#4-download) (360k IDs, 17.1M images)
63
+ - [WebFace42M](docs/prepare_webface42m.md) (2M IDs, 42.5M images)
64
+ - [Your Dataset, Click Here!](docs/prepare_custom_dataset.md)
65
+
66
+ Note:
67
+ If you want to use DALI for data reading, please use the script 'scripts/shuffle_rec.py' to shuffle the InsightFace style rec before using it.
68
+ Example:
69
+
70
+ `python scripts/shuffle_rec.py ms1m-retinaface-t1`
71
+
72
+ You will get the "shuffled_ms1m-retinaface-t1" folder, where the samples in the "train.rec" file are shuffled.
73
+
74
+
75
+ ## Model Zoo
76
+
77
+ - The models are available for non-commercial research purposes only.
78
+ - All models can be found in here.
79
+ - [Baidu Yun Pan](https://pan.baidu.com/s/1CL-l4zWqsI1oDuEEYVhj-g): e8pw
80
+ - [OneDrive](https://1drv.ms/u/s!AswpsDO2toNKq0lWY69vN58GR6mw?e=p9Ov5d)
81
+
82
+ ### Performance on IJB-C and [**ICCV2021-MFR**](https://github.com/deepinsight/insightface/blob/master/challenges/mfr/README.md)
83
+
84
+ The ICCV2021-MFR test set consists of non-celebrities, so we can ensure that it has very little overlap with publicly available face recognition training sets such as MS1M and CASIA, which are mostly collected from online celebrities. As a result, we can fairly evaluate the performance of different algorithms.
+
+ For the **ICCV2021-MFR-ALL** set, TAR is measured under the all-to-all 1:1 protocol, with FAR less than 0.000001 (1e-6). The globalised multi-racial test set contains 242,143 identities and 1,624,305 images.
90
+
91
+
92
+ #### 1. Training on Single-Host GPU
93
+
94
+ | Datasets | Backbone | **MFR-ALL** | IJB-C(1E-4) | IJB-C(1E-5) | log |
95
+ |:---------------|:--------------------|:------------|:------------|:------------|:------------------------------------------------------------------------------------------------------------------------------------|
96
+ | MS1MV2 | mobilefacenet-0.45G | 62.07 | 93.61 | 90.28 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/ms1mv2_mbf/training.log) |
97
+ | MS1MV2 | r50 | 75.13 | 95.97 | 94.07 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/ms1mv2_r50/training.log) |
98
+ | MS1MV2 | r100 | 78.12 | 96.37 | 94.27 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/ms1mv2_r100/training.log) |
99
+ | MS1MV3 | mobilefacenet-0.45G | 63.78 | 94.23 | 91.33 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/ms1mv3_mbf/training.log) |
100
+ | MS1MV3 | r50 | 79.14 | 96.37 | 94.47 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/ms1mv3_r50/training.log) |
101
+ | MS1MV3 | r100 | 81.97 | 96.85 | 95.02 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/ms1mv3_r100/training.log) |
102
+ | Glint360K | mobilefacenet-0.45G | 70.18 | 95.04 | 92.62 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/glint360k_mbf/training.log) |
103
+ | Glint360K | r50 | 86.34 | 97.16 | 95.81 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/glint360k_r50/training.log) |
104
+ | Glint360k | r100 | 89.52 | 97.55 | 96.38 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/glint360k_r100/training.log) |
105
+ | WF4M | r100 | 89.87 | 97.19 | 95.48 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/wf4m_r100/training.log) |
106
+ | WF12M-PFC-0.2 | r100 | 94.75 | 97.60 | 95.90 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/wf12m_pfc02_r100/training.log) |
107
+ | WF12M-PFC-0.3 | r100 | 94.71 | 97.64 | 96.01 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/wf12m_pfc03_r100/training.log) |
108
+ | WF12M | r100 | 94.69 | 97.59 | 95.97 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/wf12m_r100/training.log) |
109
+ | WF42M-PFC-0.2 | r100 | 96.27 | 97.70 | 96.31 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/wf42m_pfc02_r100/training.log) |
110
+ | WF42M-PFC-0.2 | ViT-T-1.5G | 92.04 | 97.27 | 95.68 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/wf42m_pfc02_40epoch_8gpu_vit_t/training.log) |
111
+ | WF42M-PFC-0.3 | ViT-B-11G | 97.16 | 97.91 | 97.05 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/pfc03_wf42m_vit_b_8gpu/training.log) |
112
+
113
+ #### 2. Training on Multi-Host GPU
114
+
115
+ | Datasets | Backbone(bs*gpus) | **MFR-ALL** | IJB-C(1E-4) | IJB-C(1E-5) | Throughput | log |
116
+ |:-----------------|:------------------|:------------|:------------|:------------|:-----------|:-------------------------------------------------------------------------------------------------------------------------------------------|
117
+ | WF42M-PFC-0.2 | r50(512*8) | 93.83 | 97.53 | 96.16 | ~5900 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/webface42m_r50_bs4k_pfc02/training.log) |
118
+ | WF42M-PFC-0.2 | r50(512*16) | 93.96 | 97.46 | 96.12 | ~11000 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/webface42m_r50_lr01_pfc02_bs8k_16gpus/training.log) |
119
+ | WF42M-PFC-0.2 | r50(128*32) | 94.04 | 97.48 | 95.94 | ~17000 | click me |
120
+ | WF42M-PFC-0.2 | r100(128*16) | 96.28 | 97.80 | 96.57 | ~5200 | click me |
121
+ | WF42M-PFC-0.2 | r100(256*16) | 96.69 | 97.85 | 96.63 | ~5200 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/webface42m_r100_bs4k_pfc02/training.log) |
122
+ | WF42M-PFC-0.0018 | r100(512*32) | 93.08 | 97.51 | 95.88 | ~10000 | click me |
123
+ | WF42M-PFC-0.2 | r100(128*32) | 96.57 | 97.83 | 96.50 | ~9800 | click me |
124
+
125
+ `r100(128*32)` means the backbone is r100, the batch size per GPU is 128, and the number of GPUs is 32.
126
+
127
+
128
+
129
+ #### 3. ViT For Face Recognition
130
+
131
+ | Datasets | Backbone(bs) | FLOPs | **MFR-ALL** | IJB-C(1E-4) | IJB-C(1E-5) | Throughput | log |
132
+ |:--------------|:--------------|:------|:------------|:------------|:------------|:-----------|:-----------------------------------------------------------------------------------------------------------------------------|
133
+ | WF42M-PFC-0.3 | r18(128*32) | 2.6 | 79.13 | 95.77 | 93.36 | - | click me |
134
+ | WF42M-PFC-0.3 | r50(128*32) | 6.3 | 94.03 | 97.48 | 95.94 | - | click me |
135
+ | WF42M-PFC-0.3 | r100(128*32) | 12.1 | 96.69 | 97.82 | 96.45 | - | click me |
136
+ | WF42M-PFC-0.3 | r200(128*32) | 23.5 | 97.70 | 97.97 | 96.93 | - | click me |
137
+ | WF42M-PFC-0.3 | VIT-T(384*64) | 1.5 | 92.24 | 97.31 | 95.97 | ~35000 | click me |
138
+ | WF42M-PFC-0.3 | VIT-S(384*64) | 5.7 | 95.87 | 97.73 | 96.57 | ~25000 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/pfc03_wf42m_vit_s_64gpu/training.log) |
139
+ | WF42M-PFC-0.3 | VIT-B(384*64) | 11.4 | 97.42 | 97.90 | 97.04 | ~13800 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/pfc03_wf42m_vit_b_64gpu/training.log) |
140
+ | WF42M-PFC-0.3 | VIT-L(384*64) | 25.3 | 97.85 | 98.00 | 97.23 | ~9406 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/pfc03_wf42m_vit_l_64gpu/training.log) |
141
+
142
+ `WF42M` means WebFace42M, and `PFC-0.3` means the negative class-center sampling rate is 0.3.
143
+
144
+ #### 4. Noisy Datasets
145
+
146
+ | Datasets | Backbone | **MFR-ALL** | IJB-C(1E-4) | IJB-C(1E-5) | log |
147
+ |:-------------------------|:---------|:------------|:------------|:------------|:---------|
148
+ | WF12M-Flip(40%) | r50 | 43.87 | 88.35 | 80.78 | click me |
149
+ | WF12M-Flip(40%)-PFC-0.1* | r50 | 80.20 | 96.11 | 93.79 | click me |
150
+ | WF12M-Conflict | r50 | 79.93 | 95.30 | 91.56 | click me |
151
+ | WF12M-Conflict-PFC-0.3* | r50 | 91.68 | 97.28 | 95.75 | click me |
152
+
153
+ `WF12M` means WebFace12M, `+PFC-0.1*` denotes additional abnormal inter-class filtering.
154
+
155
+
156
+
157
+ ## Speed Benchmark
158
+ <div><img src="https://github.com/anxiangsir/insightface_arcface_log/blob/master/pfc_exp.png" width = "90%" /></div>
159
+
160
+
161
+ **Arcface-Torch** is an efficient tool for training large-scale face recognition training sets. When the number of classes in the training sets exceeds one million, the partial FC sampling strategy maintains the same accuracy while providing several times faster training performance and lower GPU memory utilization. The partial FC is a sparse variant of the model parallel architecture for large-scale face recognition, utilizing a sparse softmax that dynamically samples a subset of class centers for each training batch. During each iteration, only a sparse portion of the parameters are updated, leading to a significant reduction in GPU memory requirements and computational demands. With the partial FC approach, it is possible to train sets with up to 29 million identities, the largest to date. Furthermore, the partial FC method supports multi-machine distributed training and mixed precision training.
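To make the sampled-softmax idea above concrete, here is a rough, self-contained PyTorch sketch, not the repository's actual Partial FC implementation: the function name, the 0.3 sample rate, and the logit scale of 64 are illustrative assumptions. For each batch, the centers of the classes present in the batch plus a random subset of the remaining ("negative") centers enter the softmax, so only those rows of the class-center matrix receive gradients.

```python
import torch
import torch.nn.functional as F


def partial_fc_logits(embeddings, labels, class_centers, sample_rate=0.3):
    """Compute logits against a sampled subset of class centers (illustrative sketch)."""
    num_classes = class_centers.size(0)
    positive = labels.unique()
    num_sample = max(int(sample_rate * num_classes), positive.numel())
    perm = torch.randperm(num_classes, device=class_centers.device)
    # keep all in-batch (positive) classes, then fill up with random negatives
    sampled = torch.cat([positive, perm[~torch.isin(perm, positive)]])[:num_sample]
    # remap the original labels to indices inside the sampled subset
    index_map = torch.full((num_classes,), -1, dtype=torch.long, device=labels.device)
    index_map[sampled] = torch.arange(sampled.numel(), device=labels.device)
    sub_labels = index_map[labels]
    sub_centers = class_centers[sampled]  # only these rows receive gradients
    logits = F.linear(F.normalize(embeddings), F.normalize(sub_centers))
    return logits, sub_labels


# toy usage: 8 embeddings of dimension 512, 1000 classes
emb = torch.randn(8, 512)
labels = torch.randint(0, 1000, (8,))
centers = torch.randn(1000, 512, requires_grad=True)
logits, sub_labels = partial_fc_logits(emb, labels, centers)
loss = F.cross_entropy(logits * 64, sub_labels)  # 64 = illustrative logit scale; no angular margin applied
loss.backward()
```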
162
+
163
+
164
+
165
+ More details see
166
+ [speed_benchmark.md](docs/speed_benchmark.md) in docs.
167
+
168
+ > 1. Training Speed of Various Parallel Techniques (Samples per Second) on a Tesla V100 32GB x 8 System (Higher is Optimal)
169
+
170
+ `-` means training failed because of gpu memory limitations.
171
+
172
+ | Number of Identities in Dataset | Data Parallel | Model Parallel | Partial FC 0.1 |
173
+ |:--------------------------------|:--------------|:---------------|:---------------|
174
+ | 125000 | 4681 | 4824 | 5004 |
175
+ | 1400000 | **1672** | 3043 | 4738 |
176
+ | 5500000 | **-** | **1389** | 3975 |
177
+ | 8000000 | **-** | **-** | 3565 |
178
+ | 16000000 | **-** | **-** | 2679 |
179
+ | 29000000 | **-** | **-** | **1855** |
180
+
181
+ > 2. GPU Memory Utilization of Various Parallel Techniques (MB per GPU) on a Tesla V100 32GB x 8 System (Lower is Optimal)
182
+
183
+ | Number of Identities in Dataset | Data Parallel | Model Parallel | Partial FC 0.1 |
184
+ |:--------------------------------|:--------------|:---------------|:---------------|
185
+ | 125000 | 7358 | 5306 | 4868 |
186
+ | 1400000 | 32252 | 11178 | 6056 |
187
+ | 5500000 | **-** | 32188 | 9854 |
188
+ | 8000000 | **-** | **-** | 12310 |
189
+ | 16000000 | **-** | **-** | 19950 |
190
+ | 29000000 | **-** | **-** | 32324 |
191
+
192
+
193
+ ## Citations
194
+
195
+ ```
196
+ @inproceedings{deng2019arcface,
197
+ title={Arcface: Additive angular margin loss for deep face recognition},
198
+ author={Deng, Jiankang and Guo, Jia and Xue, Niannan and Zafeiriou, Stefanos},
199
+ booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
200
+ pages={4690--4699},
201
+ year={2019}
202
+ }
203
+ @inproceedings{An_2022_CVPR,
204
+ author={An, Xiang and Deng, Jiankang and Guo, Jia and Feng, Ziyong and Zhu, XuHan and Yang, Jing and Liu, Tongliang},
205
+ title={Killing Two Birds With One Stone: Efficient and Robust Training of Face Recognition CNNs by Partial FC},
206
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
207
+ month={June},
208
+ year={2022},
209
+ pages={4042-4051}
210
+ }
211
+ @inproceedings{zhu2021webface260m,
212
+ title={Webface260m: A benchmark unveiling the power of million-scale deep face recognition},
213
+ author={Zhu, Zheng and Huang, Guan and Deng, Jiankang and Ye, Yun and Huang, Junjie and Chen, Xinze and Zhu, Jiagang and Yang, Tian and Lu, Jiwen and Du, Dalong and Zhou, Jie},
214
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
215
+ pages={10492--10502},
216
+ year={2021}
217
+ }
218
+ ```
deep_3drecon/deep_3drecon_models/arcface_torch/backbones/__init__.py ADDED
@@ -0,0 +1,85 @@
1
+ from .iresnet import iresnet18, iresnet34, iresnet50, iresnet100, iresnet200
2
+ from .mobilefacenet import get_mbf
3
+
4
+
5
+ def get_model(name, **kwargs):
6
+ # resnet
7
+ if name == "r18":
8
+ return iresnet18(False, **kwargs)
9
+ elif name == "r34":
10
+ return iresnet34(False, **kwargs)
11
+ elif name == "r50":
12
+ return iresnet50(False, **kwargs)
13
+ elif name == "r100":
14
+ return iresnet100(False, **kwargs)
15
+ elif name == "r200":
16
+ return iresnet200(False, **kwargs)
17
+ elif name == "r2060":
18
+ from .iresnet2060 import iresnet2060
19
+ return iresnet2060(False, **kwargs)
20
+
21
+ elif name == "mbf":
22
+ fp16 = kwargs.get("fp16", False)
23
+ num_features = kwargs.get("num_features", 512)
24
+ return get_mbf(fp16=fp16, num_features=num_features)
25
+
26
+ elif name == "mbf_large":
27
+ from .mobilefacenet import get_mbf_large
28
+ fp16 = kwargs.get("fp16", False)
29
+ num_features = kwargs.get("num_features", 512)
30
+ return get_mbf_large(fp16=fp16, num_features=num_features)
31
+
32
+ elif name == "vit_t":
33
+ num_features = kwargs.get("num_features", 512)
34
+ from .vit import VisionTransformer
35
+ return VisionTransformer(
36
+ img_size=112, patch_size=9, num_classes=num_features, embed_dim=256, depth=12,
37
+ num_heads=8, drop_path_rate=0.1, norm_layer="ln", mask_ratio=0.1)
38
+
39
+ elif name == "vit_t_dp005_mask0": # For WebFace42M
40
+ num_features = kwargs.get("num_features", 512)
41
+ from .vit import VisionTransformer
42
+ return VisionTransformer(
43
+ img_size=112, patch_size=9, num_classes=num_features, embed_dim=256, depth=12,
44
+ num_heads=8, drop_path_rate=0.05, norm_layer="ln", mask_ratio=0.0)
45
+
46
+ elif name == "vit_s":
47
+ num_features = kwargs.get("num_features", 512)
48
+ from .vit import VisionTransformer
49
+ return VisionTransformer(
50
+ img_size=112, patch_size=9, num_classes=num_features, embed_dim=512, depth=12,
51
+ num_heads=8, drop_path_rate=0.1, norm_layer="ln", mask_ratio=0.1)
52
+
53
+ elif name == "vit_s_dp005_mask_0": # For WebFace42M
54
+ num_features = kwargs.get("num_features", 512)
55
+ from .vit import VisionTransformer
56
+ return VisionTransformer(
57
+ img_size=112, patch_size=9, num_classes=num_features, embed_dim=512, depth=12,
58
+ num_heads=8, drop_path_rate=0.05, norm_layer="ln", mask_ratio=0.0)
59
+
60
+ elif name == "vit_b":
61
+ # this is a feature
62
+ num_features = kwargs.get("num_features", 512)
63
+ from .vit import VisionTransformer
64
+ return VisionTransformer(
65
+ img_size=112, patch_size=9, num_classes=num_features, embed_dim=512, depth=24,
66
+ num_heads=8, drop_path_rate=0.1, norm_layer="ln", mask_ratio=0.1, using_checkpoint=True)
67
+
68
+ elif name == "vit_b_dp005_mask_005": # For WebFace42M
69
+ # this is a feature
70
+ num_features = kwargs.get("num_features", 512)
71
+ from .vit import VisionTransformer
72
+ return VisionTransformer(
73
+ img_size=112, patch_size=9, num_classes=num_features, embed_dim=512, depth=24,
74
+ num_heads=8, drop_path_rate=0.05, norm_layer="ln", mask_ratio=0.05, using_checkpoint=True)
75
+
76
+ elif name == "vit_l_dp005_mask_005": # For WebFace42M
77
+ # this is a feature
78
+ num_features = kwargs.get("num_features", 512)
79
+ from .vit import VisionTransformer
80
+ return VisionTransformer(
81
+ img_size=112, patch_size=9, num_classes=num_features, embed_dim=768, depth=24,
82
+ num_heads=8, drop_path_rate=0.05, norm_layer="ln", mask_ratio=0.05, using_checkpoint=True)
83
+
84
+ else:
85
+ raise ValueError()
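For orientation, here is a minimal usage sketch of the `get_model` factory above (a hypothetical example, assuming the `backbones` package is importable from the working directory; the random tensor stands in for a batch of 112x112 aligned face crops):

```python
import torch

from backbones import get_model  # adjust the import path to where this package lives

# build an IResNet-50 backbone that maps a 112x112 aligned face to a 512-d embedding
net = get_model("r50", fp16=False, num_features=512)
net.eval()

with torch.no_grad():
    faces = torch.randn(4, 3, 112, 112)  # dummy batch of aligned face crops
    embeddings = net(faces)              # shape: (4, 512)
print(embeddings.shape)
```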
deep_3drecon/deep_3drecon_models/arcface_torch/backbones/iresnet.py ADDED
@@ -0,0 +1,194 @@
1
+ import torch
2
+ from torch import nn
3
+ from torch.utils.checkpoint import checkpoint
4
+
5
+ __all__ = ['iresnet18', 'iresnet34', 'iresnet50', 'iresnet100', 'iresnet200']
6
+ using_ckpt = False
7
+
8
+ def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
9
+ """3x3 convolution with padding"""
10
+ return nn.Conv2d(in_planes,
11
+ out_planes,
12
+ kernel_size=3,
13
+ stride=stride,
14
+ padding=dilation,
15
+ groups=groups,
16
+ bias=False,
17
+ dilation=dilation)
18
+
19
+
20
+ def conv1x1(in_planes, out_planes, stride=1):
21
+ """1x1 convolution"""
22
+ return nn.Conv2d(in_planes,
23
+ out_planes,
24
+ kernel_size=1,
25
+ stride=stride,
26
+ bias=False)
27
+
28
+
29
+ class IBasicBlock(nn.Module):
30
+ expansion = 1
31
+ def __init__(self, inplanes, planes, stride=1, downsample=None,
32
+ groups=1, base_width=64, dilation=1):
33
+ super(IBasicBlock, self).__init__()
34
+ if groups != 1 or base_width != 64:
35
+ raise ValueError('BasicBlock only supports groups=1 and base_width=64')
36
+ if dilation > 1:
37
+ raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
38
+ self.bn1 = nn.BatchNorm2d(inplanes, eps=1e-05,)
39
+ self.conv1 = conv3x3(inplanes, planes)
40
+ self.bn2 = nn.BatchNorm2d(planes, eps=1e-05,)
41
+ self.prelu = nn.PReLU(planes)
42
+ self.conv2 = conv3x3(planes, planes, stride)
43
+ self.bn3 = nn.BatchNorm2d(planes, eps=1e-05,)
44
+ self.downsample = downsample
45
+ self.stride = stride
46
+
47
+ def forward_impl(self, x):
48
+ identity = x
49
+ out = self.bn1(x)
50
+ out = self.conv1(out)
51
+ out = self.bn2(out)
52
+ out = self.prelu(out)
53
+ out = self.conv2(out)
54
+ out = self.bn3(out)
55
+ if self.downsample is not None:
56
+ identity = self.downsample(x)
57
+ out += identity
58
+ return out
59
+
60
+ def forward(self, x):
61
+ if self.training and using_ckpt:
62
+ return checkpoint(self.forward_impl, x)
63
+ else:
64
+ return self.forward_impl(x)
65
+
66
+
67
+ class IResNet(nn.Module):
68
+ fc_scale = 7 * 7
69
+ def __init__(self,
70
+ block, layers, dropout=0, num_features=512, zero_init_residual=False,
71
+ groups=1, width_per_group=64, replace_stride_with_dilation=None, fp16=False):
72
+ super(IResNet, self).__init__()
73
+ self.extra_gflops = 0.0
74
+ self.fp16 = fp16
75
+ self.inplanes = 64
76
+ self.dilation = 1
77
+ if replace_stride_with_dilation is None:
78
+ replace_stride_with_dilation = [False, False, False]
79
+ if len(replace_stride_with_dilation) != 3:
80
+ raise ValueError("replace_stride_with_dilation should be None "
81
+ "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
82
+ self.groups = groups
83
+ self.base_width = width_per_group
84
+ self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=3, stride=1, padding=1, bias=False)
85
+ self.bn1 = nn.BatchNorm2d(self.inplanes, eps=1e-05)
86
+ self.prelu = nn.PReLU(self.inplanes)
87
+ self.layer1 = self._make_layer(block, 64, layers[0], stride=2)
88
+ self.layer2 = self._make_layer(block,
89
+ 128,
90
+ layers[1],
91
+ stride=2,
92
+ dilate=replace_stride_with_dilation[0])
93
+ self.layer3 = self._make_layer(block,
94
+ 256,
95
+ layers[2],
96
+ stride=2,
97
+ dilate=replace_stride_with_dilation[1])
98
+ self.layer4 = self._make_layer(block,
99
+ 512,
100
+ layers[3],
101
+ stride=2,
102
+ dilate=replace_stride_with_dilation[2])
103
+ self.bn2 = nn.BatchNorm2d(512 * block.expansion, eps=1e-05,)
104
+ self.dropout = nn.Dropout(p=dropout, inplace=True)
105
+ self.fc = nn.Linear(512 * block.expansion * self.fc_scale, num_features)
106
+ self.features = nn.BatchNorm1d(num_features, eps=1e-05)
107
+ nn.init.constant_(self.features.weight, 1.0)
108
+ self.features.weight.requires_grad = False
109
+
110
+ for m in self.modules():
111
+ if isinstance(m, nn.Conv2d):
112
+ nn.init.normal_(m.weight, 0, 0.1)
113
+ elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
114
+ nn.init.constant_(m.weight, 1)
115
+ nn.init.constant_(m.bias, 0)
116
+
117
+ if zero_init_residual:
118
+ for m in self.modules():
119
+ if isinstance(m, IBasicBlock):
120
+ nn.init.constant_(m.bn2.weight, 0)
121
+
122
+ def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
123
+ downsample = None
124
+ previous_dilation = self.dilation
125
+ if dilate:
126
+ self.dilation *= stride
127
+ stride = 1
128
+ if stride != 1 or self.inplanes != planes * block.expansion:
129
+ downsample = nn.Sequential(
130
+ conv1x1(self.inplanes, planes * block.expansion, stride),
131
+ nn.BatchNorm2d(planes * block.expansion, eps=1e-05, ),
132
+ )
133
+ layers = []
134
+ layers.append(
135
+ block(self.inplanes, planes, stride, downsample, self.groups,
136
+ self.base_width, previous_dilation))
137
+ self.inplanes = planes * block.expansion
138
+ for _ in range(1, blocks):
139
+ layers.append(
140
+ block(self.inplanes,
141
+ planes,
142
+ groups=self.groups,
143
+ base_width=self.base_width,
144
+ dilation=self.dilation))
145
+
146
+ return nn.Sequential(*layers)
147
+
148
+ def forward(self, x):
149
+ with torch.cuda.amp.autocast(self.fp16):
150
+ x = self.conv1(x)
151
+ x = self.bn1(x)
152
+ x = self.prelu(x)
153
+ x = self.layer1(x)
154
+ x = self.layer2(x)
155
+ x = self.layer3(x)
156
+ x = self.layer4(x)
157
+ x = self.bn2(x)
158
+ x = torch.flatten(x, 1)
159
+ x = self.dropout(x)
160
+ x = self.fc(x.float() if self.fp16 else x)
161
+ x = self.features(x)
162
+ return x
163
+
164
+
165
+ def _iresnet(arch, block, layers, pretrained, progress, **kwargs):
166
+ model = IResNet(block, layers, **kwargs)
167
+ if pretrained:
168
+ raise ValueError()
169
+ return model
170
+
171
+
172
+ def iresnet18(pretrained=False, progress=True, **kwargs):
173
+ return _iresnet('iresnet18', IBasicBlock, [2, 2, 2, 2], pretrained,
174
+ progress, **kwargs)
175
+
176
+
177
+ def iresnet34(pretrained=False, progress=True, **kwargs):
178
+ return _iresnet('iresnet34', IBasicBlock, [3, 4, 6, 3], pretrained,
179
+ progress, **kwargs)
180
+
181
+
182
+ def iresnet50(pretrained=False, progress=True, **kwargs):
183
+ return _iresnet('iresnet50', IBasicBlock, [3, 4, 14, 3], pretrained,
184
+ progress, **kwargs)
185
+
186
+
187
+ def iresnet100(pretrained=False, progress=True, **kwargs):
188
+ return _iresnet('iresnet100', IBasicBlock, [3, 13, 30, 3], pretrained,
189
+ progress, **kwargs)
190
+
191
+
192
+ def iresnet200(pretrained=False, progress=True, **kwargs):
193
+ return _iresnet('iresnet200', IBasicBlock, [6, 26, 60, 6], pretrained,
194
+ progress, **kwargs)
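A short usage sketch for the IResNet factories defined above; the 112×112 input size is implied by `fc_scale = 7 * 7` together with the four stride-2 stages. The import path and call are illustrative only.

```python
# Hypothetical sketch: build iresnet50 and extract 512-D face embeddings.
import torch
from deep_3drecon.deep_3drecon_models.arcface_torch.backbones.iresnet import iresnet50

net = iresnet50(num_features=512, fp16=False)
net.eval()
with torch.no_grad():
    feats = net(torch.randn(4, 3, 112, 112))  # 112 -> 56 -> 28 -> 14 -> 7 spatially
print(feats.shape)  # torch.Size([4, 512])
```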
deep_3drecon/deep_3drecon_models/arcface_torch/backbones/iresnet2060.py ADDED
@@ -0,0 +1,176 @@
1
+ import torch
2
+ from torch import nn
3
+
4
+ assert torch.__version__ >= "1.8.1"
5
+ from torch.utils.checkpoint import checkpoint_sequential
6
+
7
+ __all__ = ['iresnet2060']
8
+
9
+
10
+ def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
11
+ """3x3 convolution with padding"""
12
+ return nn.Conv2d(in_planes,
13
+ out_planes,
14
+ kernel_size=3,
15
+ stride=stride,
16
+ padding=dilation,
17
+ groups=groups,
18
+ bias=False,
19
+ dilation=dilation)
20
+
21
+
22
+ def conv1x1(in_planes, out_planes, stride=1):
23
+ """1x1 convolution"""
24
+ return nn.Conv2d(in_planes,
25
+ out_planes,
26
+ kernel_size=1,
27
+ stride=stride,
28
+ bias=False)
29
+
30
+
31
+ class IBasicBlock(nn.Module):
32
+ expansion = 1
33
+
34
+ def __init__(self, inplanes, planes, stride=1, downsample=None,
35
+ groups=1, base_width=64, dilation=1):
36
+ super(IBasicBlock, self).__init__()
37
+ if groups != 1 or base_width != 64:
38
+ raise ValueError('BasicBlock only supports groups=1 and base_width=64')
39
+ if dilation > 1:
40
+ raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
41
+ self.bn1 = nn.BatchNorm2d(inplanes, eps=1e-05, )
42
+ self.conv1 = conv3x3(inplanes, planes)
43
+ self.bn2 = nn.BatchNorm2d(planes, eps=1e-05, )
44
+ self.prelu = nn.PReLU(planes)
45
+ self.conv2 = conv3x3(planes, planes, stride)
46
+ self.bn3 = nn.BatchNorm2d(planes, eps=1e-05, )
47
+ self.downsample = downsample
48
+ self.stride = stride
49
+
50
+ def forward(self, x):
51
+ identity = x
52
+ out = self.bn1(x)
53
+ out = self.conv1(out)
54
+ out = self.bn2(out)
55
+ out = self.prelu(out)
56
+ out = self.conv2(out)
57
+ out = self.bn3(out)
58
+ if self.downsample is not None:
59
+ identity = self.downsample(x)
60
+ out += identity
61
+ return out
62
+
63
+
64
+ class IResNet(nn.Module):
65
+ fc_scale = 7 * 7
66
+
67
+ def __init__(self,
68
+ block, layers, dropout=0, num_features=512, zero_init_residual=False,
69
+ groups=1, width_per_group=64, replace_stride_with_dilation=None, fp16=False):
70
+ super(IResNet, self).__init__()
71
+ self.fp16 = fp16
72
+ self.inplanes = 64
73
+ self.dilation = 1
74
+ if replace_stride_with_dilation is None:
75
+ replace_stride_with_dilation = [False, False, False]
76
+ if len(replace_stride_with_dilation) != 3:
77
+ raise ValueError("replace_stride_with_dilation should be None "
78
+ "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
79
+ self.groups = groups
80
+ self.base_width = width_per_group
81
+ self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=3, stride=1, padding=1, bias=False)
82
+ self.bn1 = nn.BatchNorm2d(self.inplanes, eps=1e-05)
83
+ self.prelu = nn.PReLU(self.inplanes)
84
+ self.layer1 = self._make_layer(block, 64, layers[0], stride=2)
85
+ self.layer2 = self._make_layer(block,
86
+ 128,
87
+ layers[1],
88
+ stride=2,
89
+ dilate=replace_stride_with_dilation[0])
90
+ self.layer3 = self._make_layer(block,
91
+ 256,
92
+ layers[2],
93
+ stride=2,
94
+ dilate=replace_stride_with_dilation[1])
95
+ self.layer4 = self._make_layer(block,
96
+ 512,
97
+ layers[3],
98
+ stride=2,
99
+ dilate=replace_stride_with_dilation[2])
100
+ self.bn2 = nn.BatchNorm2d(512 * block.expansion, eps=1e-05, )
101
+ self.dropout = nn.Dropout(p=dropout, inplace=True)
102
+ self.fc = nn.Linear(512 * block.expansion * self.fc_scale, num_features)
103
+ self.features = nn.BatchNorm1d(num_features, eps=1e-05)
104
+ nn.init.constant_(self.features.weight, 1.0)
105
+ self.features.weight.requires_grad = False
106
+
107
+ for m in self.modules():
108
+ if isinstance(m, nn.Conv2d):
109
+ nn.init.normal_(m.weight, 0, 0.1)
110
+ elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
111
+ nn.init.constant_(m.weight, 1)
112
+ nn.init.constant_(m.bias, 0)
113
+
114
+ if zero_init_residual:
115
+ for m in self.modules():
116
+ if isinstance(m, IBasicBlock):
117
+ nn.init.constant_(m.bn2.weight, 0)
118
+
119
+ def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
120
+ downsample = None
121
+ previous_dilation = self.dilation
122
+ if dilate:
123
+ self.dilation *= stride
124
+ stride = 1
125
+ if stride != 1 or self.inplanes != planes * block.expansion:
126
+ downsample = nn.Sequential(
127
+ conv1x1(self.inplanes, planes * block.expansion, stride),
128
+ nn.BatchNorm2d(planes * block.expansion, eps=1e-05, ),
129
+ )
130
+ layers = []
131
+ layers.append(
132
+ block(self.inplanes, planes, stride, downsample, self.groups,
133
+ self.base_width, previous_dilation))
134
+ self.inplanes = planes * block.expansion
135
+ for _ in range(1, blocks):
136
+ layers.append(
137
+ block(self.inplanes,
138
+ planes,
139
+ groups=self.groups,
140
+ base_width=self.base_width,
141
+ dilation=self.dilation))
142
+
143
+ return nn.Sequential(*layers)
144
+
145
+ def checkpoint(self, func, num_seg, x):
146
+ if self.training:
147
+ return checkpoint_sequential(func, num_seg, x)
148
+ else:
149
+ return func(x)
150
+
151
+ def forward(self, x):
152
+ with torch.cuda.amp.autocast(self.fp16):
153
+ x = self.conv1(x)
154
+ x = self.bn1(x)
155
+ x = self.prelu(x)
156
+ x = self.layer1(x)
157
+ x = self.checkpoint(self.layer2, 20, x)
158
+ x = self.checkpoint(self.layer3, 100, x)
159
+ x = self.layer4(x)
160
+ x = self.bn2(x)
161
+ x = torch.flatten(x, 1)
162
+ x = self.dropout(x)
163
+ x = self.fc(x.float() if self.fp16 else x)
164
+ x = self.features(x)
165
+ return x
166
+
167
+
168
+ def _iresnet(arch, block, layers, pretrained, progress, **kwargs):
169
+ model = IResNet(block, layers, **kwargs)
170
+ if pretrained:
171
+ raise ValueError()
172
+ return model
173
+
174
+
175
+ def iresnet2060(pretrained=False, progress=True, **kwargs):
176
+ return _iresnet('iresnet2060', IBasicBlock, [3, 128, 1024 - 128, 3], pretrained, progress, **kwargs)
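The 2060-layer variant differs from `iresnet.py` mainly in wrapping `layer2`/`layer3` with `checkpoint_sequential` during training, trading recomputation for activation memory. A standalone sketch of that pattern (not tied to this class):

```python
# Sketch of the memory-saving pattern used in IResNet.checkpoint() above:
# each segment's activations are recomputed in backward instead of stored.
import torch
import torch.nn as nn
from torch.utils.checkpoint import checkpoint_sequential

blocks = nn.Sequential(*[nn.Sequential(nn.Linear(64, 64), nn.ReLU()) for _ in range(8)])
x = torch.randn(16, 64, requires_grad=True)
y = checkpoint_sequential(blocks, 4, x)  # split into 4 checkpointed segments
y.sum().backward()
```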
deep_3drecon/deep_3drecon_models/arcface_torch/backbones/mobilefacenet.py ADDED
@@ -0,0 +1,147 @@
1
+ '''
2
+ Adapted from https://github.com/cavalleria/cavaface.pytorch/blob/master/backbone/mobilefacenet.py
3
+ Original author cavalleria
4
+ '''
5
+
6
+ import torch.nn as nn
7
+ from torch.nn import Linear, Conv2d, BatchNorm1d, BatchNorm2d, PReLU, Sequential, Module
8
+ import torch
9
+
10
+
11
+ class Flatten(Module):
12
+ def forward(self, x):
13
+ return x.view(x.size(0), -1)
14
+
15
+
16
+ class ConvBlock(Module):
17
+ def __init__(self, in_c, out_c, kernel=(1, 1), stride=(1, 1), padding=(0, 0), groups=1):
18
+ super(ConvBlock, self).__init__()
19
+ self.layers = nn.Sequential(
20
+ Conv2d(in_c, out_c, kernel, groups=groups, stride=stride, padding=padding, bias=False),
21
+ BatchNorm2d(num_features=out_c),
22
+ PReLU(num_parameters=out_c)
23
+ )
24
+
25
+ def forward(self, x):
26
+ return self.layers(x)
27
+
28
+
29
+ class LinearBlock(Module):
30
+ def __init__(self, in_c, out_c, kernel=(1, 1), stride=(1, 1), padding=(0, 0), groups=1):
31
+ super(LinearBlock, self).__init__()
32
+ self.layers = nn.Sequential(
33
+ Conv2d(in_c, out_c, kernel, stride, padding, groups=groups, bias=False),
34
+ BatchNorm2d(num_features=out_c)
35
+ )
36
+
37
+ def forward(self, x):
38
+ return self.layers(x)
39
+
40
+
41
+ class DepthWise(Module):
42
+ def __init__(self, in_c, out_c, residual=False, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=1):
43
+ super(DepthWise, self).__init__()
44
+ self.residual = residual
45
+ self.layers = nn.Sequential(
46
+ ConvBlock(in_c, out_c=groups, kernel=(1, 1), padding=(0, 0), stride=(1, 1)),
47
+ ConvBlock(groups, groups, groups=groups, kernel=kernel, padding=padding, stride=stride),
48
+ LinearBlock(groups, out_c, kernel=(1, 1), padding=(0, 0), stride=(1, 1))
49
+ )
50
+
51
+ def forward(self, x):
52
+ short_cut = None
53
+ if self.residual:
54
+ short_cut = x
55
+ x = self.layers(x)
56
+ if self.residual:
57
+ output = short_cut + x
58
+ else:
59
+ output = x
60
+ return output
61
+
62
+
63
+ class Residual(Module):
64
+ def __init__(self, c, num_block, groups, kernel=(3, 3), stride=(1, 1), padding=(1, 1)):
65
+ super(Residual, self).__init__()
66
+ modules = []
67
+ for _ in range(num_block):
68
+ modules.append(DepthWise(c, c, True, kernel, stride, padding, groups))
69
+ self.layers = Sequential(*modules)
70
+
71
+ def forward(self, x):
72
+ return self.layers(x)
73
+
74
+
75
+ class GDC(Module):
76
+ def __init__(self, embedding_size):
77
+ super(GDC, self).__init__()
78
+ self.layers = nn.Sequential(
79
+ LinearBlock(512, 512, groups=512, kernel=(7, 7), stride=(1, 1), padding=(0, 0)),
80
+ Flatten(),
81
+ Linear(512, embedding_size, bias=False),
82
+ BatchNorm1d(embedding_size))
83
+
84
+ def forward(self, x):
85
+ return self.layers(x)
86
+
87
+
88
+ class MobileFaceNet(Module):
89
+ def __init__(self, fp16=False, num_features=512, blocks=(1, 4, 6, 2), scale=2):
90
+ super(MobileFaceNet, self).__init__()
91
+ self.scale = scale
92
+ self.fp16 = fp16
93
+ self.layers = nn.ModuleList()
94
+ self.layers.append(
95
+ ConvBlock(3, 64 * self.scale, kernel=(3, 3), stride=(2, 2), padding=(1, 1))
96
+ )
97
+ if blocks[0] == 1:
98
+ self.layers.append(
99
+ ConvBlock(64 * self.scale, 64 * self.scale, kernel=(3, 3), stride=(1, 1), padding=(1, 1), groups=64)
100
+ )
101
+ else:
102
+ self.layers.append(
103
+ Residual(64 * self.scale, num_block=blocks[0], groups=128, kernel=(3, 3), stride=(1, 1), padding=(1, 1)),
104
+ )
105
+
106
+ self.layers.extend(
107
+ [
108
+ DepthWise(64 * self.scale, 64 * self.scale, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=128),
109
+ Residual(64 * self.scale, num_block=blocks[1], groups=128, kernel=(3, 3), stride=(1, 1), padding=(1, 1)),
110
+ DepthWise(64 * self.scale, 128 * self.scale, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=256),
111
+ Residual(128 * self.scale, num_block=blocks[2], groups=256, kernel=(3, 3), stride=(1, 1), padding=(1, 1)),
112
+ DepthWise(128 * self.scale, 128 * self.scale, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=512),
113
+ Residual(128 * self.scale, num_block=blocks[3], groups=256, kernel=(3, 3), stride=(1, 1), padding=(1, 1)),
114
+ ])
115
+
116
+ self.conv_sep = ConvBlock(128 * self.scale, 512, kernel=(1, 1), stride=(1, 1), padding=(0, 0))
117
+ self.features = GDC(num_features)
118
+ self._initialize_weights()
119
+
120
+ def _initialize_weights(self):
121
+ for m in self.modules():
122
+ if isinstance(m, nn.Conv2d):
123
+ nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
124
+ if m.bias is not None:
125
+ m.bias.data.zero_()
126
+ elif isinstance(m, nn.BatchNorm2d):
127
+ m.weight.data.fill_(1)
128
+ m.bias.data.zero_()
129
+ elif isinstance(m, nn.Linear):
130
+ nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
131
+ if m.bias is not None:
132
+ m.bias.data.zero_()
133
+
134
+ def forward(self, x):
135
+ with torch.cuda.amp.autocast(self.fp16):
136
+ for func in self.layers:
137
+ x = func(x)
138
+ x = self.conv_sep(x.float() if self.fp16 else x)
139
+ x = self.features(x)
140
+ return x
141
+
142
+
143
+ def get_mbf(fp16, num_features, blocks=(1, 4, 6, 2), scale=2):
144
+ return MobileFaceNet(fp16, num_features, blocks, scale=scale)
145
+
146
+ def get_mbf_large(fp16, num_features, blocks=(2, 8, 12, 4), scale=4):
147
+ return MobileFaceNet(fp16, num_features, blocks, scale=scale)
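The two factory functions above differ only in block counts and channel scale; both map 112×112 crops to `num_features`-dimensional embeddings. A hedged usage sketch:

```python
# Hypothetical sketch of the MobileFaceNet factories defined above.
import torch
from deep_3drecon.deep_3drecon_models.arcface_torch.backbones.mobilefacenet import get_mbf, get_mbf_large

small = get_mbf(fp16=False, num_features=512)        # blocks=(1, 4, 6, 2), scale=2
large = get_mbf_large(fp16=False, num_features=512)  # blocks=(2, 8, 12, 4), scale=4
small.eval()
with torch.no_grad():
    print(small(torch.randn(2, 3, 112, 112)).shape)  # torch.Size([2, 512])
```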
deep_3drecon/deep_3drecon_models/arcface_torch/backbones/vit.py ADDED
@@ -0,0 +1,280 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ from timm.models.layers import DropPath, to_2tuple, trunc_normal_
4
+ from typing import Optional, Callable
5
+
6
+ class Mlp(nn.Module):
7
+ def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.ReLU6, drop=0.):
8
+ super().__init__()
9
+ out_features = out_features or in_features
10
+ hidden_features = hidden_features or in_features
11
+ self.fc1 = nn.Linear(in_features, hidden_features)
12
+ self.act = act_layer()
13
+ self.fc2 = nn.Linear(hidden_features, out_features)
14
+ self.drop = nn.Dropout(drop)
15
+
16
+ def forward(self, x):
17
+ x = self.fc1(x)
18
+ x = self.act(x)
19
+ x = self.drop(x)
20
+ x = self.fc2(x)
21
+ x = self.drop(x)
22
+ return x
23
+
24
+
25
+ class VITBatchNorm(nn.Module):
26
+ def __init__(self, num_features):
27
+ super().__init__()
28
+ self.num_features = num_features
29
+ self.bn = nn.BatchNorm1d(num_features=num_features)
30
+
31
+ def forward(self, x):
32
+ return self.bn(x)
33
+
34
+
35
+ class Attention(nn.Module):
36
+ def __init__(self,
37
+ dim: int,
38
+ num_heads: int = 8,
39
+ qkv_bias: bool = False,
40
+ qk_scale: Optional[None] = None,
41
+ attn_drop: float = 0.,
42
+ proj_drop: float = 0.):
43
+ super().__init__()
44
+ self.num_heads = num_heads
45
+ head_dim = dim // num_heads
46
+ # NOTE scale factor was wrong in my original version, can set manually to be compat with prev weights
47
+ self.scale = qk_scale or head_dim ** -0.5
48
+
49
+ self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
50
+ self.attn_drop = nn.Dropout(attn_drop)
51
+ self.proj = nn.Linear(dim, dim)
52
+ self.proj_drop = nn.Dropout(proj_drop)
53
+
54
+ def forward(self, x):
55
+
56
+ with torch.cuda.amp.autocast(True):
57
+ batch_size, num_token, embed_dim = x.shape
58
+ #qkv is [3,batch_size,num_heads,num_token, embed_dim//num_heads]
59
+ qkv = self.qkv(x).reshape(
60
+ batch_size, num_token, 3, self.num_heads, embed_dim // self.num_heads).permute(2, 0, 3, 1, 4)
61
+ with torch.cuda.amp.autocast(False):
62
+ q, k, v = qkv[0].float(), qkv[1].float(), qkv[2].float()
63
+ attn = (q @ k.transpose(-2, -1)) * self.scale
64
+ attn = attn.softmax(dim=-1)
65
+ attn = self.attn_drop(attn)
66
+ x = (attn @ v).transpose(1, 2).reshape(batch_size, num_token, embed_dim)
67
+ with torch.cuda.amp.autocast(True):
68
+ x = self.proj(x)
69
+ x = self.proj_drop(x)
70
+ return x
71
+
72
+
73
+ class Block(nn.Module):
74
+
75
+ def __init__(self,
76
+ dim: int,
77
+ num_heads: int,
78
+ num_patches: int,
79
+ mlp_ratio: float = 4.,
80
+ qkv_bias: bool = False,
81
+ qk_scale: Optional[None] = None,
82
+ drop: float = 0.,
83
+ attn_drop: float = 0.,
84
+ drop_path: float = 0.,
85
+ act_layer: Callable = nn.ReLU6,
86
+ norm_layer: str = "ln",
87
+ patch_n: int = 144):
88
+ super().__init__()
89
+
90
+ if norm_layer == "bn":
91
+ self.norm1 = VITBatchNorm(num_features=num_patches)
92
+ self.norm2 = VITBatchNorm(num_features=num_patches)
93
+ elif norm_layer == "ln":
94
+ self.norm1 = nn.LayerNorm(dim)
95
+ self.norm2 = nn.LayerNorm(dim)
96
+
97
+ self.attn = Attention(
98
+ dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=drop)
99
+ # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
100
+ self.drop_path = DropPath(
101
+ drop_path) if drop_path > 0. else nn.Identity()
102
+ mlp_hidden_dim = int(dim * mlp_ratio)
103
+ self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim,
104
+ act_layer=act_layer, drop=drop)
105
+ self.extra_gflops = (num_heads * patch_n * (dim//num_heads)*patch_n * 2) / (1000**3)
106
+
107
+ def forward(self, x):
108
+ x = x + self.drop_path(self.attn(self.norm1(x)))
109
+ with torch.cuda.amp.autocast(True):
110
+ x = x + self.drop_path(self.mlp(self.norm2(x)))
111
+ return x
112
+
113
+
114
+ class PatchEmbed(nn.Module):
115
+ def __init__(self, img_size=108, patch_size=9, in_channels=3, embed_dim=768):
116
+ super().__init__()
117
+ img_size = to_2tuple(img_size)
118
+ patch_size = to_2tuple(patch_size)
119
+ num_patches = (img_size[1] // patch_size[1]) * \
120
+ (img_size[0] // patch_size[0])
121
+ self.img_size = img_size
122
+ self.patch_size = patch_size
123
+ self.num_patches = num_patches
124
+ self.proj = nn.Conv2d(in_channels, embed_dim,
125
+ kernel_size=patch_size, stride=patch_size)
126
+
127
+ def forward(self, x):
128
+ batch_size, channels, height, width = x.shape
129
+ assert height == self.img_size[0] and width == self.img_size[1], \
130
+ f"Input image size ({height}*{width}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})."
131
+ x = self.proj(x).flatten(2).transpose(1, 2)
132
+ return x
133
+
134
+
135
+ class VisionTransformer(nn.Module):
136
+ """ Vision Transformer with support for patch or hybrid CNN input stage
137
+ """
138
+
139
+ def __init__(self,
140
+ img_size: int = 112,
141
+ patch_size: int = 16,
142
+ in_channels: int = 3,
143
+ num_classes: int = 1000,
144
+ embed_dim: int = 768,
145
+ depth: int = 12,
146
+ num_heads: int = 12,
147
+ mlp_ratio: float = 4.,
148
+ qkv_bias: bool = False,
149
+ qk_scale: Optional[None] = None,
150
+ drop_rate: float = 0.,
151
+ attn_drop_rate: float = 0.,
152
+ drop_path_rate: float = 0.,
153
+ hybrid_backbone: Optional[None] = None,
154
+ norm_layer: str = "ln",
155
+ mask_ratio = 0.1,
156
+ using_checkpoint = False,
157
+ ):
158
+ super().__init__()
159
+ self.num_classes = num_classes
160
+ # num_features for consistency with other models
161
+ self.num_features = self.embed_dim = embed_dim
162
+
163
+ if hybrid_backbone is not None:
164
+ raise ValueError
165
+ else:
166
+ self.patch_embed = PatchEmbed(img_size=img_size, patch_size=patch_size, in_channels=in_channels, embed_dim=embed_dim)
167
+ self.mask_ratio = mask_ratio
168
+ self.using_checkpoint = using_checkpoint
169
+ num_patches = self.patch_embed.num_patches
170
+ self.num_patches = num_patches
171
+
172
+ self.pos_embed = nn.Parameter(torch.zeros(1, num_patches, embed_dim))
173
+ self.pos_drop = nn.Dropout(p=drop_rate)
174
+
175
+ # stochastic depth decay rule
176
+ dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)]
177
+ patch_n = (img_size//patch_size)**2
178
+ self.blocks = nn.ModuleList(
179
+ [
180
+ Block(dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale,
181
+ drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i], norm_layer=norm_layer,
182
+ num_patches=num_patches, patch_n=patch_n)
183
+ for i in range(depth)]
184
+ )
185
+ self.extra_gflops = 0.0
186
+ for _block in self.blocks:
187
+ self.extra_gflops += _block.extra_gflops
188
+
189
+ if norm_layer == "ln":
190
+ self.norm = nn.LayerNorm(embed_dim)
191
+ elif norm_layer == "bn":
192
+ self.norm = VITBatchNorm(self.num_patches)
193
+
194
+ # features head
195
+ self.feature = nn.Sequential(
196
+ nn.Linear(in_features=embed_dim * num_patches, out_features=embed_dim, bias=False),
197
+ nn.BatchNorm1d(num_features=embed_dim, eps=2e-5),
198
+ nn.Linear(in_features=embed_dim, out_features=num_classes, bias=False),
199
+ nn.BatchNorm1d(num_features=num_classes, eps=2e-5)
200
+ )
201
+
202
+ self.mask_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
203
+ torch.nn.init.normal_(self.mask_token, std=.02)
204
+ trunc_normal_(self.pos_embed, std=.02)
205
+ # trunc_normal_(self.cls_token, std=.02)
206
+ self.apply(self._init_weights)
207
+
208
+ def _init_weights(self, m):
209
+ if isinstance(m, nn.Linear):
210
+ trunc_normal_(m.weight, std=.02)
211
+ if isinstance(m, nn.Linear) and m.bias is not None:
212
+ nn.init.constant_(m.bias, 0)
213
+ elif isinstance(m, nn.LayerNorm):
214
+ nn.init.constant_(m.bias, 0)
215
+ nn.init.constant_(m.weight, 1.0)
216
+
217
+ @torch.jit.ignore
218
+ def no_weight_decay(self):
219
+ return {'pos_embed', 'cls_token'}
220
+
221
+ def get_classifier(self):
222
+ return self.head
223
+
224
+ def random_masking(self, x, mask_ratio=0.1):
225
+ """
226
+ Perform per-sample random masking by per-sample shuffling.
227
+ Per-sample shuffling is done by argsort random noise.
228
+ x: [N, L, D], sequence
229
+ """
230
+ N, L, D = x.size() # batch, length, dim
231
+ len_keep = int(L * (1 - mask_ratio))
232
+
233
+ noise = torch.rand(N, L, device=x.device) # noise in [0, 1]
234
+
235
+ # sort noise for each sample
236
+ # ascend: small is keep, large is remove
237
+ ids_shuffle = torch.argsort(noise, dim=1)
238
+ ids_restore = torch.argsort(ids_shuffle, dim=1)
239
+
240
+ # keep the first subset
241
+ ids_keep = ids_shuffle[:, :len_keep]
242
+ x_masked = torch.gather(
243
+ x, dim=1, index=ids_keep.unsqueeze(-1).repeat(1, 1, D))
244
+
245
+ # generate the binary mask: 0 is keep, 1 is remove
246
+ mask = torch.ones([N, L], device=x.device)
247
+ mask[:, :len_keep] = 0
248
+ # unshuffle to get the binary mask
249
+ mask = torch.gather(mask, dim=1, index=ids_restore)
250
+
251
+ return x_masked, mask, ids_restore
252
+
253
+ def forward_features(self, x):
254
+ B = x.shape[0]
255
+ x = self.patch_embed(x)
256
+ x = x + self.pos_embed
257
+ x = self.pos_drop(x)
258
+
259
+ if self.training and self.mask_ratio > 0:
260
+ x, _, ids_restore = self.random_masking(x)
261
+
262
+ for func in self.blocks:
263
+ if self.using_checkpoint and self.training:
264
+ from torch.utils.checkpoint import checkpoint
265
+ x = checkpoint(func, x)
266
+ else:
267
+ x = func(x)
268
+ x = self.norm(x.float())
269
+
270
+ if self.training and self.mask_ratio > 0:
271
+ mask_tokens = self.mask_token.repeat(x.shape[0], ids_restore.shape[1] - x.shape[1], 1)
272
+ x_ = torch.cat([x[:, :, :], mask_tokens], dim=1) # no cls token
273
+ x_ = torch.gather(x_, dim=1, index=ids_restore.unsqueeze(-1).repeat(1, 1, x.shape[2])) # unshuffle
274
+ x = x_
275
+ return torch.reshape(x, (B, self.num_patches * self.embed_dim))
276
+
277
+ def forward(self, x):
278
+ x = self.forward_features(x)
279
+ x = self.feature(x)
280
+ return x
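During training, `random_masking` drops a `mask_ratio` fraction of patch tokens before the transformer blocks and re-inserts a learned `mask_token` at the original positions afterwards (MAE-style). A small shape-check sketch with a deliberately tiny configuration (the WebFace42M model above uses `depth=24`, `embed_dim=768`):

```python
# Sketch of the random_masking contract; the tiny hyper-parameters are for illustration only.
import torch
from deep_3drecon.deep_3drecon_models.arcface_torch.backbones.vit import VisionTransformer

vit = VisionTransformer(img_size=112, patch_size=16, num_classes=512,
                        embed_dim=192, depth=2, num_heads=3, mask_ratio=0.1)
tokens = torch.randn(2, vit.num_patches, 192)              # 7*7 = 49 patch tokens
kept, mask, ids_restore = vit.random_masking(tokens, 0.1)  # keeps int(49 * 0.9) = 44 tokens
print(kept.shape, mask.shape, ids_restore.shape)           # (2, 44, 192) (2, 49) (2, 49)
```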
deep_3drecon/deep_3drecon_models/arcface_torch/configs/3millions.py ADDED
@@ -0,0 +1,23 @@
1
+ from easydict import EasyDict as edict
2
+
3
+ # configs for test speed
4
+
5
+ config = edict()
6
+ config.margin_list = (1.0, 0.0, 0.4)
7
+ config.network = "mbf"
8
+ config.resume = False
9
+ config.output = None
10
+ config.embedding_size = 512
11
+ config.sample_rate = 0.1
12
+ config.fp16 = True
13
+ config.momentum = 0.9
14
+ config.weight_decay = 5e-4
15
+ config.batch_size = 512 # total_batch_size = batch_size * num_gpus
16
+ config.lr = 0.1 # batch size is 512
17
+
18
+ config.rec = "synthetic"
19
+ config.num_classes = 30 * 10000
20
+ config.num_image = 100000
21
+ config.num_epoch = 30
22
+ config.warmup_epoch = -1
23
+ config.val_targets = []
deep_3drecon/deep_3drecon_models/arcface_torch/configs/__init__.py ADDED
File without changes
deep_3drecon/deep_3drecon_models/arcface_torch/configs/base.py ADDED
@@ -0,0 +1,59 @@
1
+ from easydict import EasyDict as edict
2
+
3
+ # make training faster
4
+ # our RAM is 256G
5
+ # mount -t tmpfs -o size=140G tmpfs /train_tmp
6
+
7
+ config = edict()
8
+
9
+ # Margin Base Softmax
10
+ config.margin_list = (1.0, 0.5, 0.0)
11
+ config.network = "r50"
12
+ config.resume = False
13
+ config.save_all_states = False
14
+ config.output = "ms1mv3_arcface_r50"
15
+
16
+ config.embedding_size = 512
17
+
18
+ # Partial FC
19
+ config.sample_rate = 1
20
+ config.interclass_filtering_threshold = 0
21
+
22
+ config.fp16 = False
23
+ config.batch_size = 128
24
+
25
+ # For SGD
26
+ config.optimizer = "sgd"
27
+ config.lr = 0.1
28
+ config.momentum = 0.9
29
+ config.weight_decay = 5e-4
30
+
31
+ # For AdamW
32
+ # config.optimizer = "adamw"
33
+ # config.lr = 0.001
34
+ # config.weight_decay = 0.1
35
+
36
+ config.verbose = 2000
37
+ config.frequent = 10
38
+
39
+ # For large-scale datasets, such as WebFace42M
40
+ config.dali = False
41
+
42
+ # Gradient ACC
43
+ config.gradient_acc = 1
44
+
45
+ # setup seed
46
+ config.seed = 2048
47
+
48
+ # dataload numworkers
49
+ config.num_workers = 2
50
+
51
+ # WandB Logger
52
+ config.wandb_key = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
53
+ config.suffix_run_name = None
54
+ config.using_wandb = False
55
+ config.wandb_entity = "entity"
56
+ config.wandb_project = "project"
57
+ config.wandb_log_all = True
58
+ config.save_artifacts = False
59
+ config.wandb_resume = False  # resume a wandb run: only if you want to resume the last run that was interrupted
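Each config file in this directory is a plain Python module exposing an `easydict` named `config`. A hedged loading sketch (the repo's training scripts may use their own helper, and the dotted path assumes the package is importable from the repository root):

```python
# Hypothetical config-loading sketch; not taken from this commit.
import importlib

def load_config(name: str):
    module = importlib.import_module(
        f"deep_3drecon.deep_3drecon_models.arcface_torch.configs.{name}")
    return module.config  # an easydict.EasyDict

cfg = load_config("ms1mv3_r50")
print(cfg.network, cfg.embedding_size, cfg.margin_list)  # r50 512 (1.0, 0.5, 0.0)
```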
deep_3drecon/deep_3drecon_models/arcface_torch/configs/glint360k_mbf.py ADDED
@@ -0,0 +1,27 @@
1
+ from easydict import EasyDict as edict
2
+
3
+ # make training faster
4
+ # our RAM is 256G
5
+ # mount -t tmpfs -o size=140G tmpfs /train_tmp
6
+
7
+ config = edict()
8
+ config.margin_list = (1.0, 0.0, 0.4)
9
+ config.network = "mbf"
10
+ config.resume = False
11
+ config.output = None
12
+ config.embedding_size = 512
13
+ config.sample_rate = 1.0
14
+ config.fp16 = True
15
+ config.momentum = 0.9
16
+ config.weight_decay = 1e-4
17
+ config.batch_size = 128
18
+ config.lr = 0.1
19
+ config.verbose = 2000
20
+ config.dali = False
21
+
22
+ config.rec = "/train_tmp/glint360k"
23
+ config.num_classes = 360232
24
+ config.num_image = 17091657
25
+ config.num_epoch = 20
26
+ config.warmup_epoch = 0
27
+ config.val_targets = ['lfw', 'cfp_fp', "agedb_30"]
deep_3drecon/deep_3drecon_models/arcface_torch/configs/glint360k_r100.py ADDED
@@ -0,0 +1,27 @@
1
+ from easydict import EasyDict as edict
2
+
3
+ # make training faster
4
+ # our RAM is 256G
5
+ # mount -t tmpfs -o size=140G tmpfs /train_tmp
6
+
7
+ config = edict()
8
+ config.margin_list = (1.0, 0.0, 0.4)
9
+ config.network = "r100"
10
+ config.resume = False
11
+ config.output = None
12
+ config.embedding_size = 512
13
+ config.sample_rate = 1.0
14
+ config.fp16 = True
15
+ config.momentum = 0.9
16
+ config.weight_decay = 1e-4
17
+ config.batch_size = 128
18
+ config.lr = 0.1
19
+ config.verbose = 2000
20
+ config.dali = False
21
+
22
+ config.rec = "/train_tmp/glint360k"
23
+ config.num_classes = 360232
24
+ config.num_image = 17091657
25
+ config.num_epoch = 20
26
+ config.warmup_epoch = 0
27
+ config.val_targets = ['lfw', 'cfp_fp', "agedb_30"]
deep_3drecon/deep_3drecon_models/arcface_torch/configs/glint360k_r50.py ADDED
@@ -0,0 +1,27 @@
1
+ from easydict import EasyDict as edict
2
+
3
+ # make training faster
4
+ # our RAM is 256G
5
+ # mount -t tmpfs -o size=140G tmpfs /train_tmp
6
+
7
+ config = edict()
8
+ config.margin_list = (1.0, 0.0, 0.4)
9
+ config.network = "r50"
10
+ config.resume = False
11
+ config.output = None
12
+ config.embedding_size = 512
13
+ config.sample_rate = 1.0
14
+ config.fp16 = True
15
+ config.momentum = 0.9
16
+ config.weight_decay = 1e-4
17
+ config.batch_size = 128
18
+ config.lr = 0.1
19
+ config.verbose = 2000
20
+ config.dali = False
21
+
22
+ config.rec = "/train_tmp/glint360k"
23
+ config.num_classes = 360232
24
+ config.num_image = 17091657
25
+ config.num_epoch = 20
26
+ config.warmup_epoch = 0
27
+ config.val_targets = ['lfw', 'cfp_fp', "agedb_30"]
deep_3drecon/deep_3drecon_models/arcface_torch/configs/ms1mv2_mbf.py ADDED
@@ -0,0 +1,27 @@
1
+ from easydict import EasyDict as edict
2
+
3
+ # make training faster
4
+ # our RAM is 256G
5
+ # mount -t tmpfs -o size=140G tmpfs /train_tmp
6
+
7
+ config = edict()
8
+ config.margin_list = (1.0, 0.5, 0.0)
9
+ config.network = "mbf"
10
+ config.resume = False
11
+ config.output = None
12
+ config.embedding_size = 512
13
+ config.sample_rate = 1.0
14
+ config.fp16 = True
15
+ config.momentum = 0.9
16
+ config.weight_decay = 1e-4
17
+ config.batch_size = 128
18
+ config.lr = 0.1
19
+ config.verbose = 2000
20
+ config.dali = False
21
+
22
+ config.rec = "/train_tmp/faces_emore"
23
+ config.num_classes = 85742
24
+ config.num_image = 5822653
25
+ config.num_epoch = 40
26
+ config.warmup_epoch = 0
27
+ config.val_targets = ['lfw', 'cfp_fp', "agedb_30"]
deep_3drecon/deep_3drecon_models/arcface_torch/configs/ms1mv2_r100.py ADDED
@@ -0,0 +1,27 @@
1
+ from easydict import EasyDict as edict
2
+
3
+ # make training faster
4
+ # our RAM is 256G
5
+ # mount -t tmpfs -o size=140G tmpfs /train_tmp
6
+
7
+ config = edict()
8
+ config.margin_list = (1.0, 0.5, 0.0)
9
+ config.network = "r100"
10
+ config.resume = False
11
+ config.output = None
12
+ config.embedding_size = 512
13
+ config.sample_rate = 1.0
14
+ config.fp16 = True
15
+ config.momentum = 0.9
16
+ config.weight_decay = 5e-4
17
+ config.batch_size = 128
18
+ config.lr = 0.1
19
+ config.verbose = 2000
20
+ config.dali = False
21
+
22
+ config.rec = "/train_tmp/faces_emore"
23
+ config.num_classes = 85742
24
+ config.num_image = 5822653
25
+ config.num_epoch = 20
26
+ config.warmup_epoch = 0
27
+ config.val_targets = ['lfw', 'cfp_fp', "agedb_30"]
deep_3drecon/deep_3drecon_models/arcface_torch/configs/ms1mv2_r50.py ADDED
@@ -0,0 +1,27 @@
1
+ from easydict import EasyDict as edict
2
+
3
+ # make training faster
4
+ # our RAM is 256G
5
+ # mount -t tmpfs -o size=140G tmpfs /train_tmp
6
+
7
+ config = edict()
8
+ config.margin_list = (1.0, 0.5, 0.0)
9
+ config.network = "r50"
10
+ config.resume = False
11
+ config.output = None
12
+ config.embedding_size = 512
13
+ config.sample_rate = 1.0
14
+ config.fp16 = True
15
+ config.momentum = 0.9
16
+ config.weight_decay = 5e-4
17
+ config.batch_size = 128
18
+ config.lr = 0.1
19
+ config.verbose = 2000
20
+ config.dali = False
21
+
22
+ config.rec = "/train_tmp/faces_emore"
23
+ config.num_classes = 85742
24
+ config.num_image = 5822653
25
+ config.num_epoch = 20
26
+ config.warmup_epoch = 0
27
+ config.val_targets = ['lfw', 'cfp_fp', "agedb_30"]
deep_3drecon/deep_3drecon_models/arcface_torch/configs/ms1mv3_mbf.py ADDED
@@ -0,0 +1,27 @@
1
+ from easydict import EasyDict as edict
2
+
3
+ # make training faster
4
+ # our RAM is 256G
5
+ # mount -t tmpfs -o size=140G tmpfs /train_tmp
6
+
7
+ config = edict()
8
+ config.margin_list = (1.0, 0.5, 0.0)
9
+ config.network = "mbf"
10
+ config.resume = False
11
+ config.output = None
12
+ config.embedding_size = 512
13
+ config.sample_rate = 1.0
14
+ config.fp16 = True
15
+ config.momentum = 0.9
16
+ config.weight_decay = 1e-4
17
+ config.batch_size = 128
18
+ config.lr = 0.1
19
+ config.verbose = 2000
20
+ config.dali = False
21
+
22
+ config.rec = "/train_tmp/ms1m-retinaface-t1"
23
+ config.num_classes = 93431
24
+ config.num_image = 5179510
25
+ config.num_epoch = 40
26
+ config.warmup_epoch = 0
27
+ config.val_targets = ['lfw', 'cfp_fp', "agedb_30"]
deep_3drecon/deep_3drecon_models/arcface_torch/configs/ms1mv3_r100.py ADDED
@@ -0,0 +1,27 @@
1
+ from easydict import EasyDict as edict
2
+
3
+ # make training faster
4
+ # our RAM is 256G
5
+ # mount -t tmpfs -o size=140G tmpfs /train_tmp
6
+
7
+ config = edict()
8
+ config.margin_list = (1.0, 0.5, 0.0)
9
+ config.network = "r100"
10
+ config.resume = False
11
+ config.output = None
12
+ config.embedding_size = 512
13
+ config.sample_rate = 1.0
14
+ config.fp16 = True
15
+ config.momentum = 0.9
16
+ config.weight_decay = 5e-4
17
+ config.batch_size = 128
18
+ config.lr = 0.1
19
+ config.verbose = 2000
20
+ config.dali = False
21
+
22
+ config.rec = "/train_tmp/ms1m-retinaface-t1"
23
+ config.num_classes = 93431
24
+ config.num_image = 5179510
25
+ config.num_epoch = 20
26
+ config.warmup_epoch = 0
27
+ config.val_targets = ['lfw', 'cfp_fp', "agedb_30"]
deep_3drecon/deep_3drecon_models/arcface_torch/configs/ms1mv3_r50.py ADDED
@@ -0,0 +1,27 @@
1
+ from easydict import EasyDict as edict
2
+
3
+ # make training faster
4
+ # our RAM is 256G
5
+ # mount -t tmpfs -o size=140G tmpfs /train_tmp
6
+
7
+ config = edict()
8
+ config.margin_list = (1.0, 0.5, 0.0)
9
+ config.network = "r50"
10
+ config.resume = False
11
+ config.output = None
12
+ config.embedding_size = 512
13
+ config.sample_rate = 1.0
14
+ config.fp16 = True
15
+ config.momentum = 0.9
16
+ config.weight_decay = 5e-4
17
+ config.batch_size = 128
18
+ config.lr = 0.1
19
+ config.verbose = 2000
20
+ config.dali = False
21
+
22
+ config.rec = "/train_tmp/ms1m-retinaface-t1"
23
+ config.num_classes = 93431
24
+ config.num_image = 5179510
25
+ config.num_epoch = 20
26
+ config.warmup_epoch = 0
27
+ config.val_targets = ['lfw', 'cfp_fp', "agedb_30"]