Upload folder using huggingface_hub
This view is limited to 50 files because it contains too many changes.
- .gitattributes +3 -0
- Dockerfile +0 -0
- README.md +169 -0
- app.py +3 -0
- checkpoints/checkpoints/240112_icl_audio2secc_vox2_cmlr/config.yaml +86 -0
- checkpoints/checkpoints/240112_icl_audio2secc_vox2_cmlr/model_ckpt_steps_1856000.ckpt +3 -0
- checkpoints/checkpoints/mimictalk_orig/os_secc2plane_torso/config.yaml +152 -0
- checkpoints/checkpoints/mimictalk_orig/os_secc2plane_torso/model_ckpt_steps_100000.ckpt +3 -0
- checkpoints/checkpoints/pretrained_ckpts/mit_b0.pth +3 -0
- checkpoints/checkpoints_mimictalk/German_20s/config.yaml +155 -0
- checkpoints/checkpoints_mimictalk/German_20s/model_ckpt_steps_10000.ckpt +3 -0
- deep_3drecon/BFM/.gitkeep +0 -0
- deep_3drecon/BFM/01_MorphableModel.mat +3 -0
- deep_3drecon/BFM/BFM_exp_idx.mat +0 -0
- deep_3drecon/BFM/BFM_front_idx.mat +0 -0
- deep_3drecon/BFM/BFM_model_front.mat +3 -0
- deep_3drecon/BFM/Exp_Pca.bin +3 -0
- deep_3drecon/BFM/basel_53201.txt +0 -0
- deep_3drecon/BFM/facemodel_info.mat +3 -0
- deep_3drecon/BFM/index_mp468_from_mesh35709.npy +3 -0
- deep_3drecon/BFM/index_mp468_from_mesh35709_v1.npy +3 -0
- deep_3drecon/BFM/index_mp468_from_mesh35709_v2.npy +3 -0
- deep_3drecon/BFM/index_mp468_from_mesh35709_v3.1.npy +3 -0
- deep_3drecon/BFM/index_mp468_from_mesh35709_v3.npy +3 -0
- deep_3drecon/BFM/select_vertex_id.mat +0 -0
- deep_3drecon/BFM/similarity_Lm3D_all.mat +0 -0
- deep_3drecon/BFM/std_exp.txt +1 -0
- deep_3drecon/__init__.py +1 -0
- deep_3drecon/bfm_left_eye_faces.npy +3 -0
- deep_3drecon/bfm_right_eye_faces.npy +3 -0
- deep_3drecon/data_preparation.py +45 -0
- deep_3drecon/deep_3drecon_models/__init__.py +67 -0
- deep_3drecon/deep_3drecon_models/arcface_torch/README.md +218 -0
- deep_3drecon/deep_3drecon_models/arcface_torch/backbones/__init__.py +85 -0
- deep_3drecon/deep_3drecon_models/arcface_torch/backbones/iresnet.py +194 -0
- deep_3drecon/deep_3drecon_models/arcface_torch/backbones/iresnet2060.py +176 -0
- deep_3drecon/deep_3drecon_models/arcface_torch/backbones/mobilefacenet.py +147 -0
- deep_3drecon/deep_3drecon_models/arcface_torch/backbones/vit.py +280 -0
- deep_3drecon/deep_3drecon_models/arcface_torch/configs/3millions.py +23 -0
- deep_3drecon/deep_3drecon_models/arcface_torch/configs/__init__.py +0 -0
- deep_3drecon/deep_3drecon_models/arcface_torch/configs/base.py +59 -0
- deep_3drecon/deep_3drecon_models/arcface_torch/configs/glint360k_mbf.py +27 -0
- deep_3drecon/deep_3drecon_models/arcface_torch/configs/glint360k_r100.py +27 -0
- deep_3drecon/deep_3drecon_models/arcface_torch/configs/glint360k_r50.py +27 -0
- deep_3drecon/deep_3drecon_models/arcface_torch/configs/ms1mv2_mbf.py +27 -0
- deep_3drecon/deep_3drecon_models/arcface_torch/configs/ms1mv2_r100.py +27 -0
- deep_3drecon/deep_3drecon_models/arcface_torch/configs/ms1mv2_r50.py +27 -0
- deep_3drecon/deep_3drecon_models/arcface_torch/configs/ms1mv3_mbf.py +27 -0
- deep_3drecon/deep_3drecon_models/arcface_torch/configs/ms1mv3_r100.py +27 -0
- deep_3drecon/deep_3drecon_models/arcface_torch/configs/ms1mv3_r50.py +27 -0
.gitattributes
CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+deep_3drecon/BFM/01_MorphableModel.mat filter=lfs diff=lfs merge=lfs -text
+deep_3drecon/BFM/BFM_model_front.mat filter=lfs diff=lfs merge=lfs -text
+deep_3drecon/BFM/facemodel_info.mat filter=lfs diff=lfs merge=lfs -text
Dockerfile
ADDED
File without changes
README.md
ADDED
@@ -0,0 +1,169 @@
+---
+title: Demo
+emoji: 👀
+colorFrom: blue
+colorTo: pink
+sdk: docker
+pinned: false
+---
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+
+
+# MimicTalk: Mimicking a personalized and expressive 3D talking face in few minutes | NeurIPS 2024
+[Paper](https://arxiv.org/abs/2401.08503) | [GitHub](https://github.com/yerfor/MimicTalk) | [English Readme](./README.md)
+
+This repository is the official PyTorch implementation of MimicTalk, which synthesizes highly expressive, person-specific talking-face videos. The code builds on our previous work [Real3D-Portrait](https://github.com/yerfor/Real3DPortrait) (ICLR 2024), a NeRF-based one-shot talking-face method, which makes MimicTalk's training faster and its results stronger. Visit our [project page](https://mimictalk.github.io/) to watch demo videos, and read our [paper](https://arxiv.org/abs/2410.06734) for technical details.
+
+<p align="center">
+<br>
+<img src="assets/mimictalk.png" width="100%"/>
+<br>
+</p>
+
+# Quick Start!
+## Set Up the Environment
+Follow the [environment setup guide](docs/prepare_env/install_guide-zh.md) to create the Conda environment `mimictalk`.
+## Download Pre-trained and Third-Party Models
+### 3DMM BFM Model
+Download the 3DMM BFM model: [Google Drive](https://drive.google.com/drive/folders/1o4t5YIw7w4cMUN4bgU9nPf6IyWVG1bEk?usp=sharing) or [BaiduYun Disk](https://pan.baidu.com/s/1aqv1z_qZ23Vp2VP4uxxblQ?pwd=m9q5) (extraction code: m9q5)
+
+
+After downloading, place all the files into `deep_3drecon/BFM`; the directory structure should look like this:
+```
+deep_3drecon/BFM/
+├── 01_MorphableModel.mat
+├── BFM_exp_idx.mat
+├── BFM_front_idx.mat
+├── BFM_model_front.mat
+├── Exp_Pca.bin
+├── facemodel_info.mat
+├── index_mp468_from_mesh35709.npy
+├── mediapipe_in_bfm53201.npy
+└── std_exp.txt
+```
+
+### Pre-trained Models
+Download the pre-trained MimicTalk checkpoints: [Google Drive](https://drive.google.com/drive/folders/1Kc6ueDO9HFDN3BhtJCEKNCZtyKHSktaA?usp=sharing) or [BaiduYun Disk](https://pan.baidu.com/s/1nQKyGV5JB6rJtda7qsThUg?pwd=mimi) (extraction code: mimi)
+
+After downloading, unzip all the files into `checkpoints` and `checkpoints_mimictalk`; the directory structure should look like this:
+```
+checkpoints/
+├── mimictalk_orig
+│   └── os_secc2plane_torso
+│       ├── config.yaml
+│       └── model_ckpt_steps_100000.ckpt
+├── 240112_icl_audio2secc_vox2_cmlr
+│   ├── config.yaml
+│   └── model_ckpt_steps_1856000.ckpt
+└── pretrained_ckpts
+    └── mit_b0.pth
+
+checkpoints_mimictalk/
+└── German_20s
+    ├── config.yaml
+    └── model_ckpt_steps_10000.ckpt
+```
+
+## Minimal Commands for MimicTalk Training and Inference
+```
+python inference/train_mimictalk_on_a_video.py # train the model, this may take 10 minutes for 2,000 steps
+python inference/mimictalk_infer.py # infer the model
+```
+
+
+# Training and Inference Details
+We currently provide **command-line (CLI)** and **Gradio WebUI** inference. The portrait identity for audio-driven inference comes from the `torso_ckpt`, so at minimum you also need to provide a `driving audio`. In addition, you can provide a `style video` so that the model predicts talking motions in the same style as that video.
+
+First, switch to the project root directory and activate the Conda environment:
+```bash
+cd <Real3DPortraitRoot>
+conda activate mimictalk
+export PYTHONPATH=./
+export HF_ENDPOINT=https://hf-mirror.com
+```
+
+## Gradio WebUI Inference
+Launch the Gradio WebUI, upload the required assets as prompted, and click the `Training` button to train; once training finishes, click the `Generate` button to run inference:
+```bash
+python inference/app_mimictalk.py
+```
+
+## CLI Training for a Specific Speaker
+
+At minimum, provide a `source video`. Training command:
+```bash
+python inference/train_mimictalk_on_a_video.py \
+--video_id <PATH_TO_SOURCE_VIDEO> \
+--max_updates <UPDATES_NUMBER> \
+--work_dir <PATH_TO_SAVING_CKPT>
+```
+
+Notes on some optional parameters:
+
+- `--torso_ckpt` The pre-trained Real3D-Portrait model
+- `--max_updates` Number of training updates
+- `--batch_size` Training batch size: `1` requires about 8 GB of VRAM; `2` requires about 15 GB
+- `--lr_triplane` Learning rate of the triplane: use 0.1 for video input and 0.001 for image input
+- `--work_dir` If not specified, checkpoints are saved to `checkpoints_mimictalk/` by default
+
+Example command:
+```bash
+python inference/train_mimictalk_on_a_video.py \
+--video_id data/raw/videos/German_20s.mp4 \
+--max_updates 2000 \
+--work_dir checkpoints_mimictalk/German_20s
+```
+
+## CLI Inference
+
+At minimum, provide a `driving audio`; optionally, provide a `driving style`. Inference command:
+```bash
+python inference/mimictalk_infer.py \
+--drv_aud <PATH_TO_AUDIO> \
+--drv_style <PATH_TO_STYLE_VIDEO, OPTIONAL> \
+--drv_pose <PATH_TO_POSE_VIDEO, OPTIONAL> \
+--bg_img <PATH_TO_BACKGROUND_IMAGE, OPTIONAL> \
+--out_name <PATH_TO_OUTPUT_VIDEO, OPTIONAL>
+```
+
+Notes on some optional parameters:
+- `--drv_pose` Provides head-pose motion when specified; otherwise a static pose is used
+- `--bg_img` Provides the background when specified; otherwise the background is extracted from the source image
+- `--mouth_amp` Mouth amplitude; larger values give larger mouth openings
+- `--map_to_init_pose` When `True`, the pose of the first frame is mapped to the source pose, and the same transform is applied to subsequent frames
+- `--temperature` Sampling temperature of audio2motion; larger values give more diverse but less accurate results
+- `--out_name` If not specified, the result is saved in `infer_out/tmp/`
+- `--out_mode` When `final`, only the talking-face video is output; when `concat_debug`, some visualized intermediate results are also output
+
+Example inference command:
+```bash
+python inference/mimictalk_infer.py \
+--drv_aud data/raw/examples/Obama_5s.wav \
+--drv_pose data/raw/examples/German_20s.mp4 \
+--drv_style data/raw/examples/German_20s.mp4 \
+--bg_img data/raw/examples/bg.png \
+--out_name output.mp4 \
+--out_mode final
+```
+
+# Disclaimer
+Without the consent of the person concerned, no organization or individual may use any technique mentioned here to generate videos of other people talking, including but not limited to government leaders, politicians, and celebrities. Violating this clause may constitute a violation of copyright law.
+
+# Citation
+If this repository helps you, please consider citing our work:
+```
+@inproceedings{ye2024mimicktalk,
+    author    = {Ye, Zhenhui and Zhong, Tianyun and Ren, Yi and Yang, Jiaqi and Li, Weichuang and Huang, Jiangwei and Jiang, Ziyue and He, Jinzheng and Huang, Rongjie and Liu, Jinglin and Zhang, Chen and Yin, Xiang and Ma, Zejun and Zhao, Zhou},
+    title     = {MimicTalk: Mimicking a personalized and expressive 3D talking face in few minutes},
+    journal   = {NeurIPS},
+    year      = {2024},
+}
+@inproceedings{ye2024real3d,
+  title     = {Real3D-Portrait: One-shot Realistic 3D Talking Portrait Synthesis},
+  author    = {Ye, Zhenhui and Zhong, Tianyun and Ren, Yi and Yang, Jiaqi and Li, Weichuang and Huang, Jiawei and Jiang, Ziyue and He, Jinzheng and Huang, Rongjie and Liu, Jinglin and others},
+  journal   = {ICLR},
+  year={2024}
+}
+```
app.py
ADDED
@@ -0,0 +1,3 @@
+from transformers import AutoModel
+model = AutoModel.from_pretrained("mrbear1024/mimictalk")
+print(model)
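Since this commit was pushed with `huggingface_hub`, the same library can pull the uploaded folder back down. Below is a minimal sketch, not part of the upload; it assumes the repo id `mrbear1024/mimictalk` referenced in `app.py` above (adjust the id or repo type if this is hosted as a Space rather than a model repo).

```python
# Minimal sketch: download the uploaded folder locally with huggingface_hub.
# Assumes the repo id "mrbear1024/mimictalk" from app.py above.
from huggingface_hub import snapshot_download

local_path = snapshot_download(repo_id="mrbear1024/mimictalk", local_dir="./mimictalk_repo")
print(local_path)  # checkpoints/, checkpoints_mimictalk/ and deep_3drecon/BFM/ are now available locally
```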
checkpoints/checkpoints/240112_icl_audio2secc_vox2_cmlr/config.yaml
ADDED
@@ -0,0 +1,86 @@
+accumulate_grad_batches: 1
+amp: false
+audio_type: hubert
+base_config:
+- ./audio2secc_vae.yaml
+batch_size: 4
+binarization_args:
+  with_coeff: true
+  with_hubert: true
+  with_mel: true
+binary_data_dir: data/binary/voxceleb2_audio2motion
+blink_mode: blink_unit
+clip_grad_norm: 0.5
+clip_grad_value: 0
+debug: false
+ds_name: Concat_voxceleb2_CMLR
+eval_max_batches: 10
+gen_dir_name: ''
+hidden_size: 256
+icl_model_type: icl_flow_matching
+infer_audio_source_name: ''
+infer_ckpt_steps: 40000
+infer_out_npy_name: ''
+init_from_ckpt: ''
+lambda_kl: 0.02
+lambda_kl_t1: 2000
+lambda_kl_t2: 2000
+lambda_l2_reg_exp: 0.0
+lambda_lap_exp: 0.0
+lambda_lap_exp_x1: 0.1
+lambda_mse_exp: 0.0
+lambda_mse_exp_x1: 0.1
+lambda_mse_icl: 1.0
+lambda_mse_lm2d: 0.0
+lambda_mse_lm3d: 0.0
+lambda_mse_lm3d_x1: 0.1
+lambda_sync_lm3d: 0.05
+load_ckpt: ''
+load_db_to_memory: false
+lr: 0.0005
+max_sentences_per_batch: 512
+max_tokens_per_batch: 20000
+max_updates: 4000000
+motion_type: exp
+num_ckpt_keep: 100
+num_sanity_val_steps: 5
+num_valid_plots: 1
+num_workers: 8
+optimizer_adam_beta1: 0.9
+optimizer_adam_beta2: 0.999
+print_nan_grads: false
+process_id: 0
+raw_data_dir: /home/tiger/datasets/raw/TH1KH_512
+ref_id_mode: first_frame
+resume_from_checkpoint: 0
+sample_min_length: 32
+save_best: false
+save_codes:
+- tasks
+- modules
+- egs
+save_gt: true
+scheduler: exponential
+seed: 9999
+smo_win_size: 5
+split_seed: 999
+syncnet_ckpt_dir: checkpoints/0904_syncnet/syncnet_hubert_vox2
+task_cls: tasks.os_avatar.icl_audio2secc_task.Audio2SECCTask
+tb_log_interval: 100
+total_process: 1
+use_aux_features: true
+use_aux_loss_on_x1: true
+use_eye_amp_embed: false
+use_flow: true
+use_fork: true
+use_kv_dataset: true
+use_mouth_amp_embed: true
+use_pitch: true
+val_check_interval: 2000
+valid_infer_interval: 2000
+valid_monitor_key: val_loss
+valid_monitor_mode: min
+warmup_updates: 1000
+weight_decay: 0
+work_dir: checkpoints/240112_audio2secc/icl_audio2secc_vox2_cmlr
+x_multiply: 16
checkpoints/checkpoints/240112_icl_audio2secc_vox2_cmlr/model_ckpt_steps_1856000.ckpt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:367167db3b25fe07de9255871ce7813158551c4b00bfadd7334a671648924a2e
+size 462941609
checkpoints/checkpoints/mimictalk_orig/os_secc2plane_torso/config.yaml
ADDED
@@ -0,0 +1,152 @@
+accumulate_grad_batches: 1
+add_ffhq_singe_disc: false
+also_update_decoder: false
+amp: false
+base_channel: 32768
+base_config:
+- ./secc_img2plane.yaml
+batch_size: 1
+binary_data_dir: data/binary/CelebV-HQ
+blur_fade_kimg: 20
+blur_init_sigma: 10
+blur_raw_target: true
+box_warp: 1
+ckpt_milestone_interval: 50000
+clip_grad_norm: 1.0
+clip_grad_value: 0
+cond_hid_dim: 32
+cond_out_dim: 16
+cond_type: idexp_lm3d_normalized
+debug: false
+density_reg_p_dist: 0.004
+disable_highreso_at_stage1: true
+disc_c_noise: 1.0
+disc_cond_mode: none
+ds_name: Concat_VFHQ_CelebVHQ_TH1KH_RAVDESS
+ema_interval: 400
+enable_rescale_plane_regulation: false
+eval_max_batches: 100
+ffhq_disc_inp_mode: eg3d_gen
+final_resolution: 512
+flipped_to_world_coord: true
+fuse_with_deform_source: false
+gen_cond_mode: none
+generator_condition_on_pose: true
+gpc_reg_fade_kimg: 1000
+gpc_reg_prob: 0.8
+group_size_for_mini_batch_std: 2
+htbsr_head_threshold: 1.0
+htbsr_head_weight_fuse_mode: v2
+img2plane_backbone_mode: composite
+img2plane_backbone_scale: standard
+init_from_ckpt: checkpoints/240118_os_secc2planes/os_secc2plane_pertubeBlink0.05_pertubeSECC0.05
+lam_occlusion_2_reg_l1: 0.0
+lam_occlusion_reg_l1: 0.0
+lam_occlusion_weights_entropy: 0.001
+lambda_G_adversarial_adv: 1.0
+lambda_G_supervise_adv: 1.0
+lambda_G_supervise_mse: 1.0
+lambda_G_supervise_mse_raw: 1.0
+lambda_density_reg: 0.25
+lambda_ffhq_mv_adv: 0.002
+lambda_gradient_penalty: 1.0
+lambda_mse: 1.0
+lambda_mse_depth: 0.0
+lambda_th1kh_mv_adv: 0.003
+lambda_weights_entropy: 0.01
+lambda_weights_l1: 0.1
+load_ckpt: ''
+lpips_mode: vgg19_v2
+lr_d: 0.0002
+lr_decay_interval: 5000
+lr_decay_rate: 0.95
+lr_g: 1.0e-05
+lr_lambda_pertube_secc: 0.01
+lr_mul_cano_img2plane: 1.0
+mapping_network_depth: 2
+max_channel: 512
+max_updates: 100000
+mimic_plane: false
+min_rescale_factor: 0.25
+motion_smo_win_size: 5
+neural_rendering_resolution: 128
+normalize_cond: false
+normalize_radius: false
+not_save_modules:
+- criterion_lpips
+- eg3d_model
+num_ckpt_keep: 1
+num_fp16_layers_in_discriminator: 4
+num_fp16_layers_in_generator: 0
+num_fp16_layers_in_super_resolution: 4
+num_samples_coarse: 48
+num_samples_fine: 48
+num_sanity_val_steps: 1
+num_valid_plots: 25
+num_workers: 8
+occlusion_fuse: true
+ones_ws_for_sr: true
+optimizer_adam_beta1_d: 0.0
+optimizer_adam_beta1_g: 0.0
+optimizer_adam_beta2_d: 0.99
+optimizer_adam_beta2_g: 0.99
+phase1_plane_fusion_mode: add
+pncc_cond_mode: cano_src_tgt
+pretrained_eg3d_ckpt: /mnt/bn/ailabrenyi/entries/yezhenhui/projects/GeneFace_private/checkpoints/0628_img2planes/eg3d_baseline_run2/model_ckpt_steps_100000.ckpt
+print_nan_grads: false
+process_id: 0
+processed_data_dir: data/processed/videos
+random_sample_pose: true
+raw_data_dir: /home/tiger/datasets/raw/FFHQ
+ray_far: auto
+ray_near: auto
+reg_interval_d: 16
+reg_interval_g: 4
+reg_interval_g_cond: 4
+reload_head_ckpt: ''
+resume_from_checkpoint: 0
+save_best: true
+save_codes:
+- tasks
+- modules
+- egs
+secc_pertube_mode: randn
+secc_pertube_randn_scale: 0.01
+secc_segformer_scale: b0
+seed: 9999
+seg_out_mode: head
+smo_win_size: 5
+split_seed: 999
+sr_type: vanilla
+start_adv_iters: 40000
+target_pertube_blink_secc_loss: 0.05
+target_pertube_secc_loss: 0.05
+task_cls: tasks.os_avatar.secc_img2plane_torso_task.SECC_Img2PlaneEG3D_TorsoTask
+tb_log_interval: 100
+torch_compile: true
+torso_kp_num: 4
+torso_model_version: v2
+torso_occlusion_reg_unmask_factor: 0.3
+torso_ref_segout_mode: torso
+total_process: 1
+triplane_depth: 3
+triplane_feature_type: trigrid_v2
+triplane_hid_dim: 32
+two_stage_training: true
+update_on_th1kh_samples: false
+update_src2src_interval: 4
+use_kv_dataset: true
+use_motion_smo_net: false
+use_mse: false
+use_th1kh_disc: false
+use_th1kh_mv_adv: false
+val_check_interval: 2000
+valid_infer_interval: 2000
+valid_monitor_key: val_loss
+valid_monitor_mode: min
+video_id: May
+w_dim: 512
+warmup_updates: 4000
+weight_fuse: true
+work_dir: checkpoints/240120_os_secc2planes_torso/os_secc2plane_torso_htbsrFusev2_htbsrThres1.0
+z_dim: 512
checkpoints/checkpoints/mimictalk_orig/os_secc2plane_torso/model_ckpt_steps_100000.ckpt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:757c1b73d938da0ec3fba555a155a31ac803ddc8d343bba2a3c69845844b213a
+size 1414788463
checkpoints/checkpoints/pretrained_ckpts/mit_b0.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:df468f7f13c4186f25bd3e2caf09e4f927b5b5ac0abccac84011dae747d4c49c
+size 14331578
checkpoints/checkpoints_mimictalk/German_20s/config.yaml
ADDED
@@ -0,0 +1,155 @@
+accumulate_grad_batches: 1
+add_ffhq_singe_disc: false
+also_update_decoder: false
+amp: false
+base_channel: 32768
+base_config:
+- ./secc_img2plane.yaml
+batch_size: 1
+binary_data_dir: data/binary/CelebV-HQ
+blur_fade_kimg: 20
+blur_init_sigma: 10
+blur_raw_target: true
+box_warp: 1
+ckpt_milestone_interval: 50000
+clip_grad_norm: 1.0
+clip_grad_value: 0
+cond_hid_dim: 32
+cond_out_dim: 16
+cond_type: idexp_lm3d_normalized
+debug: false
+density_reg_p_dist: 0.004
+disable_highreso_at_stage1: true
+disc_c_noise: 1.0
+disc_cond_mode: none
+ds_name: Concat_VFHQ_CelebVHQ_TH1KH_RAVDESS
+ema_interval: 400
+enable_rescale_plane_regulation: false
+eval_max_batches: 100
+ffhq_disc_inp_mode: eg3d_gen
+final_resolution: 512
+flipped_to_world_coord: true
+fuse_with_deform_source: false
+gen_cond_mode: none
+generator_condition_on_pose: true
+gpc_reg_fade_kimg: 1000
+gpc_reg_prob: 0.8
+group_size_for_mini_batch_std: 2
+htbsr_head_threshold: 1.0
+htbsr_head_weight_fuse_mode: v2
+img2plane_backbone_mode: composite
+img2plane_backbone_scale: standard
+init_from_ckpt: checkpoints/240118_os_secc2planes/os_secc2plane_pertubeBlink0.05_pertubeSECC0.05
+lam_occlusion_2_reg_l1: 0.0
+lam_occlusion_reg_l1: 0.0
+lam_occlusion_weights_entropy: 0.001
+lambda_G_adversarial_adv: 1.0
+lambda_G_supervise_adv: 1.0
+lambda_G_supervise_mse: 1.0
+lambda_G_supervise_mse_raw: 1.0
+lambda_density_reg: 0.25
+lambda_ffhq_mv_adv: 0.002
+lambda_gradient_penalty: 1.0
+lambda_mse: 1.0
+lambda_mse_depth: 0.0
+lambda_th1kh_mv_adv: 0.003
+lambda_weights_entropy: 0.01
+lambda_weights_l1: 0.1
+load_ckpt: ''
+lpips_mode: vgg19_v2
+lr_d: 0.0002
+lr_decay_interval: 5000
+lr_decay_rate: 0.95
+lr_g: 1.0e-05
+lr_lambda_pertube_secc: 0.01
+lr_mul_cano_img2plane: 1.0
+mapping_network_depth: 2
+max_channel: 512
+max_updates: 100000
+mimic_plane: false
+min_rescale_factor: 0.25
+motion_smo_win_size: 5
+neural_rendering_resolution: 128
+normalize_cond: false
+normalize_radius: false
+not_save_modules:
+- criterion_lpips
+- eg3d_model
+num_ckpt_keep: 1
+num_fp16_layers_in_discriminator: 4
+num_fp16_layers_in_generator: 0
+num_fp16_layers_in_super_resolution: 4
+num_samples_coarse: 48
+num_samples_fine: 48
+num_sanity_val_steps: 1
+num_valid_plots: 25
+num_workers: 8
+occlusion_fuse: true
+ones_ws_for_sr: true
+optimizer_adam_beta1_d: 0.0
+optimizer_adam_beta1_g: 0.0
+optimizer_adam_beta2_d: 0.99
+optimizer_adam_beta2_g: 0.99
+phase1_plane_fusion_mode: add
+pncc_cond_mode: cano_src_tgt
+pretrained_eg3d_ckpt: /mnt/bn/ailabrenyi/entries/yezhenhui/projects/GeneFace_private/checkpoints/0628_img2planes/eg3d_baseline_run2/model_ckpt_steps_100000.ckpt
+print_nan_grads: false
+process_id: 0
+processed_data_dir: data/processed/videos
+random_sample_pose: true
+raw_data_dir: /home/tiger/datasets/raw/FFHQ
+ray_far: auto
+ray_near: auto
+reg_interval_d: 16
+reg_interval_g: 4
+reg_interval_g_cond: 4
+reload_head_ckpt: ''
+resume_from_checkpoint: 0
+save_best: true
+save_codes:
+- tasks
+- modules
+- egs
+secc_pertube_mode: randn
+secc_pertube_randn_scale: 0.01
+secc_segformer_scale: b0
+seed: 9999
+seg_out_mode: head
+smo_win_size: 5
+split_seed: 999
+sr_type: vanilla
+start_adv_iters: 40000
+target_pertube_blink_secc_loss: 0.05
+target_pertube_secc_loss: 0.05
+task_cls: tasks.os_avatar.secc_img2plane_torso_task.SECC_Img2PlaneEG3D_TorsoTask
+tb_log_interval: 100
+torch_compile: true
+torso_kp_num: 4
+torso_model_version: v2
+torso_occlusion_reg_unmask_factor: 0.3
+torso_ref_segout_mode: torso
+total_process: 1
+triplane_depth: 3
+triplane_feature_type: trigrid_v2
+triplane_hid_dim: 32
+two_stage_training: true
+update_on_th1kh_samples: false
+update_src2src_interval: 4
+use_kv_dataset: true
+use_motion_smo_net: false
+use_mse: false
+use_th1kh_disc: false
+use_th1kh_mv_adv: false
+val_check_interval: 2000
+valid_infer_interval: 2000
+valid_monitor_key: val_loss
+valid_monitor_mode: min
+video_id: May
+w_dim: 512
+warmup_updates: 4000
+weight_fuse: true
+work_dir: checkpoints/240120_os_secc2planes_torso/os_secc2plane_torso_htbsrFusev2_htbsrThres1.0
+z_dim: 512
+
+lora_r: 2
+lora_mode: secc2plane_sr
checkpoints/checkpoints_mimictalk/German_20s/model_ckpt_steps_10000.ckpt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:66f6e77b1c1f2fc548c3c94e6357800ad54819c70817a2ea726ff59b44ca8028
+size 829401932
deep_3drecon/BFM/.gitkeep
ADDED
File without changes
deep_3drecon/BFM/01_MorphableModel.mat
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:37b1f0742db356a3b1568a8365a06f5b0fe0ab687ac1c3068c803666cbd4d8e2
+size 240875364
deep_3drecon/BFM/BFM_exp_idx.mat
ADDED
Binary file (91.9 kB)
deep_3drecon/BFM/BFM_front_idx.mat
ADDED
Binary file (44.9 kB)
deep_3drecon/BFM/BFM_model_front.mat
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d7655695ee8cf64a23db9a3cff730b4a0573160690805d1861b7d651f1596319
+size 127170280
deep_3drecon/BFM/Exp_Pca.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e7f31380e6cbdaf2aeec698db220bac4f221946e4d551d88c092d47ec49b1726
+size 51086404
deep_3drecon/BFM/basel_53201.txt
ADDED
The diff for this file is too large to render.
deep_3drecon/BFM/facemodel_info.mat
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:529398f76619ae7e22f43c25dd60a2473bcc2bcc8c894fd9c613c68624ce1c04
+size 738861
deep_3drecon/BFM/index_mp468_from_mesh35709.npy
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:053b8cce8424b722db6ec5b068514eb007a23b4c5afd629449eb08746e643211
+size 3872
deep_3drecon/BFM/index_mp468_from_mesh35709_v1.npy
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0d238a90df0c55075c9cea43dab76348421379a75c204931e34dbd2c11fb4b65
+size 3872
deep_3drecon/BFM/index_mp468_from_mesh35709_v2.npy
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fe95e2bb10ac1e54804006184d7de3c5ccd0eb98a5f1bd28e00b9f3569f6ce5a
+size 3872
deep_3drecon/BFM/index_mp468_from_mesh35709_v3.1.npy
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:053b8cce8424b722db6ec5b068514eb007a23b4c5afd629449eb08746e643211
+size 3872
deep_3drecon/BFM/index_mp468_from_mesh35709_v3.npy
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5b007b3619dd02892b38349ba3d4b10e32bc2eff201c265f25d6ed62f67dbd51
+size 3872
deep_3drecon/BFM/select_vertex_id.mat
ADDED
Binary file (62.3 kB)
deep_3drecon/BFM/similarity_Lm3D_all.mat
ADDED
Binary file (994 Bytes)
deep_3drecon/BFM/std_exp.txt
ADDED
@@ -0,0 +1 @@
+453980 257264 263068 211890 135873 184721 47055.6 72732 62787.4 106226 56708.5 51439.8 34887.1 44378.7 51813.4 31030.7 23354.9 23128.1 19400 21827.6 22767.7 22057.4 19894.3 16172.8 17142.7 10035.3 14727.5 12972.5 10763.8 8953.93 8682.62 8941.81 6342.3 5205.3 7065.65 6083.35 6678.88 4666.63 5082.89 5134.76 4908.16 3964.93 3739.95 3180.09 2470.45 1866.62 1624.71 2423.74 1668.53 1471.65 1194.52 782.102 815.044 835.782 834.937 744.496 575.146 633.76 705.685 753.409 620.306 673.326 766.189 619.866 559.93 357.264 396.472 556.849 455.048 460.592 400.735 326.702 279.428 291.535 326.584 305.664 287.816 283.642 276.19
deep_3drecon/__init__.py
ADDED
@@ -0,0 +1 @@
+from .reconstructor import *
deep_3drecon/bfm_left_eye_faces.npy
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9651756ea2c0fac069a1edf858ed1f125eddc358fa74c529a370c1e7b5730d28
+size 4680
deep_3drecon/bfm_right_eye_faces.npy
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:28cb5bbacf578d30a3d5006ec28c617fe5a3ecaeeeb87d9433a884e0f0301a2e
+size 4648
deep_3drecon/data_preparation.py
ADDED
@@ -0,0 +1,45 @@
+"""This script is the data preparation script for Deep3DFaceRecon_pytorch
+"""
+
+import os
+import numpy as np
+import argparse
+from util.detect_lm68 import detect_68p,load_lm_graph
+from util.skin_mask import get_skin_mask
+from util.generate_list import check_list, write_list
+import warnings
+warnings.filterwarnings("ignore")
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--data_root', type=str, default='datasets', help='root directory for training data')
+parser.add_argument('--img_folder', nargs="+", required=True, help='folders of training images')
+parser.add_argument('--mode', type=str, default='train', help='train or val')
+opt = parser.parse_args()
+
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+
+def data_prepare(folder_list,mode):
+
+    lm_sess,input_op,output_op = load_lm_graph('./checkpoints/lm_model/68lm_detector.pb') # load a tensorflow version 68-landmark detector
+
+    for img_folder in folder_list:
+        detect_68p(img_folder,lm_sess,input_op,output_op) # detect landmarks for images
+        get_skin_mask(img_folder) # generate skin attention mask for images
+
+    # create files that record path to all training data
+    msks_list = []
+    for img_folder in folder_list:
+        path = os.path.join(img_folder, 'mask')
+        msks_list += ['/'.join([img_folder, 'mask', i]) for i in sorted(os.listdir(path)) if 'jpg' in i or
+            'png' in i or 'jpeg' in i or 'PNG' in i]
+
+    imgs_list = [i.replace('mask/', '') for i in msks_list]
+    lms_list = [i.replace('mask', 'landmarks') for i in msks_list]
+    lms_list = ['.'.join(i.split('.')[:-1]) + '.txt' for i in lms_list]
+
+    lms_list_final, imgs_list_final, msks_list_final = check_list(lms_list, imgs_list, msks_list) # check if the path is valid
+    write_list(lms_list_final, imgs_list_final, msks_list_final, mode=mode) # save files
+
+if __name__ == '__main__':
+    print('Datasets:',opt.img_folder)
+    data_prepare([os.path.join(opt.data_root,folder) for folder in opt.img_folder],opt.mode)
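As a reading aid for the script above: `data_prepare` derives the image and landmark paths from each mask path purely by string substitution. A minimal sketch with a hypothetical file name (`datasets/example/mask/0001.png` is not part of the upload):

```python
# Minimal sketch of the path bookkeeping in data_prepare above.
msk = "datasets/example/mask/0001.png"            # hypothetical mask path
img = msk.replace('mask/', '')                    # -> datasets/example/0001.png
lm = msk.replace('mask', 'landmarks')             # -> datasets/example/landmarks/0001.png
lm = '.'.join(lm.split('.')[:-1]) + '.txt'        # -> datasets/example/landmarks/0001.txt
print(img, lm)
```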
deep_3drecon/deep_3drecon_models/__init__.py
ADDED
@@ -0,0 +1,67 @@
+"""This package contains modules related to objective functions, optimizations, and network architectures.
+
+To add a custom model class called 'dummy', you need to add a file called 'dummy_model.py' and define a subclass DummyModel inherited from BaseModel.
+You need to implement the following five functions:
+    -- <__init__>: initialize the class; first call BaseModel.__init__(self, opt).
+    -- <set_input>: unpack data from dataset and apply preprocessing.
+    -- <forward>: produce intermediate results.
+    -- <optimize_parameters>: calculate loss, gradients, and update network weights.
+    -- <modify_commandline_options>: (optionally) add model-specific options and set default options.
+
+In the function <__init__>, you need to define four lists:
+    -- self.loss_names (str list): specify the training losses that you want to plot and save.
+    -- self.model_names (str list): define networks used in our training.
+    -- self.visual_names (str list): specify the images that you want to display and save.
+    -- self.optimizers (optimizer list): define and initialize optimizers. You can define one optimizer for each network. If two networks are updated at the same time, you can use itertools.chain to group them. See cycle_gan_model.py for a usage example.
+
+Now you can use the model class by specifying flag '--model dummy'.
+See our template model class 'template_model.py' for more details.
+"""
+
+import importlib
+from .base_model import BaseModel
+
+
+def find_model_using_name(model_name):
+    """Import the module "models/[model_name]_model.py".
+
+    In the file, the class called DatasetNameModel() will
+    be instantiated. It has to be a subclass of BaseModel,
+    and it is case-insensitive.
+    """
+    model_filename = "deep_3drecon_models." + model_name + "_model"
+    modellib = importlib.import_module(model_filename)
+    model = None
+    target_model_name = model_name.replace('_', '') + 'model'
+    for name, cls in modellib.__dict__.items():
+        if name.lower() == target_model_name.lower() \
+           and issubclass(cls, BaseModel):
+            model = cls
+
+    if model is None:
+        print("In %s.py, there should be a subclass of BaseModel with class name that matches %s in lowercase." % (model_filename, target_model_name))
+        exit(0)
+
+    return model
+
+
+def get_option_setter(model_name):
+    """Return the static method <modify_commandline_options> of the model class."""
+    model_class = find_model_using_name(model_name)
+    return model_class.modify_commandline_options
+
+
+def create_model(opt):
+    """Create a model given the option.
+
+    This function wraps the class CustomDatasetDataLoader.
+    This is the main interface between this package and 'train.py'/'test.py'
+
+    Example:
+        >>> from models import create_model
+        >>> model = create_model(opt)
+    """
+    model = find_model_using_name(opt.model)
+    instance = model(opt)
+    print("model [%s] was created" % type(instance).__name__)
+    return instance
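The package docstring above describes the registration convention, but the diff does not include a `dummy_model.py`. The sketch below only illustrates that convention (file name, class name, the five functions, and the required lists); it assumes the `BaseModel.__init__(self, opt)` calling pattern stated in the docstring and is not code from this upload.

```python
# deep_3drecon_models/dummy_model.py -- illustrative sketch of the convention
# described in the package docstring above; hypothetical, not part of this upload.
import torch
from .base_model import BaseModel


class DummyModel(BaseModel):
    @staticmethod
    def modify_commandline_options(parser, is_train=True):
        parser.add_argument('--dummy_dim', type=int, default=16)  # hypothetical option
        return parser

    def __init__(self, opt):
        BaseModel.__init__(self, opt)
        self.loss_names = ['l2']        # expects self.loss_l2 to be set during training
        self.model_names = ['net']      # expects self.net
        self.visual_names = ['pred']    # expects self.pred
        self.net = torch.nn.Linear(opt.dummy_dim, opt.dummy_dim)
        self.optimizers = [torch.optim.Adam(self.net.parameters(), lr=1e-4)]

    def set_input(self, data):
        self.x = data['x']

    def forward(self):
        self.pred = self.net(self.x)

    def optimize_parameters(self):
        self.forward()
        self.loss_l2 = torch.nn.functional.mse_loss(self.pred, self.x)
        self.loss_l2.backward()
        self.optimizers[0].step()
        self.optimizers[0].zero_grad()
```

With this file in place, `find_model_using_name('dummy')` resolves the class by name, so `create_model(opt)` picks it up when `--model dummy` is passed.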
deep_3drecon/deep_3drecon_models/arcface_torch/README.md
ADDED
@@ -0,0 +1,218 @@
+# Distributed Arcface Training in Pytorch
+
+The "arcface_torch" repository is the official implementation of the ArcFace algorithm. It supports distributed and sparse training with multiple distributed training examples, including several memory-saving techniques such as mixed precision training and gradient checkpointing. It also supports training for ViT models and datasets including WebFace42M and Glint360K, two of the largest open-source datasets. Additionally, the repository comes with a built-in tool for converting to ONNX format, making it easy to submit to MFR evaluation systems.
+
+[IJB-C](https://paperswithcode.com/sota/face-verification-on-ijb-c?p=killing-two-birds-with-one-stone-efficient)
+[IJB-B](https://paperswithcode.com/sota/face-verification-on-ijb-b?p=killing-two-birds-with-one-stone-efficient)
+[AgeDB-30](https://paperswithcode.com/sota/face-verification-on-agedb-30?p=killing-two-birds-with-one-stone-efficient)
+[CFP-FP](https://paperswithcode.com/sota/face-verification-on-cfp-fp?p=killing-two-birds-with-one-stone-efficient)
+
+## Requirements
+
+To take advantage of the latest PyTorch features, we have upgraded to version 1.12.0.
+
+- Install [PyTorch](https://pytorch.org/get-started/previous-versions/) (torch>=1.12.0).
+- (Optional) Install [DALI](https://docs.nvidia.com/deeplearning/dali/user-guide/docs/); see our doc [install_dali.md](docs/install_dali.md).
+- `pip install -r requirement.txt`.
+
+## How to Train
+
+To train a model, execute the `train.py` script with the path to the configuration files. The sample commands provided below demonstrate the process of conducting distributed training.
+
+### 1. To run on one GPU:
+
+```shell
+python train_v2.py configs/ms1mv3_r50_onegpu
+```
+
+Note:
+It is not recommended to use a single GPU for training, as this may result in longer training times and suboptimal performance. For best results, we suggest using multiple GPUs or a GPU cluster.
+
+
+### 2. To run on a machine with 8 GPUs:
+
+```shell
+torchrun --nproc_per_node=8 train.py configs/ms1mv3_r50
+```
+
+### 3. To run on 2 machines with 8 GPUs each:
+
+Node 0:
+
+```shell
+torchrun --nproc_per_node=8 --nnodes=2 --node_rank=0 --master_addr="ip1" --master_port=12581 train.py configs/wf42m_pfc02_16gpus_r100
+```
+
+Node 1:
+
+```shell
+torchrun --nproc_per_node=8 --nnodes=2 --node_rank=1 --master_addr="ip1" --master_port=12581 train.py configs/wf42m_pfc02_16gpus_r100
+```
+
+### 4. Run ViT-B on a machine with a 24k batch size:
+
+```shell
+torchrun --nproc_per_node=8 train_v2.py configs/wf42m_pfc03_40epoch_8gpu_vit_b
+```
+
+
+## Download Datasets or Prepare Datasets
+- [MS1MV2](https://github.com/deepinsight/insightface/tree/master/recognition/_datasets_#ms1m-arcface-85k-ids58m-images-57) (87k IDs, 5.8M images)
+- [MS1MV3](https://github.com/deepinsight/insightface/tree/master/recognition/_datasets_#ms1m-retinaface) (93k IDs, 5.2M images)
+- [Glint360K](https://github.com/deepinsight/insightface/tree/master/recognition/partial_fc#4-download) (360k IDs, 17.1M images)
+- [WebFace42M](docs/prepare_webface42m.md) (2M IDs, 42.5M images)
+- [Your Dataset, Click Here!](docs/prepare_custom_dataset.md)
+
+Note:
+If you want to use DALI for data reading, please use the script 'scripts/shuffle_rec.py' to shuffle the InsightFace-style rec file before using it.
+Example:
+
+`python scripts/shuffle_rec.py ms1m-retinaface-t1`
+
+You will get the "shuffled_ms1m-retinaface-t1" folder, where the samples in the "train.rec" file are shuffled.
+
+
+## Model Zoo
+
+- The models are available for non-commercial research purposes only.
+- All models can be found here:
+- [Baidu Yun Pan](https://pan.baidu.com/s/1CL-l4zWqsI1oDuEEYVhj-g): e8pw
+- [OneDrive](https://1drv.ms/u/s!AswpsDO2toNKq0lWY69vN58GR6mw?e=p9Ov5d)
+
+### Performance on IJB-C and [**ICCV2021-MFR**](https://github.com/deepinsight/insightface/blob/master/challenges/mfr/README.md)
+
+The ICCV2021-MFR test set consists of non-celebrities, so we can ensure that it has very little overlap with publicly
+available face recognition training sets such as MS1M and CASIA, which are mostly collected from online celebrities.
+As a result, we can evaluate the fair performance of different algorithms.
+
+For the **ICCV2021-MFR-ALL** set, TAR is measured on an all-to-all 1:1 protocol, with FAR less than 0.000001 (1e-6). The
+globalised multi-racial test set contains 242,143 identities and 1,624,305 images.
+
+
+#### 1. Training on Single-Host GPU
+
+| Datasets | Backbone | **MFR-ALL** | IJB-C(1E-4) | IJB-C(1E-5) | log |
+|:---------|:---------|:------------|:------------|:------------|:----|
+| MS1MV2 | mobilefacenet-0.45G | 62.07 | 93.61 | 90.28 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/ms1mv2_mbf/training.log) |
+| MS1MV2 | r50 | 75.13 | 95.97 | 94.07 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/ms1mv2_r50/training.log) |
+| MS1MV2 | r100 | 78.12 | 96.37 | 94.27 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/ms1mv2_r100/training.log) |
+| MS1MV3 | mobilefacenet-0.45G | 63.78 | 94.23 | 91.33 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/ms1mv3_mbf/training.log) |
+| MS1MV3 | r50 | 79.14 | 96.37 | 94.47 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/ms1mv3_r50/training.log) |
+| MS1MV3 | r100 | 81.97 | 96.85 | 95.02 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/ms1mv3_r100/training.log) |
+| Glint360K | mobilefacenet-0.45G | 70.18 | 95.04 | 92.62 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/glint360k_mbf/training.log) |
+| Glint360K | r50 | 86.34 | 97.16 | 95.81 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/glint360k_r50/training.log) |
+| Glint360k | r100 | 89.52 | 97.55 | 96.38 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/glint360k_r100/training.log) |
+| WF4M | r100 | 89.87 | 97.19 | 95.48 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/wf4m_r100/training.log) |
+| WF12M-PFC-0.2 | r100 | 94.75 | 97.60 | 95.90 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/wf12m_pfc02_r100/training.log) |
+| WF12M-PFC-0.3 | r100 | 94.71 | 97.64 | 96.01 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/wf12m_pfc03_r100/training.log) |
+| WF12M | r100 | 94.69 | 97.59 | 95.97 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/wf12m_r100/training.log) |
+| WF42M-PFC-0.2 | r100 | 96.27 | 97.70 | 96.31 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/wf42m_pfc02_r100/training.log) |
+| WF42M-PFC-0.2 | ViT-T-1.5G | 92.04 | 97.27 | 95.68 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/wf42m_pfc02_40epoch_8gpu_vit_t/training.log) |
+| WF42M-PFC-0.3 | ViT-B-11G | 97.16 | 97.91 | 97.05 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/pfc03_wf42m_vit_b_8gpu/training.log) |
+
+#### 2. Training on Multi-Host GPU
+
+| Datasets | Backbone(bs*gpus) | **MFR-ALL** | IJB-C(1E-4) | IJB-C(1E-5) | Throughput | log |
+|:---------|:------------------|:------------|:------------|:------------|:-----------|:----|
+| WF42M-PFC-0.2 | r50(512*8) | 93.83 | 97.53 | 96.16 | ~5900 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/webface42m_r50_bs4k_pfc02/training.log) |
+| WF42M-PFC-0.2 | r50(512*16) | 93.96 | 97.46 | 96.12 | ~11000 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/webface42m_r50_lr01_pfc02_bs8k_16gpus/training.log) |
+| WF42M-PFC-0.2 | r50(128*32) | 94.04 | 97.48 | 95.94 | ~17000 | click me |
+| WF42M-PFC-0.2 | r100(128*16) | 96.28 | 97.80 | 96.57 | ~5200 | click me |
+| WF42M-PFC-0.2 | r100(256*16) | 96.69 | 97.85 | 96.63 | ~5200 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/webface42m_r100_bs4k_pfc02/training.log) |
+| WF42M-PFC-0.0018 | r100(512*32) | 93.08 | 97.51 | 95.88 | ~10000 | click me |
+| WF42M-PFC-0.2 | r100(128*32) | 96.57 | 97.83 | 96.50 | ~9800 | click me |
+
+`r100(128*32)` means the backbone is r100, the batch size per GPU is 128, and the number of GPUs is 32.
+
+
+#### 3. ViT For Face Recognition
+
+| Datasets | Backbone(bs) | FLOPs | **MFR-ALL** | IJB-C(1E-4) | IJB-C(1E-5) | Throughput | log |
+|:---------|:-------------|:------|:------------|:------------|:------------|:-----------|:----|
+| WF42M-PFC-0.3 | r18(128*32) | 2.6 | 79.13 | 95.77 | 93.36 | - | click me |
+| WF42M-PFC-0.3 | r50(128*32) | 6.3 | 94.03 | 97.48 | 95.94 | - | click me |
+| WF42M-PFC-0.3 | r100(128*32) | 12.1 | 96.69 | 97.82 | 96.45 | - | click me |
+| WF42M-PFC-0.3 | r200(128*32) | 23.5 | 97.70 | 97.97 | 96.93 | - | click me |
+| WF42M-PFC-0.3 | VIT-T(384*64) | 1.5 | 92.24 | 97.31 | 95.97 | ~35000 | click me |
+| WF42M-PFC-0.3 | VIT-S(384*64) | 5.7 | 95.87 | 97.73 | 96.57 | ~25000 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/pfc03_wf42m_vit_s_64gpu/training.log) |
+| WF42M-PFC-0.3 | VIT-B(384*64) | 11.4 | 97.42 | 97.90 | 97.04 | ~13800 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/pfc03_wf42m_vit_b_64gpu/training.log) |
+| WF42M-PFC-0.3 | VIT-L(384*64) | 25.3 | 97.85 | 98.00 | 97.23 | ~9406 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/pfc03_wf42m_vit_l_64gpu/training.log) |
+
+`WF42M` means WebFace42M, `PFC-0.3` means the negative class-center sampling rate is 0.3.
+
+#### 4. Noisy Datasets
+
+| Datasets | Backbone | **MFR-ALL** | IJB-C(1E-4) | IJB-C(1E-5) | log |
+|:---------|:---------|:------------|:------------|:------------|:----|
+| WF12M-Flip(40%) | r50 | 43.87 | 88.35 | 80.78 | click me |
+| WF12M-Flip(40%)-PFC-0.1* | r50 | 80.20 | 96.11 | 93.79 | click me |
+| WF12M-Conflict | r50 | 79.93 | 95.30 | 91.56 | click me |
+| WF12M-Conflict-PFC-0.3* | r50 | 91.68 | 97.28 | 95.75 | click me |
+
+`WF12M` means WebFace12M, `+PFC-0.1*` denotes additional abnormal inter-class filtering.
+
+
+
+## Speed Benchmark
+<div><img src="https://github.com/anxiangsir/insightface_arcface_log/blob/master/pfc_exp.png" width = "90%" /></div>
+
+
+**Arcface-Torch** is an efficient tool for training large-scale face recognition training sets. When the number of classes in the training sets exceeds one million, the partial FC sampling strategy maintains the same accuracy while providing several times faster training performance and lower GPU memory utilization. The partial FC is a sparse variant of the model parallel architecture for large-scale face recognition, utilizing a sparse softmax that dynamically samples a subset of class centers for each training batch. During each iteration, only a sparse portion of the parameters are updated, leading to a significant reduction in GPU memory requirements and computational demands. With the partial FC approach, it is possible to train sets with up to 29 million identities, the largest to date. Furthermore, the partial FC method supports multi-machine distributed training and mixed precision training.
+
+
+
+For more details, see
+[speed_benchmark.md](docs/speed_benchmark.md) in docs.
+
+> 1. Training Speed of Various Parallel Techniques (Samples per Second) on a Tesla V100 32GB x 8 System (Higher is Better)
+
+`-` means training failed because of GPU memory limitations.
+
+| Number of Identities in Dataset | Data Parallel | Model Parallel | Partial FC 0.1 |
+|:--------------------------------|:--------------|:---------------|:---------------|
+| 125000 | 4681 | 4824 | 5004 |
+| 1400000 | **1672** | 3043 | 4738 |
+| 5500000 | **-** | **1389** | 3975 |
+| 8000000 | **-** | **-** | 3565 |
+| 16000000 | **-** | **-** | 2679 |
+| 29000000 | **-** | **-** | **1855** |
+
+> 2. GPU Memory Utilization of Various Parallel Techniques (MB per GPU) on a Tesla V100 32GB x 8 System (Lower is Better)
+
+| Number of Identities in Dataset | Data Parallel | Model Parallel | Partial FC 0.1 |
+|:--------------------------------|:--------------|:---------------|:---------------|
+| 125000 | 7358 | 5306 | 4868 |
+| 1400000 | 32252 | 11178 | 6056 |
+| 5500000 | **-** | 32188 | 9854 |
+| 8000000 | **-** | **-** | 12310 |
+| 16000000 | **-** | **-** | 19950 |
+| 29000000 | **-** | **-** | 32324 |
+
+
+## Citations
+
+```
+@inproceedings{deng2019arcface,
+  title={Arcface: Additive angular margin loss for deep face recognition},
+  author={Deng, Jiankang and Guo, Jia and Xue, Niannan and Zafeiriou, Stefanos},
+  booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
+  pages={4690--4699},
+  year={2019}
+}
+@inproceedings{An_2022_CVPR,
+    author={An, Xiang and Deng, Jiankang and Guo, Jia and Feng, Ziyong and Zhu, XuHan and Yang, Jing and Liu, Tongliang},
+    title={Killing Two Birds With One Stone: Efficient and Robust Training of Face Recognition CNNs by Partial FC},
+    booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
+    month={June},
+    year={2022},
+    pages={4042-4051}
+}
+@inproceedings{zhu2021webface260m,
+  title={Webface260m: A benchmark unveiling the power of million-scale deep face recognition},
+  author={Zhu, Zheng and Huang, Guan and Deng, Jiankang and Ye, Yun and Huang, Junjie and Chen, Xinze and Zhu, Jiagang and Yang, Tian and Lu, Jiwen and Du, Dalong and Zhou, Jie},
+  booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+  pages={10492--10502},
+  year={2021}
+}
+```
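The Speed Benchmark section above attributes the memory savings to a sparse softmax that samples a subset of class centers per batch. The following is a minimal, self-contained sketch of that sampling idea (positives always kept, a random fraction of negatives drawn); it is an illustration only, not the repository's partial FC implementation, and the margin/normalization steps are omitted.

```python
import torch

def sample_class_centers(weight, labels, sample_rate=0.1):
    """Illustrative partial-FC style sampling: keep the rows of `weight` for the
    classes present in `labels`, plus a random subset of the remaining classes."""
    num_classes = weight.shape[0]
    positive = torch.unique(labels)
    num_sample = max(int(sample_rate * num_classes), positive.numel())
    score = torch.rand(num_classes)
    score[positive] = 2.0                            # force positive classes to be selected
    index = torch.topk(score, k=num_sample).indices.sort().values
    sub_weight = weight[index]                       # (num_sample, embedding_dim)
    new_labels = torch.searchsorted(index, labels)   # remap labels into the sampled index space
    return sub_weight, new_labels

# Toy usage: 1000 classes, 512-d embeddings, batch of 8.
w = torch.randn(1000, 512)
y = torch.randint(0, 1000, (8,))
sub_w, new_y = sample_class_centers(w, y, sample_rate=0.1)
logits = torch.randn(8, 512) @ sub_w.t()             # cosine/arc-margin logic omitted for brevity
loss = torch.nn.functional.cross_entropy(logits, new_y)
```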
deep_3drecon/deep_3drecon_models/arcface_torch/backbones/__init__.py
ADDED
@@ -0,0 +1,85 @@
from .iresnet import iresnet18, iresnet34, iresnet50, iresnet100, iresnet200
from .mobilefacenet import get_mbf


def get_model(name, **kwargs):
    # resnet
    if name == "r18":
        return iresnet18(False, **kwargs)
    elif name == "r34":
        return iresnet34(False, **kwargs)
    elif name == "r50":
        return iresnet50(False, **kwargs)
    elif name == "r100":
        return iresnet100(False, **kwargs)
    elif name == "r200":
        return iresnet200(False, **kwargs)
    elif name == "r2060":
        from .iresnet2060 import iresnet2060
        return iresnet2060(False, **kwargs)

    elif name == "mbf":
        fp16 = kwargs.get("fp16", False)
        num_features = kwargs.get("num_features", 512)
        return get_mbf(fp16=fp16, num_features=num_features)

    elif name == "mbf_large":
        from .mobilefacenet import get_mbf_large
        fp16 = kwargs.get("fp16", False)
        num_features = kwargs.get("num_features", 512)
        return get_mbf_large(fp16=fp16, num_features=num_features)

    elif name == "vit_t":
        num_features = kwargs.get("num_features", 512)
        from .vit import VisionTransformer
        return VisionTransformer(
            img_size=112, patch_size=9, num_classes=num_features, embed_dim=256, depth=12,
            num_heads=8, drop_path_rate=0.1, norm_layer="ln", mask_ratio=0.1)

    elif name == "vit_t_dp005_mask0":  # For WebFace42M
        num_features = kwargs.get("num_features", 512)
        from .vit import VisionTransformer
        return VisionTransformer(
            img_size=112, patch_size=9, num_classes=num_features, embed_dim=256, depth=12,
            num_heads=8, drop_path_rate=0.05, norm_layer="ln", mask_ratio=0.0)

    elif name == "vit_s":
        num_features = kwargs.get("num_features", 512)
        from .vit import VisionTransformer
        return VisionTransformer(
            img_size=112, patch_size=9, num_classes=num_features, embed_dim=512, depth=12,
            num_heads=8, drop_path_rate=0.1, norm_layer="ln", mask_ratio=0.1)

    elif name == "vit_s_dp005_mask_0":  # For WebFace42M
        num_features = kwargs.get("num_features", 512)
        from .vit import VisionTransformer
        return VisionTransformer(
            img_size=112, patch_size=9, num_classes=num_features, embed_dim=512, depth=12,
            num_heads=8, drop_path_rate=0.05, norm_layer="ln", mask_ratio=0.0)

    elif name == "vit_b":
        # this is a feature
        num_features = kwargs.get("num_features", 512)
        from .vit import VisionTransformer
        return VisionTransformer(
            img_size=112, patch_size=9, num_classes=num_features, embed_dim=512, depth=24,
            num_heads=8, drop_path_rate=0.1, norm_layer="ln", mask_ratio=0.1, using_checkpoint=True)

    elif name == "vit_b_dp005_mask_005":  # For WebFace42M
        # this is a feature
        num_features = kwargs.get("num_features", 512)
        from .vit import VisionTransformer
        return VisionTransformer(
            img_size=112, patch_size=9, num_classes=num_features, embed_dim=512, depth=24,
            num_heads=8, drop_path_rate=0.05, norm_layer="ln", mask_ratio=0.05, using_checkpoint=True)

    elif name == "vit_l_dp005_mask_005":  # For WebFace42M
        # this is a feature
        num_features = kwargs.get("num_features", 512)
        from .vit import VisionTransformer
        return VisionTransformer(
            img_size=112, patch_size=9, num_classes=num_features, embed_dim=768, depth=24,
            num_heads=8, drop_path_rate=0.05, norm_layer="ln", mask_ratio=0.05, using_checkpoint=True)

    else:
        raise ValueError()
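The factory above resolves a short name to a backbone and forwards any remaining keyword arguments to its constructor. Below is a minimal sanity-check sketch; the import path is hypothetical and depends on where this package sits on your `PYTHONPATH`.

```python
# Minimal sketch: build a backbone by name and run a dummy batch through it.
import torch
from backbones import get_model  # hypothetical import path; adjust to your install

backbone = get_model("r50", fp16=False, num_features=512)
backbone.eval()

dummy = torch.randn(2, 3, 112, 112)  # these models expect aligned 112x112 RGB crops
with torch.no_grad():
    embeddings = backbone(dummy)
print(embeddings.shape)  # expected: torch.Size([2, 512])
```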
deep_3drecon/deep_3drecon_models/arcface_torch/backbones/iresnet.py
ADDED
@@ -0,0 +1,194 @@
import torch
from torch import nn
from torch.utils.checkpoint import checkpoint

__all__ = ['iresnet18', 'iresnet34', 'iresnet50', 'iresnet100', 'iresnet200']
using_ckpt = False


def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes,
                     out_planes,
                     kernel_size=3,
                     stride=stride,
                     padding=dilation,
                     groups=groups,
                     bias=False,
                     dilation=dilation)


def conv1x1(in_planes, out_planes, stride=1):
    """1x1 convolution"""
    return nn.Conv2d(in_planes,
                     out_planes,
                     kernel_size=1,
                     stride=stride,
                     bias=False)


class IBasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None,
                 groups=1, base_width=64, dilation=1):
        super(IBasicBlock, self).__init__()
        if groups != 1 or base_width != 64:
            raise ValueError('BasicBlock only supports groups=1 and base_width=64')
        if dilation > 1:
            raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
        self.bn1 = nn.BatchNorm2d(inplanes, eps=1e-05,)
        self.conv1 = conv3x3(inplanes, planes)
        self.bn2 = nn.BatchNorm2d(planes, eps=1e-05,)
        self.prelu = nn.PReLU(planes)
        self.conv2 = conv3x3(planes, planes, stride)
        self.bn3 = nn.BatchNorm2d(planes, eps=1e-05,)
        self.downsample = downsample
        self.stride = stride

    def forward_impl(self, x):
        identity = x
        out = self.bn1(x)
        out = self.conv1(out)
        out = self.bn2(out)
        out = self.prelu(out)
        out = self.conv2(out)
        out = self.bn3(out)
        if self.downsample is not None:
            identity = self.downsample(x)
        out += identity
        return out

    def forward(self, x):
        if self.training and using_ckpt:
            return checkpoint(self.forward_impl, x)
        else:
            return self.forward_impl(x)


class IResNet(nn.Module):
    fc_scale = 7 * 7

    def __init__(self,
                 block, layers, dropout=0, num_features=512, zero_init_residual=False,
                 groups=1, width_per_group=64, replace_stride_with_dilation=None, fp16=False):
        super(IResNet, self).__init__()
        self.extra_gflops = 0.0
        self.fp16 = fp16
        self.inplanes = 64
        self.dilation = 1
        if replace_stride_with_dilation is None:
            replace_stride_with_dilation = [False, False, False]
        if len(replace_stride_with_dilation) != 3:
            raise ValueError("replace_stride_with_dilation should be None "
                             "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
        self.groups = groups
        self.base_width = width_per_group
        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(self.inplanes, eps=1e-05)
        self.prelu = nn.PReLU(self.inplanes)
        self.layer1 = self._make_layer(block, 64, layers[0], stride=2)
        self.layer2 = self._make_layer(block,
                                       128,
                                       layers[1],
                                       stride=2,
                                       dilate=replace_stride_with_dilation[0])
        self.layer3 = self._make_layer(block,
                                       256,
                                       layers[2],
                                       stride=2,
                                       dilate=replace_stride_with_dilation[1])
        self.layer4 = self._make_layer(block,
                                       512,
                                       layers[3],
                                       stride=2,
                                       dilate=replace_stride_with_dilation[2])
        self.bn2 = nn.BatchNorm2d(512 * block.expansion, eps=1e-05,)
        self.dropout = nn.Dropout(p=dropout, inplace=True)
        self.fc = nn.Linear(512 * block.expansion * self.fc_scale, num_features)
        self.features = nn.BatchNorm1d(num_features, eps=1e-05)
        nn.init.constant_(self.features.weight, 1.0)
        self.features.weight.requires_grad = False

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.normal_(m.weight, 0, 0.1)
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, IBasicBlock):
                    nn.init.constant_(m.bn2.weight, 0)

    def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
        downsample = None
        previous_dilation = self.dilation
        if dilate:
            self.dilation *= stride
            stride = 1
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                nn.BatchNorm2d(planes * block.expansion, eps=1e-05, ),
            )
        layers = []
        layers.append(
            block(self.inplanes, planes, stride, downsample, self.groups,
                  self.base_width, previous_dilation))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(
                block(self.inplanes,
                      planes,
                      groups=self.groups,
                      base_width=self.base_width,
                      dilation=self.dilation))

        return nn.Sequential(*layers)

    def forward(self, x):
        with torch.cuda.amp.autocast(self.fp16):
            x = self.conv1(x)
            x = self.bn1(x)
            x = self.prelu(x)
            x = self.layer1(x)
            x = self.layer2(x)
            x = self.layer3(x)
            x = self.layer4(x)
            x = self.bn2(x)
            x = torch.flatten(x, 1)
            x = self.dropout(x)
        x = self.fc(x.float() if self.fp16 else x)
        x = self.features(x)
        return x


def _iresnet(arch, block, layers, pretrained, progress, **kwargs):
    model = IResNet(block, layers, **kwargs)
    if pretrained:
        raise ValueError()
    return model


def iresnet18(pretrained=False, progress=True, **kwargs):
    return _iresnet('iresnet18', IBasicBlock, [2, 2, 2, 2], pretrained,
                    progress, **kwargs)


def iresnet34(pretrained=False, progress=True, **kwargs):
    return _iresnet('iresnet34', IBasicBlock, [3, 4, 6, 3], pretrained,
                    progress, **kwargs)


def iresnet50(pretrained=False, progress=True, **kwargs):
    return _iresnet('iresnet50', IBasicBlock, [3, 4, 14, 3], pretrained,
                    progress, **kwargs)


def iresnet100(pretrained=False, progress=True, **kwargs):
    return _iresnet('iresnet100', IBasicBlock, [3, 13, 30, 3], pretrained,
                    progress, **kwargs)


def iresnet200(pretrained=False, progress=True, **kwargs):
    return _iresnet('iresnet200', IBasicBlock, [6, 26, 60, 6], pretrained,
                    progress, **kwargs)
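IResNet is the standard ArcFace trunk: pre-activation basic blocks ending in BatchNorm, dropout, a fully connected layer over the 7x7 feature map, and a final fixed-scale BatchNorm1d on the embedding. A hedged usage sketch follows, assuming the module is importable as `iresnet`; face recognition pipelines typically L2-normalize the output before comparing embeddings.

```python
# Minimal sketch: cosine similarity between two face embeddings from iresnet18.
import torch
import torch.nn.functional as F
from iresnet import iresnet18  # hypothetical import path; adjust to your install

model = iresnet18(num_features=512)
model.eval()

faces = torch.randn(2, 3, 112, 112)  # two aligned 112x112 face crops
with torch.no_grad():
    emb = F.normalize(model(faces), dim=1)  # L2-normalize before comparing
similarity = float((emb[0] * emb[1]).sum())  # cosine similarity in [-1, 1]
print(similarity)
```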
deep_3drecon/deep_3drecon_models/arcface_torch/backbones/iresnet2060.py
ADDED
@@ -0,0 +1,176 @@
import torch
from torch import nn

assert torch.__version__ >= "1.8.1"
from torch.utils.checkpoint import checkpoint_sequential

__all__ = ['iresnet2060']


def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes,
                     out_planes,
                     kernel_size=3,
                     stride=stride,
                     padding=dilation,
                     groups=groups,
                     bias=False,
                     dilation=dilation)


def conv1x1(in_planes, out_planes, stride=1):
    """1x1 convolution"""
    return nn.Conv2d(in_planes,
                     out_planes,
                     kernel_size=1,
                     stride=stride,
                     bias=False)


class IBasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None,
                 groups=1, base_width=64, dilation=1):
        super(IBasicBlock, self).__init__()
        if groups != 1 or base_width != 64:
            raise ValueError('BasicBlock only supports groups=1 and base_width=64')
        if dilation > 1:
            raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
        self.bn1 = nn.BatchNorm2d(inplanes, eps=1e-05, )
        self.conv1 = conv3x3(inplanes, planes)
        self.bn2 = nn.BatchNorm2d(planes, eps=1e-05, )
        self.prelu = nn.PReLU(planes)
        self.conv2 = conv3x3(planes, planes, stride)
        self.bn3 = nn.BatchNorm2d(planes, eps=1e-05, )
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x
        out = self.bn1(x)
        out = self.conv1(out)
        out = self.bn2(out)
        out = self.prelu(out)
        out = self.conv2(out)
        out = self.bn3(out)
        if self.downsample is not None:
            identity = self.downsample(x)
        out += identity
        return out


class IResNet(nn.Module):
    fc_scale = 7 * 7

    def __init__(self,
                 block, layers, dropout=0, num_features=512, zero_init_residual=False,
                 groups=1, width_per_group=64, replace_stride_with_dilation=None, fp16=False):
        super(IResNet, self).__init__()
        self.fp16 = fp16
        self.inplanes = 64
        self.dilation = 1
        if replace_stride_with_dilation is None:
            replace_stride_with_dilation = [False, False, False]
        if len(replace_stride_with_dilation) != 3:
            raise ValueError("replace_stride_with_dilation should be None "
                             "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
        self.groups = groups
        self.base_width = width_per_group
        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(self.inplanes, eps=1e-05)
        self.prelu = nn.PReLU(self.inplanes)
        self.layer1 = self._make_layer(block, 64, layers[0], stride=2)
        self.layer2 = self._make_layer(block,
                                       128,
                                       layers[1],
                                       stride=2,
                                       dilate=replace_stride_with_dilation[0])
        self.layer3 = self._make_layer(block,
                                       256,
                                       layers[2],
                                       stride=2,
                                       dilate=replace_stride_with_dilation[1])
        self.layer4 = self._make_layer(block,
                                       512,
                                       layers[3],
                                       stride=2,
                                       dilate=replace_stride_with_dilation[2])
        self.bn2 = nn.BatchNorm2d(512 * block.expansion, eps=1e-05, )
        self.dropout = nn.Dropout(p=dropout, inplace=True)
        self.fc = nn.Linear(512 * block.expansion * self.fc_scale, num_features)
        self.features = nn.BatchNorm1d(num_features, eps=1e-05)
        nn.init.constant_(self.features.weight, 1.0)
        self.features.weight.requires_grad = False

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.normal_(m.weight, 0, 0.1)
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, IBasicBlock):
                    nn.init.constant_(m.bn2.weight, 0)

    def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
        downsample = None
        previous_dilation = self.dilation
        if dilate:
            self.dilation *= stride
            stride = 1
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                nn.BatchNorm2d(planes * block.expansion, eps=1e-05, ),
            )
        layers = []
        layers.append(
            block(self.inplanes, planes, stride, downsample, self.groups,
                  self.base_width, previous_dilation))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(
                block(self.inplanes,
                      planes,
                      groups=self.groups,
                      base_width=self.base_width,
                      dilation=self.dilation))

        return nn.Sequential(*layers)

    def checkpoint(self, func, num_seg, x):
        if self.training:
            return checkpoint_sequential(func, num_seg, x)
        else:
            return func(x)

    def forward(self, x):
        with torch.cuda.amp.autocast(self.fp16):
            x = self.conv1(x)
            x = self.bn1(x)
            x = self.prelu(x)
            x = self.layer1(x)
            x = self.checkpoint(self.layer2, 20, x)
            x = self.checkpoint(self.layer3, 100, x)
            x = self.layer4(x)
            x = self.bn2(x)
            x = torch.flatten(x, 1)
            x = self.dropout(x)
        x = self.fc(x.float() if self.fp16 else x)
        x = self.features(x)
        return x


def _iresnet(arch, block, layers, pretrained, progress, **kwargs):
    model = IResNet(block, layers, **kwargs)
    if pretrained:
        raise ValueError()
    return model


def iresnet2060(pretrained=False, progress=True, **kwargs):
    return _iresnet('iresnet2060', IBasicBlock, [3, 128, 1024 - 128, 3], pretrained, progress, **kwargs)
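The 2060-layer variant differs from `iresnet.py` mainly in that `layer2` and `layer3` are wrapped in `checkpoint_sequential` during training, trading extra recomputation in the backward pass for much lower activation memory; in eval mode `IResNet.checkpoint()` falls back to a plain forward. A small sketch of that behaviour, with a hypothetical import path (note that instantiating this model is heavyweight):

```python
# Minimal sketch: activation checkpointing is gated on self.training.
import torch
from iresnet2060 import iresnet2060  # hypothetical import path; adjust to your install

model = iresnet2060(num_features=512)  # very deep; construction alone takes a while

model.train()  # layer2/layer3 run under checkpoint_sequential (less memory, extra recompute)
model.eval()   # plain forward (faster, but stores all intermediate activations)

with torch.no_grad():
    out = model(torch.randn(1, 3, 112, 112))
print(out.shape)  # torch.Size([1, 512])
```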
deep_3drecon/deep_3drecon_models/arcface_torch/backbones/mobilefacenet.py
ADDED
@@ -0,0 +1,147 @@
'''
Adapted from https://github.com/cavalleria/cavaface.pytorch/blob/master/backbone/mobilefacenet.py
Original author cavalleria
'''

import torch.nn as nn
from torch.nn import Linear, Conv2d, BatchNorm1d, BatchNorm2d, PReLU, Sequential, Module
import torch


class Flatten(Module):
    def forward(self, x):
        return x.view(x.size(0), -1)


class ConvBlock(Module):
    def __init__(self, in_c, out_c, kernel=(1, 1), stride=(1, 1), padding=(0, 0), groups=1):
        super(ConvBlock, self).__init__()
        self.layers = nn.Sequential(
            Conv2d(in_c, out_c, kernel, groups=groups, stride=stride, padding=padding, bias=False),
            BatchNorm2d(num_features=out_c),
            PReLU(num_parameters=out_c)
        )

    def forward(self, x):
        return self.layers(x)


class LinearBlock(Module):
    def __init__(self, in_c, out_c, kernel=(1, 1), stride=(1, 1), padding=(0, 0), groups=1):
        super(LinearBlock, self).__init__()
        self.layers = nn.Sequential(
            Conv2d(in_c, out_c, kernel, stride, padding, groups=groups, bias=False),
            BatchNorm2d(num_features=out_c)
        )

    def forward(self, x):
        return self.layers(x)


class DepthWise(Module):
    def __init__(self, in_c, out_c, residual=False, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=1):
        super(DepthWise, self).__init__()
        self.residual = residual
        self.layers = nn.Sequential(
            ConvBlock(in_c, out_c=groups, kernel=(1, 1), padding=(0, 0), stride=(1, 1)),
            ConvBlock(groups, groups, groups=groups, kernel=kernel, padding=padding, stride=stride),
            LinearBlock(groups, out_c, kernel=(1, 1), padding=(0, 0), stride=(1, 1))
        )

    def forward(self, x):
        short_cut = None
        if self.residual:
            short_cut = x
        x = self.layers(x)
        if self.residual:
            output = short_cut + x
        else:
            output = x
        return output


class Residual(Module):
    def __init__(self, c, num_block, groups, kernel=(3, 3), stride=(1, 1), padding=(1, 1)):
        super(Residual, self).__init__()
        modules = []
        for _ in range(num_block):
            modules.append(DepthWise(c, c, True, kernel, stride, padding, groups))
        self.layers = Sequential(*modules)

    def forward(self, x):
        return self.layers(x)


class GDC(Module):
    def __init__(self, embedding_size):
        super(GDC, self).__init__()
        self.layers = nn.Sequential(
            LinearBlock(512, 512, groups=512, kernel=(7, 7), stride=(1, 1), padding=(0, 0)),
            Flatten(),
            Linear(512, embedding_size, bias=False),
            BatchNorm1d(embedding_size))

    def forward(self, x):
        return self.layers(x)


class MobileFaceNet(Module):
    def __init__(self, fp16=False, num_features=512, blocks=(1, 4, 6, 2), scale=2):
        super(MobileFaceNet, self).__init__()
        self.scale = scale
        self.fp16 = fp16
        self.layers = nn.ModuleList()
        self.layers.append(
            ConvBlock(3, 64 * self.scale, kernel=(3, 3), stride=(2, 2), padding=(1, 1))
        )
        if blocks[0] == 1:
            self.layers.append(
                ConvBlock(64 * self.scale, 64 * self.scale, kernel=(3, 3), stride=(1, 1), padding=(1, 1), groups=64)
            )
        else:
            self.layers.append(
                Residual(64 * self.scale, num_block=blocks[0], groups=128, kernel=(3, 3), stride=(1, 1), padding=(1, 1)),
            )

        self.layers.extend(
            [
                DepthWise(64 * self.scale, 64 * self.scale, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=128),
                Residual(64 * self.scale, num_block=blocks[1], groups=128, kernel=(3, 3), stride=(1, 1), padding=(1, 1)),
                DepthWise(64 * self.scale, 128 * self.scale, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=256),
                Residual(128 * self.scale, num_block=blocks[2], groups=256, kernel=(3, 3), stride=(1, 1), padding=(1, 1)),
                DepthWise(128 * self.scale, 128 * self.scale, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=512),
                Residual(128 * self.scale, num_block=blocks[3], groups=256, kernel=(3, 3), stride=(1, 1), padding=(1, 1)),
            ])

        self.conv_sep = ConvBlock(128 * self.scale, 512, kernel=(1, 1), stride=(1, 1), padding=(0, 0))
        self.features = GDC(num_features)
        self._initialize_weights()

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    m.bias.data.zero_()

    def forward(self, x):
        with torch.cuda.amp.autocast(self.fp16):
            for func in self.layers:
                x = func(x)
        x = self.conv_sep(x.float() if self.fp16 else x)
        x = self.features(x)
        return x


def get_mbf(fp16, num_features, blocks=(1, 4, 6, 2), scale=2):
    return MobileFaceNet(fp16, num_features, blocks, scale=scale)


def get_mbf_large(fp16, num_features, blocks=(2, 8, 12, 4), scale=4):
    return MobileFaceNet(fp16, num_features, blocks, scale=scale)
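`get_mbf` and `get_mbf_large` differ only in the block counts and channel scale they pass to `MobileFaceNet`. A quick comparison sketch, again with a hypothetical import path:

```python
# Minimal sketch: compare the small and large MobileFaceNet variants defined above.
import torch
from mobilefacenet import get_mbf, get_mbf_large  # hypothetical import path

small = get_mbf(fp16=False, num_features=512)        # blocks=(1, 4, 6, 2), scale=2
large = get_mbf_large(fp16=False, num_features=512)  # blocks=(2, 8, 12, 4), scale=4

def param_count(m):
    return sum(p.numel() for p in m.parameters())

print(param_count(small), param_count(large))  # the large variant has many more parameters

with torch.no_grad():
    print(small(torch.randn(1, 3, 112, 112)).shape)  # torch.Size([1, 512])
```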
deep_3drecon/deep_3drecon_models/arcface_torch/backbones/vit.py
ADDED
@@ -0,0 +1,280 @@
import torch
import torch.nn as nn
from timm.models.layers import DropPath, to_2tuple, trunc_normal_
from typing import Optional, Callable


class Mlp(nn.Module):
    def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.ReLU6, drop=0.):
        super().__init__()
        out_features = out_features or in_features
        hidden_features = hidden_features or in_features
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.act = act_layer()
        self.fc2 = nn.Linear(hidden_features, out_features)
        self.drop = nn.Dropout(drop)

    def forward(self, x):
        x = self.fc1(x)
        x = self.act(x)
        x = self.drop(x)
        x = self.fc2(x)
        x = self.drop(x)
        return x


class VITBatchNorm(nn.Module):
    def __init__(self, num_features):
        super().__init__()
        self.num_features = num_features
        self.bn = nn.BatchNorm1d(num_features=num_features)

    def forward(self, x):
        return self.bn(x)


class Attention(nn.Module):
    def __init__(self,
                 dim: int,
                 num_heads: int = 8,
                 qkv_bias: bool = False,
                 qk_scale: Optional[None] = None,
                 attn_drop: float = 0.,
                 proj_drop: float = 0.):
        super().__init__()
        self.num_heads = num_heads
        head_dim = dim // num_heads
        # NOTE scale factor was wrong in my original version, can set manually to be compat with prev weights
        self.scale = qk_scale or head_dim ** -0.5

        self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
        self.attn_drop = nn.Dropout(attn_drop)
        self.proj = nn.Linear(dim, dim)
        self.proj_drop = nn.Dropout(proj_drop)

    def forward(self, x):
        with torch.cuda.amp.autocast(True):
            batch_size, num_token, embed_dim = x.shape
            # qkv is [3, batch_size, num_heads, num_token, embed_dim // num_heads]
            qkv = self.qkv(x).reshape(
                batch_size, num_token, 3, self.num_heads, embed_dim // self.num_heads).permute(2, 0, 3, 1, 4)
        with torch.cuda.amp.autocast(False):
            q, k, v = qkv[0].float(), qkv[1].float(), qkv[2].float()
            attn = (q @ k.transpose(-2, -1)) * self.scale
            attn = attn.softmax(dim=-1)
            attn = self.attn_drop(attn)
            x = (attn @ v).transpose(1, 2).reshape(batch_size, num_token, embed_dim)
        with torch.cuda.amp.autocast(True):
            x = self.proj(x)
            x = self.proj_drop(x)
        return x


class Block(nn.Module):

    def __init__(self,
                 dim: int,
                 num_heads: int,
                 num_patches: int,
                 mlp_ratio: float = 4.,
                 qkv_bias: bool = False,
                 qk_scale: Optional[None] = None,
                 drop: float = 0.,
                 attn_drop: float = 0.,
                 drop_path: float = 0.,
                 act_layer: Callable = nn.ReLU6,
                 norm_layer: str = "ln",
                 patch_n: int = 144):
        super().__init__()

        if norm_layer == "bn":
            self.norm1 = VITBatchNorm(num_features=num_patches)
            self.norm2 = VITBatchNorm(num_features=num_patches)
        elif norm_layer == "ln":
            self.norm1 = nn.LayerNorm(dim)
            self.norm2 = nn.LayerNorm(dim)

        self.attn = Attention(
            dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=drop)
        # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
        self.drop_path = DropPath(
            drop_path) if drop_path > 0. else nn.Identity()
        mlp_hidden_dim = int(dim * mlp_ratio)
        self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim,
                       act_layer=act_layer, drop=drop)
        self.extra_gflops = (num_heads * patch_n * (dim // num_heads) * patch_n * 2) / (1000 ** 3)

    def forward(self, x):
        x = x + self.drop_path(self.attn(self.norm1(x)))
        with torch.cuda.amp.autocast(True):
            x = x + self.drop_path(self.mlp(self.norm2(x)))
        return x


class PatchEmbed(nn.Module):
    def __init__(self, img_size=108, patch_size=9, in_channels=3, embed_dim=768):
        super().__init__()
        img_size = to_2tuple(img_size)
        patch_size = to_2tuple(patch_size)
        num_patches = (img_size[1] // patch_size[1]) * \
            (img_size[0] // patch_size[0])
        self.img_size = img_size
        self.patch_size = patch_size
        self.num_patches = num_patches
        self.proj = nn.Conv2d(in_channels, embed_dim,
                              kernel_size=patch_size, stride=patch_size)

    def forward(self, x):
        batch_size, channels, height, width = x.shape
        assert height == self.img_size[0] and width == self.img_size[1], \
            f"Input image size ({height}*{width}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})."
        x = self.proj(x).flatten(2).transpose(1, 2)
        return x


class VisionTransformer(nn.Module):
    """ Vision Transformer with support for patch or hybrid CNN input stage
    """

    def __init__(self,
                 img_size: int = 112,
                 patch_size: int = 16,
                 in_channels: int = 3,
                 num_classes: int = 1000,
                 embed_dim: int = 768,
                 depth: int = 12,
                 num_heads: int = 12,
                 mlp_ratio: float = 4.,
                 qkv_bias: bool = False,
                 qk_scale: Optional[None] = None,
                 drop_rate: float = 0.,
                 attn_drop_rate: float = 0.,
                 drop_path_rate: float = 0.,
                 hybrid_backbone: Optional[None] = None,
                 norm_layer: str = "ln",
                 mask_ratio=0.1,
                 using_checkpoint=False,
                 ):
        super().__init__()
        self.num_classes = num_classes
        # num_features for consistency with other models
        self.num_features = self.embed_dim = embed_dim

        if hybrid_backbone is not None:
            raise ValueError
        else:
            self.patch_embed = PatchEmbed(img_size=img_size, patch_size=patch_size, in_channels=in_channels, embed_dim=embed_dim)
        self.mask_ratio = mask_ratio
        self.using_checkpoint = using_checkpoint
        num_patches = self.patch_embed.num_patches
        self.num_patches = num_patches

        self.pos_embed = nn.Parameter(torch.zeros(1, num_patches, embed_dim))
        self.pos_drop = nn.Dropout(p=drop_rate)

        # stochastic depth decay rule
        dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)]
        patch_n = (img_size // patch_size) ** 2
        self.blocks = nn.ModuleList(
            [
                Block(dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale,
                      drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i], norm_layer=norm_layer,
                      num_patches=num_patches, patch_n=patch_n)
                for i in range(depth)]
        )
        self.extra_gflops = 0.0
        for _block in self.blocks:
            self.extra_gflops += _block.extra_gflops

        if norm_layer == "ln":
            self.norm = nn.LayerNorm(embed_dim)
        elif norm_layer == "bn":
            self.norm = VITBatchNorm(self.num_patches)

        # features head
        self.feature = nn.Sequential(
            nn.Linear(in_features=embed_dim * num_patches, out_features=embed_dim, bias=False),
            nn.BatchNorm1d(num_features=embed_dim, eps=2e-5),
            nn.Linear(in_features=embed_dim, out_features=num_classes, bias=False),
            nn.BatchNorm1d(num_features=num_classes, eps=2e-5)
        )

        self.mask_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
        torch.nn.init.normal_(self.mask_token, std=.02)
        trunc_normal_(self.pos_embed, std=.02)
        # trunc_normal_(self.cls_token, std=.02)
        self.apply(self._init_weights)

    def _init_weights(self, m):
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight, std=.02)
            if isinstance(m, nn.Linear) and m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.LayerNorm):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)

    @torch.jit.ignore
    def no_weight_decay(self):
        return {'pos_embed', 'cls_token'}

    def get_classifier(self):
        return self.head

    def random_masking(self, x, mask_ratio=0.1):
        """
        Perform per-sample random masking by per-sample shuffling.
        Per-sample shuffling is done by argsort random noise.
        x: [N, L, D], sequence
        """
        N, L, D = x.size()  # batch, length, dim
        len_keep = int(L * (1 - mask_ratio))

        noise = torch.rand(N, L, device=x.device)  # noise in [0, 1]

        # sort noise for each sample
        # ascend: small is keep, large is remove
        ids_shuffle = torch.argsort(noise, dim=1)
        ids_restore = torch.argsort(ids_shuffle, dim=1)

        # keep the first subset
        ids_keep = ids_shuffle[:, :len_keep]
        x_masked = torch.gather(
            x, dim=1, index=ids_keep.unsqueeze(-1).repeat(1, 1, D))

        # generate the binary mask: 0 is keep, 1 is remove
        mask = torch.ones([N, L], device=x.device)
        mask[:, :len_keep] = 0
        # unshuffle to get the binary mask
        mask = torch.gather(mask, dim=1, index=ids_restore)

        return x_masked, mask, ids_restore

    def forward_features(self, x):
        B = x.shape[0]
        x = self.patch_embed(x)
        x = x + self.pos_embed
        x = self.pos_drop(x)

        if self.training and self.mask_ratio > 0:
            x, _, ids_restore = self.random_masking(x)

        for func in self.blocks:
            if self.using_checkpoint and self.training:
                from torch.utils.checkpoint import checkpoint
                x = checkpoint(func, x)
            else:
                x = func(x)
        x = self.norm(x.float())

        if self.training and self.mask_ratio > 0:
            mask_tokens = self.mask_token.repeat(x.shape[0], ids_restore.shape[1] - x.shape[1], 1)
            x_ = torch.cat([x[:, :, :], mask_tokens], dim=1)  # no cls token
            x_ = torch.gather(x_, dim=1, index=ids_restore.unsqueeze(-1).repeat(1, 1, x.shape[2]))  # unshuffle
            x = x_
        return torch.reshape(x, (B, self.num_patches * self.embed_dim))

    def forward(self, x):
        x = self.forward_features(x)
        x = self.feature(x)
        return x
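This ViT keeps every patch token (there is no CLS token): `forward_features` flattens all `num_patches * embed_dim` activations, and the `feature` head projects them down to `num_classes`, which the factory in `backbones/__init__.py` uses as the embedding size. Random masking is applied only in training mode. A hedged sketch using the `"vit_t"` hyperparameters, with a hypothetical import path:

```python
# Minimal sketch: the "vit_t" configuration, run in eval mode (no random masking).
import torch
from vit import VisionTransformer  # hypothetical import path; adjust to your install

model = VisionTransformer(
    img_size=112, patch_size=9, num_classes=512, embed_dim=256, depth=12,
    num_heads=8, drop_path_rate=0.1, norm_layer="ln", mask_ratio=0.1)
model.eval()

with torch.no_grad():
    emb = model(torch.randn(2, 3, 112, 112))  # 112 // 9 = 12, so 12 * 12 = 144 patch tokens
print(emb.shape)  # expected: torch.Size([2, 512])
```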
deep_3drecon/deep_3drecon_models/arcface_torch/configs/3millions.py
ADDED
@@ -0,0 +1,23 @@
from easydict import EasyDict as edict

# configs for test speed

config = edict()
config.margin_list = (1.0, 0.0, 0.4)
config.network = "mbf"
config.resume = False
config.output = None
config.embedding_size = 512
config.sample_rate = 0.1
config.fp16 = True
config.momentum = 0.9
config.weight_decay = 5e-4
config.batch_size = 512  # total_batch_size = batch_size * num_gpus
config.lr = 0.1  # batch size is 512

config.rec = "synthetic"
config.num_classes = 30 * 10000
config.num_image = 100000
config.num_epoch = 30
config.warmup_epoch = -1
config.val_targets = []
deep_3drecon/deep_3drecon_models/arcface_torch/configs/__init__.py
ADDED
File without changes
deep_3drecon/deep_3drecon_models/arcface_torch/configs/base.py
ADDED
@@ -0,0 +1,59 @@
from easydict import EasyDict as edict

# make training faster
# our RAM is 256G
# mount -t tmpfs -o size=140G tmpfs /train_tmp

config = edict()

# Margin Base Softmax
config.margin_list = (1.0, 0.5, 0.0)
config.network = "r50"
config.resume = False
config.save_all_states = False
config.output = "ms1mv3_arcface_r50"

config.embedding_size = 512

# Partial FC
config.sample_rate = 1
config.interclass_filtering_threshold = 0

config.fp16 = False
config.batch_size = 128

# For SGD
config.optimizer = "sgd"
config.lr = 0.1
config.momentum = 0.9
config.weight_decay = 5e-4

# For AdamW
# config.optimizer = "adamw"
# config.lr = 0.001
# config.weight_decay = 0.1

config.verbose = 2000
config.frequent = 10

# For Large Scale Dataset, such as WebFace42M
config.dali = False

# Gradient ACC
config.gradient_acc = 1

# setup seed
config.seed = 2048

# dataload numworkers
config.num_workers = 2

# WandB Logger
config.wandb_key = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
config.suffix_run_name = None
config.using_wandb = False
config.wandb_entity = "entity"
config.wandb_project = "project"
config.wandb_log_all = True
config.save_artifacts = False
config.wandb_resume = False  # resume wandb run: only if you want to resume the last run that was interrupted
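Every file in `configs/` just fills in an `EasyDict` named `config`, with `base.py` holding the defaults and the per-dataset files below overriding them. The sketch that follows is one way a script could pick a config by module name; it is an assumption about usage, not the repo's own loader.

```python
# Hypothetical helper: load a config module by name and read its fields.
import importlib

def load_config(name: str):
    module = importlib.import_module(f"configs.{name}")  # e.g. "base", "glint360k_r50"
    return module.config

cfg = load_config("base")
print(cfg.network, cfg.lr, cfg.batch_size)  # r50 0.1 128
```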
deep_3drecon/deep_3drecon_models/arcface_torch/configs/glint360k_mbf.py
ADDED
@@ -0,0 +1,27 @@
from easydict import EasyDict as edict

# make training faster
# our RAM is 256G
# mount -t tmpfs -o size=140G tmpfs /train_tmp

config = edict()
config.margin_list = (1.0, 0.0, 0.4)
config.network = "mbf"
config.resume = False
config.output = None
config.embedding_size = 512
config.sample_rate = 1.0
config.fp16 = True
config.momentum = 0.9
config.weight_decay = 1e-4
config.batch_size = 128
config.lr = 0.1
config.verbose = 2000
config.dali = False

config.rec = "/train_tmp/glint360k"
config.num_classes = 360232
config.num_image = 17091657
config.num_epoch = 20
config.warmup_epoch = 0
config.val_targets = ['lfw', 'cfp_fp', "agedb_30"]
deep_3drecon/deep_3drecon_models/arcface_torch/configs/glint360k_r100.py
ADDED
@@ -0,0 +1,27 @@
from easydict import EasyDict as edict

# make training faster
# our RAM is 256G
# mount -t tmpfs -o size=140G tmpfs /train_tmp

config = edict()
config.margin_list = (1.0, 0.0, 0.4)
config.network = "r100"
config.resume = False
config.output = None
config.embedding_size = 512
config.sample_rate = 1.0
config.fp16 = True
config.momentum = 0.9
config.weight_decay = 1e-4
config.batch_size = 128
config.lr = 0.1
config.verbose = 2000
config.dali = False

config.rec = "/train_tmp/glint360k"
config.num_classes = 360232
config.num_image = 17091657
config.num_epoch = 20
config.warmup_epoch = 0
config.val_targets = ['lfw', 'cfp_fp', "agedb_30"]
deep_3drecon/deep_3drecon_models/arcface_torch/configs/glint360k_r50.py
ADDED
@@ -0,0 +1,27 @@
from easydict import EasyDict as edict

# make training faster
# our RAM is 256G
# mount -t tmpfs -o size=140G tmpfs /train_tmp

config = edict()
config.margin_list = (1.0, 0.0, 0.4)
config.network = "r50"
config.resume = False
config.output = None
config.embedding_size = 512
config.sample_rate = 1.0
config.fp16 = True
config.momentum = 0.9
config.weight_decay = 1e-4
config.batch_size = 128
config.lr = 0.1
config.verbose = 2000
config.dali = False

config.rec = "/train_tmp/glint360k"
config.num_classes = 360232
config.num_image = 17091657
config.num_epoch = 20
config.warmup_epoch = 0
config.val_targets = ['lfw', 'cfp_fp', "agedb_30"]
deep_3drecon/deep_3drecon_models/arcface_torch/configs/ms1mv2_mbf.py
ADDED
@@ -0,0 +1,27 @@
from easydict import EasyDict as edict

# make training faster
# our RAM is 256G
# mount -t tmpfs -o size=140G tmpfs /train_tmp

config = edict()
config.margin_list = (1.0, 0.5, 0.0)
config.network = "mbf"
config.resume = False
config.output = None
config.embedding_size = 512
config.sample_rate = 1.0
config.fp16 = True
config.momentum = 0.9
config.weight_decay = 1e-4
config.batch_size = 128
config.lr = 0.1
config.verbose = 2000
config.dali = False

config.rec = "/train_tmp/faces_emore"
config.num_classes = 85742
config.num_image = 5822653
config.num_epoch = 40
config.warmup_epoch = 0
config.val_targets = ['lfw', 'cfp_fp', "agedb_30"]
deep_3drecon/deep_3drecon_models/arcface_torch/configs/ms1mv2_r100.py
ADDED
@@ -0,0 +1,27 @@
from easydict import EasyDict as edict

# make training faster
# our RAM is 256G
# mount -t tmpfs -o size=140G tmpfs /train_tmp

config = edict()
config.margin_list = (1.0, 0.5, 0.0)
config.network = "r100"
config.resume = False
config.output = None
config.embedding_size = 512
config.sample_rate = 1.0
config.fp16 = True
config.momentum = 0.9
config.weight_decay = 5e-4
config.batch_size = 128
config.lr = 0.1
config.verbose = 2000
config.dali = False

config.rec = "/train_tmp/faces_emore"
config.num_classes = 85742
config.num_image = 5822653
config.num_epoch = 20
config.warmup_epoch = 0
config.val_targets = ['lfw', 'cfp_fp', "agedb_30"]
deep_3drecon/deep_3drecon_models/arcface_torch/configs/ms1mv2_r50.py
ADDED
@@ -0,0 +1,27 @@
from easydict import EasyDict as edict

# make training faster
# our RAM is 256G
# mount -t tmpfs -o size=140G tmpfs /train_tmp

config = edict()
config.margin_list = (1.0, 0.5, 0.0)
config.network = "r50"
config.resume = False
config.output = None
config.embedding_size = 512
config.sample_rate = 1.0
config.fp16 = True
config.momentum = 0.9
config.weight_decay = 5e-4
config.batch_size = 128
config.lr = 0.1
config.verbose = 2000
config.dali = False

config.rec = "/train_tmp/faces_emore"
config.num_classes = 85742
config.num_image = 5822653
config.num_epoch = 20
config.warmup_epoch = 0
config.val_targets = ['lfw', 'cfp_fp', "agedb_30"]
deep_3drecon/deep_3drecon_models/arcface_torch/configs/ms1mv3_mbf.py
ADDED
@@ -0,0 +1,27 @@
from easydict import EasyDict as edict

# make training faster
# our RAM is 256G
# mount -t tmpfs -o size=140G tmpfs /train_tmp

config = edict()
config.margin_list = (1.0, 0.5, 0.0)
config.network = "mbf"
config.resume = False
config.output = None
config.embedding_size = 512
config.sample_rate = 1.0
config.fp16 = True
config.momentum = 0.9
config.weight_decay = 1e-4
config.batch_size = 128
config.lr = 0.1
config.verbose = 2000
config.dali = False

config.rec = "/train_tmp/ms1m-retinaface-t1"
config.num_classes = 93431
config.num_image = 5179510
config.num_epoch = 40
config.warmup_epoch = 0
config.val_targets = ['lfw', 'cfp_fp', "agedb_30"]
deep_3drecon/deep_3drecon_models/arcface_torch/configs/ms1mv3_r100.py
ADDED
@@ -0,0 +1,27 @@
from easydict import EasyDict as edict

# make training faster
# our RAM is 256G
# mount -t tmpfs -o size=140G tmpfs /train_tmp

config = edict()
config.margin_list = (1.0, 0.5, 0.0)
config.network = "r100"
config.resume = False
config.output = None
config.embedding_size = 512
config.sample_rate = 1.0
config.fp16 = True
config.momentum = 0.9
config.weight_decay = 5e-4
config.batch_size = 128
config.lr = 0.1
config.verbose = 2000
config.dali = False

config.rec = "/train_tmp/ms1m-retinaface-t1"
config.num_classes = 93431
config.num_image = 5179510
config.num_epoch = 20
config.warmup_epoch = 0
config.val_targets = ['lfw', 'cfp_fp', "agedb_30"]
deep_3drecon/deep_3drecon_models/arcface_torch/configs/ms1mv3_r50.py
ADDED
@@ -0,0 +1,27 @@
from easydict import EasyDict as edict

# make training faster
# our RAM is 256G
# mount -t tmpfs -o size=140G tmpfs /train_tmp

config = edict()
config.margin_list = (1.0, 0.5, 0.0)
config.network = "r50"
config.resume = False
config.output = None
config.embedding_size = 512
config.sample_rate = 1.0
config.fp16 = True
config.momentum = 0.9
config.weight_decay = 5e-4
config.batch_size = 128
config.lr = 0.1
config.verbose = 2000
config.dali = False

config.rec = "/train_tmp/ms1m-retinaface-t1"
config.num_classes = 93431
config.num_image = 5179510
config.num_epoch = 20
config.warmup_epoch = 0
config.val_targets = ['lfw', 'cfp_fp', "agedb_30"]
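Across these dataset configs the main knobs that change are `margin_list`, the backbone `network`, `weight_decay`, and the dataset statistics (`rec`, `num_classes`, `num_image`). The `margin_list = (m1, m2, m3)` tuple follows the usual combined-margin convention for the softmax loss, where the target-class logit cos(theta) is replaced by cos(m1 * theta + m2) - m3, so (1.0, 0.5, 0.0) is an ArcFace-style additive angular margin and (1.0, 0.0, 0.4) is a CosFace-style additive cosine margin. The sketch below is an illustrative reading of that convention, not the repo's own loss implementation.

```python
# Illustrative sketch of the combined-margin convention behind margin_list,
# applied to the cosine logit of the ground-truth class only.
import math

def combined_margin(cos_theta: float, m1: float, m2: float, m3: float) -> float:
    theta = math.acos(max(-1.0, min(1.0, cos_theta)))
    return math.cos(m1 * theta + m2) - m3

print(combined_margin(0.8, 1.0, 0.5, 0.0))  # ArcFace-style margin (MS1MV2/MS1MV3 configs)
print(combined_margin(0.8, 1.0, 0.0, 0.4))  # CosFace-style margin (Glint360K configs)
```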