ameerazam08 commited on
Commit
03da825
·
verified ·
1 Parent(s): 5e24900

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +2 -0
  2. LICENSE +21 -0
  3. LICENSES/LICENSE +20 -0
  4. LICENSES/LICENSE_EG3D +99 -0
  5. LICENSES/LICENSE_GFPGAN +351 -0
  6. LICENSES/LICENSE_MODNET +201 -0
  7. LICENSES/LICENSE_SEGFORMER +64 -0
  8. README.md +100 -0
  9. additional_modules/deep3dfacerecon/BFM/.gitkeep +0 -0
  10. additional_modules/deep3dfacerecon/BFM/BFM_exp_idx.mat +0 -0
  11. additional_modules/deep3dfacerecon/BFM/BFM_front_idx.mat +0 -0
  12. additional_modules/deep3dfacerecon/BFM/facemodel_info.mat +0 -0
  13. additional_modules/deep3dfacerecon/BFM/select_vertex_id.mat +0 -0
  14. additional_modules/deep3dfacerecon/BFM/similarity_Lm3D_all.mat +0 -0
  15. additional_modules/deep3dfacerecon/BFM/std_exp.txt +1 -0
  16. additional_modules/deep3dfacerecon/LICENSE +21 -0
  17. additional_modules/deep3dfacerecon/README.md +268 -0
  18. additional_modules/deep3dfacerecon/__init__.py +3 -0
  19. additional_modules/deep3dfacerecon/__pycache__/__init__.cpython-310.pyc +0 -0
  20. additional_modules/deep3dfacerecon/data/__init__.py +116 -0
  21. additional_modules/deep3dfacerecon/data/base_dataset.py +131 -0
  22. additional_modules/deep3dfacerecon/data/flist_dataset.py +125 -0
  23. additional_modules/deep3dfacerecon/data/image_folder.py +66 -0
  24. additional_modules/deep3dfacerecon/data/template_dataset.py +75 -0
  25. additional_modules/deep3dfacerecon/data_preparation.py +45 -0
  26. additional_modules/deep3dfacerecon/environment.yml +24 -0
  27. additional_modules/deep3dfacerecon/models/__init__.py +0 -0
  28. additional_modules/deep3dfacerecon/models/__pycache__/__init__.cpython-310.pyc +0 -0
  29. additional_modules/deep3dfacerecon/models/__pycache__/base_model.cpython-310.pyc +0 -0
  30. additional_modules/deep3dfacerecon/models/__pycache__/facerecon_model.cpython-310.pyc +0 -0
  31. additional_modules/deep3dfacerecon/models/__pycache__/networks.cpython-310.pyc +0 -0
  32. additional_modules/deep3dfacerecon/models/arcface_torch/README.md +218 -0
  33. additional_modules/deep3dfacerecon/models/arcface_torch/backbones/__init__.py +92 -0
  34. additional_modules/deep3dfacerecon/models/arcface_torch/backbones/iresnet.py +194 -0
  35. additional_modules/deep3dfacerecon/models/arcface_torch/backbones/iresnet2060.py +176 -0
  36. additional_modules/deep3dfacerecon/models/arcface_torch/backbones/mobilefacenet.py +147 -0
  37. additional_modules/deep3dfacerecon/models/arcface_torch/backbones/vit.py +280 -0
  38. additional_modules/deep3dfacerecon/models/arcface_torch/configs/3millions.py +23 -0
  39. additional_modules/deep3dfacerecon/models/arcface_torch/configs/__init__.py +0 -0
  40. additional_modules/deep3dfacerecon/models/arcface_torch/configs/base.py +60 -0
  41. additional_modules/deep3dfacerecon/models/arcface_torch/configs/glint360k_mbf.py +27 -0
  42. additional_modules/deep3dfacerecon/models/arcface_torch/configs/glint360k_r100.py +27 -0
  43. additional_modules/deep3dfacerecon/models/arcface_torch/configs/glint360k_r50.py +27 -0
  44. additional_modules/deep3dfacerecon/models/arcface_torch/configs/ms1mv2_mbf.py +27 -0
  45. additional_modules/deep3dfacerecon/models/arcface_torch/configs/ms1mv2_r100.py +27 -0
  46. additional_modules/deep3dfacerecon/models/arcface_torch/configs/ms1mv2_r50.py +27 -0
  47. additional_modules/deep3dfacerecon/models/arcface_torch/configs/ms1mv3_mbf.py +27 -0
  48. additional_modules/deep3dfacerecon/models/arcface_torch/configs/ms1mv3_r100.py +27 -0
  49. additional_modules/deep3dfacerecon/models/arcface_torch/configs/ms1mv3_r50.py +27 -0
  50. additional_modules/deep3dfacerecon/models/arcface_torch/configs/ms1mv3_r50_onegpu.py +27 -0
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ output_video.mp4 filter=lfs diff=lfs merge=lfs -text
37
+ resources/github_readme/teaser.gif filter=lfs diff=lfs merge=lfs -text
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2023 MBZUAI-Metaverse
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
LICENSES/LICENSE ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Copyright (c) 2024 MBZUAI Metaverse Center
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
LICENSES/LICENSE_EG3D ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Copyright (c) 2021-2022, NVIDIA Corporation & affiliates. All rights
2
+ reserved.
3
+
4
+
5
+ NVIDIA Source Code License for EG3D
6
+
7
+
8
+ =======================================================================
9
+
10
+ 1. Definitions
11
+
12
+ "Licensor" means any person or entity that distributes its Work.
13
+
14
+ "Software" means the original work of authorship made available under
15
+ this License.
16
+
17
+ "Work" means the Software and any additions to or derivative works of
18
+ the Software that are made available under this License.
19
+
20
+ The terms "reproduce," "reproduction," "derivative works," and
21
+ "distribution" have the meaning as provided under U.S. copyright law;
22
+ provided, however, that for the purposes of this License, derivative
23
+ works shall not include works that remain separable from, or merely
24
+ link (or bind by name) to the interfaces of, the Work.
25
+
26
+ Works, including the Software, are "made available" under this License
27
+ by including in or with the Work either (a) a copyright notice
28
+ referencing the applicability of this License to the Work, or (b) a
29
+ copy of this License.
30
+
31
+ 2. License Grants
32
+
33
+ 2.1 Copyright Grant. Subject to the terms and conditions of this
34
+ License, each Licensor grants to you a perpetual, worldwide,
35
+ non-exclusive, royalty-free, copyright license to reproduce,
36
+ prepare derivative works of, publicly display, publicly perform,
37
+ sublicense and distribute its Work and any resulting derivative
38
+ works in any form.
39
+
40
+ 3. Limitations
41
+
42
+ 3.1 Redistribution. You may reproduce or distribute the Work only
43
+ if (a) you do so under this License, (b) you include a complete
44
+ copy of this License with your distribution, and (c) you retain
45
+ without modification any copyright, patent, trademark, or
46
+ attribution notices that are present in the Work.
47
+
48
+ 3.2 Derivative Works. You may specify that additional or different
49
+ terms apply to the use, reproduction, and distribution of your
50
+ derivative works of the Work ("Your Terms") only if (a) Your Terms
51
+ provide that the use limitation in Section 3.3 applies to your
52
+ derivative works, and (b) you identify the specific derivative
53
+ works that are subject to Your Terms. Notwithstanding Your Terms,
54
+ this License (including the redistribution requirements in Section
55
+ 3.1) will continue to apply to the Work itself.
56
+
57
+ 3.3 Use Limitation. The Work and any derivative works thereof only
58
+ may be used or intended for use non-commercially. The Work or
59
+ derivative works thereof may be used or intended for use by NVIDIA
60
+ or it’s affiliates commercially or non-commercially. As used
61
+ herein, "non-commercially" means for research or evaluation
62
+ purposes only and not for any direct or indirect monetary gain.
63
+
64
+ 3.4 Patent Claims. If you bring or threaten to bring a patent claim
65
+ against any Licensor (including any claim, cross-claim or
66
+ counterclaim in a lawsuit) to enforce any patents that you allege
67
+ are infringed by any Work, then your rights under this License from
68
+ such Licensor (including the grants in Sections 2.1) will terminate
69
+ immediately.
70
+
71
+ 3.5 Trademarks. This License does not grant any rights to use any
72
+ Licensor’s or its affiliates’ names, logos, or trademarks, except
73
+ as necessary to reproduce the notices described in this License.
74
+
75
+ 3.6 Termination. If you violate any term of this License, then your
76
+ rights under this License (including the grants in Sections 2.1)
77
+ will terminate immediately.
78
+
79
+ 4. Disclaimer of Warranty.
80
+
81
+ THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY
82
+ KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF
83
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR
84
+ NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER
85
+ THIS LICENSE.
86
+
87
+ 5. Limitation of Liability.
88
+
89
+ EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL
90
+ THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE
91
+ SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT,
92
+ INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF
93
+ OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK
94
+ (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION,
95
+ LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER
96
+ COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF
97
+ THE POSSIBILITY OF SUCH DAMAGES.
98
+
99
+ =======================================================================
LICENSES/LICENSE_GFPGAN ADDED
@@ -0,0 +1,351 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Tencent is pleased to support the open source community by making GFPGAN available.
2
+
3
+ Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
4
+
5
+ GFPGAN is licensed under the Apache License Version 2.0 except for the third-party components listed below.
6
+
7
+
8
+ Terms of the Apache License Version 2.0:
9
+ ---------------------------------------------
10
+ Apache License
11
+
12
+ Version 2.0, January 2004
13
+
14
+ http://www.apache.org/licenses/
15
+
16
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
17
+ 1. Definitions.
18
+
19
+ “License” shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document.
20
+
21
+ “Licensor” shall mean the copyright owner or entity authorized by the copyright owner that is granting the License.
22
+
23
+ “Legal Entity” shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, “control” means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity.
24
+
25
+ “You” (or “Your”) shall mean an individual or Legal Entity exercising permissions granted by this License.
26
+
27
+ “Source” form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files.
28
+
29
+ “Object” form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types.
30
+
31
+ “Work” shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below).
32
+
33
+ “Derivative Works” shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof.
34
+
35
+ “Contribution” shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, “submitted” means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as “Not a Contribution.”
36
+
37
+ “Contributor” shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work.
38
+
39
+ 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form.
40
+
41
+ 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed.
42
+
43
+ 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions:
44
+
45
+ You must give any other recipients of the Work or Derivative Works a copy of this License; and
46
+
47
+ You must cause any modified files to carry prominent notices stating that You changed the files; and
48
+
49
+ You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and
50
+
51
+ If the Work includes a “NOTICE” text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License.
52
+
53
+ You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License.
54
+
55
+ 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions.
56
+
57
+ 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file.
58
+
59
+ 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License.
60
+
61
+ 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages.
62
+
63
+ 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability.
64
+
65
+ END OF TERMS AND CONDITIONS
66
+
67
+
68
+
69
+ Other dependencies and licenses:
70
+
71
+
72
+ Open Source Software licensed under the Apache 2.0 license and Other Licenses of the Third-Party Components therein:
73
+ ---------------------------------------------
74
+ 1. basicsr
75
+ Copyright 2018-2020 BasicSR Authors
76
+
77
+
78
+ This BasicSR project is released under the Apache 2.0 license.
79
+
80
+ A copy of Apache 2.0 is included in this file.
81
+
82
+ StyleGAN2
83
+ The codes are modified from the repository stylegan2-pytorch. Many thanks to the author - Kim Seonghyeon 😊 for translating from the official TensorFlow codes to PyTorch ones. Here is the license of stylegan2-pytorch.
84
+ The official repository is https://github.com/NVlabs/stylegan2, and here is the NVIDIA license.
85
+ DFDNet
86
+ The codes are largely modified from the repository DFDNet. Their license is Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License.
87
+
88
+ Terms of the Nvidia License:
89
+ ---------------------------------------------
90
+
91
+ 1. Definitions
92
+
93
+ "Licensor" means any person or entity that distributes its Work.
94
+
95
+ "Software" means the original work of authorship made available under
96
+ this License.
97
+
98
+ "Work" means the Software and any additions to or derivative works of
99
+ the Software that are made available under this License.
100
+
101
+ "Nvidia Processors" means any central processing unit (CPU), graphics
102
+ processing unit (GPU), field-programmable gate array (FPGA),
103
+ application-specific integrated circuit (ASIC) or any combination
104
+ thereof designed, made, sold, or provided by Nvidia or its affiliates.
105
+
106
+ The terms "reproduce," "reproduction," "derivative works," and
107
+ "distribution" have the meaning as provided under U.S. copyright law;
108
+ provided, however, that for the purposes of this License, derivative
109
+ works shall not include works that remain separable from, or merely
110
+ link (or bind by name) to the interfaces of, the Work.
111
+
112
+ Works, including the Software, are "made available" under this License
113
+ by including in or with the Work either (a) a copyright notice
114
+ referencing the applicability of this License to the Work, or (b) a
115
+ copy of this License.
116
+
117
+ 2. License Grants
118
+
119
+ 2.1 Copyright Grant. Subject to the terms and conditions of this
120
+ License, each Licensor grants to you a perpetual, worldwide,
121
+ non-exclusive, royalty-free, copyright license to reproduce,
122
+ prepare derivative works of, publicly display, publicly perform,
123
+ sublicense and distribute its Work and any resulting derivative
124
+ works in any form.
125
+
126
+ 3. Limitations
127
+
128
+ 3.1 Redistribution. You may reproduce or distribute the Work only
129
+ if (a) you do so under this License, (b) you include a complete
130
+ copy of this License with your distribution, and (c) you retain
131
+ without modification any copyright, patent, trademark, or
132
+ attribution notices that are present in the Work.
133
+
134
+ 3.2 Derivative Works. You may specify that additional or different
135
+ terms apply to the use, reproduction, and distribution of your
136
+ derivative works of the Work ("Your Terms") only if (a) Your Terms
137
+ provide that the use limitation in Section 3.3 applies to your
138
+ derivative works, and (b) you identify the specific derivative
139
+ works that are subject to Your Terms. Notwithstanding Your Terms,
140
+ this License (including the redistribution requirements in Section
141
+ 3.1) will continue to apply to the Work itself.
142
+
143
+ 3.3 Use Limitation. The Work and any derivative works thereof only
144
+ may be used or intended for use non-commercially. The Work or
145
+ derivative works thereof may be used or intended for use by Nvidia
146
+ or its affiliates commercially or non-commercially. As used herein,
147
+ "non-commercially" means for research or evaluation purposes only.
148
+
149
+ 3.4 Patent Claims. If you bring or threaten to bring a patent claim
150
+ against any Licensor (including any claim, cross-claim or
151
+ counterclaim in a lawsuit) to enforce any patents that you allege
152
+ are infringed by any Work, then your rights under this License from
153
+ such Licensor (including the grants in Sections 2.1 and 2.2) will
154
+ terminate immediately.
155
+
156
+ 3.5 Trademarks. This License does not grant any rights to use any
157
+ Licensor's or its affiliates' names, logos, or trademarks, except
158
+ as necessary to reproduce the notices described in this License.
159
+
160
+ 3.6 Termination. If you violate any term of this License, then your
161
+ rights under this License (including the grants in Sections 2.1 and
162
+ 2.2) will terminate immediately.
163
+
164
+ 4. Disclaimer of Warranty.
165
+
166
+ THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY
167
+ KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF
168
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR
169
+ NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER
170
+ THIS LICENSE.
171
+
172
+ 5. Limitation of Liability.
173
+
174
+ EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL
175
+ THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE
176
+ SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT,
177
+ INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF
178
+ OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK
179
+ (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION,
180
+ LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER
181
+ COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF
182
+ THE POSSIBILITY OF SUCH DAMAGES.
183
+
184
+ MIT License
185
+
186
+ Copyright (c) 2019 Kim Seonghyeon
187
+
188
+ Permission is hereby granted, free of charge, to any person obtaining a copy
189
+ of this software and associated documentation files (the "Software"), to deal
190
+ in the Software without restriction, including without limitation the rights
191
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
192
+ copies of the Software, and to permit persons to whom the Software is
193
+ furnished to do so, subject to the following conditions:
194
+
195
+ The above copyright notice and this permission notice shall be included in all
196
+ copies or substantial portions of the Software.
197
+
198
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
199
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
200
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
201
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
202
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
203
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
204
+ SOFTWARE.
205
+
206
+
207
+
208
+ Open Source Software licensed under the BSD 3-Clause license:
209
+ ---------------------------------------------
210
+ 1. torchvision
211
+ Copyright (c) Soumith Chintala 2016,
212
+ All rights reserved.
213
+
214
+ 2. torch
215
+ Copyright (c) 2016- Facebook, Inc (Adam Paszke)
216
+ Copyright (c) 2014- Facebook, Inc (Soumith Chintala)
217
+ Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert)
218
+ Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu)
219
+ Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu)
220
+ Copyright (c) 2011-2013 NYU (Clement Farabet)
221
+ Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, Iain Melvin, Jason Weston)
222
+ Copyright (c) 2006 Idiap Research Institute (Samy Bengio)
223
+ Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, Samy Bengio, Johnny Mariethoz)
224
+
225
+
226
+ Terms of the BSD 3-Clause License:
227
+ ---------------------------------------------
228
+ Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
229
+
230
+ 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
231
+
232
+ 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
233
+
234
+ 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
235
+
236
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
237
+
238
+
239
+
240
+ Open Source Software licensed under the BSD 3-Clause License and Other Licenses of the Third-Party Components therein:
241
+ ---------------------------------------------
242
+ 1. numpy
243
+ Copyright (c) 2005-2020, NumPy Developers.
244
+ All rights reserved.
245
+
246
+ A copy of BSD 3-Clause License is included in this file.
247
+
248
+ The NumPy repository and source distributions bundle several libraries that are
249
+ compatibly licensed. We list these here.
250
+
251
+ Name: Numpydoc
252
+ Files: doc/sphinxext/numpydoc/*
253
+ License: BSD-2-Clause
254
+ For details, see doc/sphinxext/LICENSE.txt
255
+
256
+ Name: scipy-sphinx-theme
257
+ Files: doc/scipy-sphinx-theme/*
258
+ License: BSD-3-Clause AND PSF-2.0 AND Apache-2.0
259
+ For details, see doc/scipy-sphinx-theme/LICENSE.txt
260
+
261
+ Name: lapack-lite
262
+ Files: numpy/linalg/lapack_lite/*
263
+ License: BSD-3-Clause
264
+ For details, see numpy/linalg/lapack_lite/LICENSE.txt
265
+
266
+ Name: tempita
267
+ Files: tools/npy_tempita/*
268
+ License: MIT
269
+ For details, see tools/npy_tempita/license.txt
270
+
271
+ Name: dragon4
272
+ Files: numpy/core/src/multiarray/dragon4.c
273
+ License: MIT
274
+ For license text, see numpy/core/src/multiarray/dragon4.c
275
+
276
+
277
+
278
+ Open Source Software licensed under the MIT license:
279
+ ---------------------------------------------
280
+ 1. facexlib
281
+ Copyright (c) 2020 Xintao Wang
282
+
283
+ 2. opencv-python
284
+ Copyright (c) Olli-Pekka Heinisuo
285
+ Please note that only files in cv2 package are used.
286
+
287
+
288
+ Terms of the MIT License:
289
+ ---------------------------------------------
290
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
291
+
292
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
293
+
294
+ THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
295
+
296
+
297
+
298
+ Open Source Software licensed under the MIT license and Other Licenses of the Third-Party Components therein:
299
+ ---------------------------------------------
300
+ 1. tqdm
301
+ Copyright (c) 2013 noamraph
302
+
303
+ `tqdm` is a product of collaborative work.
304
+ Unless otherwise stated, all authors (see commit logs) retain copyright
305
+ for their respective work, and release the work under the MIT licence
306
+ (text below).
307
+
308
+ Exceptions or notable authors are listed below
309
+ in reverse chronological order:
310
+
311
+ * files: *
312
+ MPLv2.0 2015-2020 (c) Casper da Costa-Luis
313
+ [casperdcl](https://github.com/casperdcl).
314
+ * files: tqdm/_tqdm.py
315
+ MIT 2016 (c) [PR #96] on behalf of Google Inc.
316
+ * files: tqdm/_tqdm.py setup.py README.rst MANIFEST.in .gitignore
317
+ MIT 2013 (c) Noam Yorav-Raphael, original author.
318
+
319
+ [PR #96]: https://github.com/tqdm/tqdm/pull/96
320
+
321
+
322
+ Mozilla Public Licence (MPL) v. 2.0 - Exhibit A
323
+ -----------------------------------------------
324
+
325
+ This Source Code Form is subject to the terms of the
326
+ Mozilla Public License, v. 2.0.
327
+ If a copy of the MPL was not distributed with this file,
328
+ You can obtain one at https://mozilla.org/MPL/2.0/.
329
+
330
+
331
+ MIT License (MIT)
332
+ -----------------
333
+
334
+ Copyright (c) 2013 noamraph
335
+
336
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
337
+ this software and associated documentation files (the "Software"), to deal in
338
+ the Software without restriction, including without limitation the rights to
339
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
340
+ the Software, and to permit persons to whom the Software is furnished to do so,
341
+ subject to the following conditions:
342
+
343
+ The above copyright notice and this permission notice shall be included in all
344
+ copies or substantial portions of the Software.
345
+
346
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
347
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
348
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
349
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
350
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
351
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
LICENSES/LICENSE_MODNET ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
LICENSES/LICENSE_SEGFORMER ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ NVIDIA Source Code License for SegFormer
2
+
3
+ 1. Definitions
4
+
5
+ “Licensor” means any person or entity that distributes its Work.
6
+
7
+ “Software” means the original work of authorship made available under this License.
8
+
9
+ “Work” means the Software and any additions to or derivative works of the Software that are made available under
10
+ this License.
11
+
12
+ The terms “reproduce,” “reproduction,” “derivative works,” and “distribution” have the meaning as provided under
13
+ U.S. copyright law; provided, however, that for the purposes of this License, derivative works shall not include
14
+ works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work.
15
+
16
+ Works, including the Software, are “made available” under this License by including in or with the Work either
17
+ (a) a copyright notice referencing the applicability of this License to the Work, or (b) a copy of this License.
18
+
19
+ 2. License Grant
20
+
21
+ 2.1 Copyright Grant. Subject to the terms and conditions of this License, each Licensor grants to you a perpetual,
22
+ worldwide, non-exclusive, royalty-free, copyright license to reproduce, prepare derivative works of, publicly
23
+ display, publicly perform, sublicense and distribute its Work and any resulting derivative works in any form.
24
+
25
+ 3. Limitations
26
+
27
+ 3.1 Redistribution. You may reproduce or distribute the Work only if (a) you do so under this License, (b) you
28
+ include a complete copy of this License with your distribution, and (c) you retain without modification any
29
+ copyright, patent, trademark, or attribution notices that are present in the Work.
30
+
31
+ 3.2 Derivative Works. You may specify that additional or different terms apply to the use, reproduction, and
32
+ distribution of your derivative works of the Work (“Your Terms”) only if (a) Your Terms provide that the use
33
+ limitation in Section 3.3 applies to your derivative works, and (b) you identify the specific derivative works
34
+ that are subject to Your Terms. Notwithstanding Your Terms, this License (including the redistribution
35
+ requirements in Section 3.1) will continue to apply to the Work itself.
36
+
37
+ 3.3 Use Limitation. The Work and any derivative works thereof only may be used or intended for use
38
+ non-commercially. Notwithstanding the foregoing, NVIDIA and its affiliates may use the Work and any derivative
39
+ works commercially. As used herein, “non-commercially” means for research or evaluation purposes only.
40
+
41
+ 3.4 Patent Claims. If you bring or threaten to bring a patent claim against any Licensor (including any claim,
42
+ cross-claim or counterclaim in a lawsuit) to enforce any patents that you allege are infringed by any Work, then
43
+ your rights under this License from such Licensor (including the grant in Section 2.1) will terminate immediately.
44
+
45
+ 3.5 Trademarks. This License does not grant any rights to use any Licensor’s or its affiliates’ names, logos,
46
+ or trademarks, except as necessary to reproduce the notices described in this License.
47
+
48
+ 3.6 Termination. If you violate any term of this License, then your rights under this License (including the
49
+ grant in Section 2.1) will terminate immediately.
50
+
51
+ 4. Disclaimer of Warranty.
52
+
53
+ THE WORK IS PROVIDED “AS IS” WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING
54
+ WARRANTIES OR CONDITIONS OF M ERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR NON-INFRINGEMENT. YOU
55
+ BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER THIS LICENSE.
56
+
57
+ 5. Limitation of Liability.
58
+
59
+ EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL THEORY, WHETHER IN TORT (INCLUDING
60
+ NEGLIGENCE), CONTRACT, OR OTHERWISE SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT,
61
+ INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF OR RELATED TO THIS LICENSE, THE USE OR
62
+ INABILITY TO USE THE WORK (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION, LOST PROFITS OR
63
+ DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER COMM ERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN
64
+ ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
README.md ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # [CVPR 2024] VOODOO 3D: <ins>Vo</ins>lumetric P<ins>o</ins>rtrait <ins>D</ins>isentanglement f<ins>o</ins>r <ins>O</ins>ne-Shot 3D Head Reenactment
2
+
3
+ [![arXiv](https://img.shields.io/badge/arXiv-2312.04651-red?logo=arxiv&logoColor=red)](https://arxiv.org/abs/2312.04651)
4
+ [![youtube](https://img.shields.io/badge/video-Youtube-white?logo=youtube&logoColor=red)](https://arxiv.org/abs/2312.04651)
5
+ [![homepage](https://img.shields.io/badge/project-Homepage-orange?logo=Homepage&logoColor=orange)](https://arxiv.org/abs/2312.04651)
6
+ [![LICENSE](https://img.shields.io/badge/license-MIT-blue?logo=C&logoColor=blue)](https://github.com/MBZUAI-Metaverse/VOODOO3D-official/LICENSE)
7
+
8
+ ![teaser](./resources/github_readme/teaser.gif)
9
+
10
+ ## Overview
11
+ This is the official implementation of VOODOO 3D: a high-fidelity 3D-aware one-shot head reenactment technique. Our method transfers the expression of a driver to a source and produces view consistent renderings for holographic displays.
12
+
13
+ For more details of the method and experimental results of the project, please checkout our [paper](https://arxiv.org/abs/2312.04651), [youtube video](https://www.youtube.com/watch?v=Gu3oPG0_BaE), or the [project page](https://p0lyfish.github.io/voodoo3d/).
14
+
15
+ ## Installation
16
+ First, clone the project:
17
+ ```
18
+ git clone https://github.com/MBZUAI-Metaverse/VOODOO3D-official
19
+ ```
20
+ The implementation only requires standard libraries. You can install all the dependencies using conda and pip:
21
+ ```
22
+ conda create -n voodoo3d python=3.10 pytorch=2.3.0 torchvision torchaudio pytorch-cuda=12.1 -c pytorch -c nvidia
23
+
24
+ pip install -r requirements.txt
25
+ ```
26
+
27
+ Next, prepare the pretrained weights and put them into `./pretrained_models`:
28
+ - Foreground Extractor: Donwload weights provided by [MODNet](https://github.com/ZHKKKe/MODNet) using [this link](https://drive.google.com/file/d/1mcr7ALciuAsHCpLnrtG_eop5-EYhbCmz/view?usp=drive_link)
29
+ - Pose estimation: Download weights provided by [Deep3DFaceRecon_pytorch](https://github.com/sicxu/Deep3DFaceRecon_pytorch) using [this link](https://mbzuaiac-my.sharepoint.com/:u:/g/personal/the_tran_mbzuai_ac_ae/EXlLGrp1Km1EkhObscL8r18BwI39MEq-4QLHb5MQMN0egw?e=gNfQI9)
30
+ - [Our pretrained weights](https://mbzuaiac-my.sharepoint.com/:u:/g/personal/the_tran_mbzuai_ac_ae/ETxx3EQF6QFPkviUD9ivk6EBmdVrE8_0j8qtIi59ThkBBQ?e=UkSCh2)
31
+
32
+ ## Inference
33
+ ### 3D Head Reenactment
34
+ Use the following command to test the model:
35
+ ```
36
+ python test_voodoo3d.py --source_root <IMAGE_FOLDERS / IMAGE_PATH> \
37
+ --driver_root <IMAGE_FOLDERS / IMAGE_PATH> \
38
+ --config_path configs/voodoo3d.yml \
39
+ --model_path pretrained_models/voodoo3d.pth \
40
+ --save_root <SAVE_ROOT> \
41
+ ```
42
+ Where `source_root` and `driver_root` are either image folders or image paths of the sources and drivers respectively. `save_root` is the folder root that you want to save the results. This script will generate pairwise reenactment results of the sources and drivers in the input folders / paths. For example, to test with our provided images:
43
+ ```
44
+ python test_voodoo3d.py --source_root resources/images/sources \
45
+ --driver_root resources/images/drivers \
46
+ --config_path configs/voodoo3d.yml \
47
+ --model_path pretrained_models/voodoo3d.pth \
48
+ --save_root results/voodoo3d_test \
49
+ ```
50
+ ### Fine-tuned Lp3D for 3D Reconstruction
51
+ [Lp3D](https://research.nvidia.com/labs/nxp/lp3d/) is a state-of-the-art 3D portrait reconstruction model. As mentioned in the VOODOO 3D paper, we reimplemented this model and fine-tuned it on in-the-wild data. To evaluate this model, use the following script:
52
+ ```
53
+ python test_lp3d.py --source_root <IMAGE_FOLDERS / IMAGE_PATH> \
54
+ --config_path configs/lp3d.yml \
55
+ --model_path pretrained_models/voodoo3d.pth \
56
+ --save_root <SAVE_ROOT> \
57
+ --cam_batch_size <BATCH_SIZE>
58
+ ```
59
+ where `source_root` is either an image folder or a single image path for the images to be reconstructed in 3D, `SAVE_ROOT` is the destination folder for the results, and `BATCH_SIZE` is the testing batch size (higher is faster). For each image in the input folder, the model will generate a rendered video of its corresponding 3D head using a fixed camera trajectory. Here is an example using our provided images:
60
+ ```
61
+ python test_lp3d.py --source_root resources/images/sources \
62
+ --config_path configs/lp3d.yml \
63
+ --model_path pretrained_models/voodoo3d.pth \
64
+ --save_root results/lp3d_test \
65
+ --cam_batch_size 2
66
+ ```
67
+
68
+ ## License
69
+
70
+ Our implementation uses modified versions of other projects that have different licenses. Specifically:
71
+ - GFPGAN and MODNet are distributed under the Apache License, version 2.0.
72
+ - EG3D and SegFormer are distributed under the NVIDIA Source Code License.
73
+
74
+ Unless stated otherwise, all other code is licensed under the MIT License. See the [LICENSES](LICENSES) folder for details.
75
+
76
+ ## Acknowledgements
77
+ This work would not be possible without the following projects:
78
+
79
+ - [eg3d](https://github.com/NVlabs/eg3d): We used portions of the data preprocessing and the generative model code to synthesize the data during training.
80
+ - [Deep3DFaceRecon_pytorch](https://github.com/sicxu/Deep3DFaceRecon_pytorch): We used portions of this code to predict the camera pose and process the data.
81
+ - [segmentation_models.pytorch](https://github.com/qubvel/segmentation_models.pytorch): We used portions of DeepLabV3 implementation from this project.
82
+ - [MODNet](https://github.com/ZHKKKe/MODNet): We used portions of the foreground extraction code from this project.
83
+ - [SegFormer](https://github.com/NVlabs/SegFormer): We used portions of the transformer blocks from this project.
84
+ - [GFPGAN](https://github.com/TencentARC/GFPGAN): We used portions of GFPGAN as our super-resolution module.
85
+
86
+ If you see your code used in this implementation but not properly acknowledged, please contact me via [[email protected]](mailto:[email protected]).
87
+
88
+ ## BibTeX
89
+ If our code is useful for your research or application, please cite our paper:
90
+ ```
91
+ @inproceedings{tran2023voodoo,
92
+ title = {VOODOO 3D: Volumetric Portrait Disentanglement for One-Shot 3D Head Reenactment},
93
+ author = {Tran, Phong and Zakharov, Egor and Ho, Long-Nhat and Tran, Anh Tuan and Hu, Liwen and Li, Hao},
94
+ year = 2024,
95
+ booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}
96
+ }
97
+ ```
98
+
99
+ ## Contact
100
+ For any questions or issues, please open an issue or contact [[email protected]](mailto:[email protected]).
additional_modules/deep3dfacerecon/BFM/.gitkeep ADDED
File without changes
additional_modules/deep3dfacerecon/BFM/BFM_exp_idx.mat ADDED
Binary file (91.9 kB). View file
 
additional_modules/deep3dfacerecon/BFM/BFM_front_idx.mat ADDED
Binary file (44.9 kB). View file
 
additional_modules/deep3dfacerecon/BFM/facemodel_info.mat ADDED
Binary file (739 kB). View file
 
additional_modules/deep3dfacerecon/BFM/select_vertex_id.mat ADDED
Binary file (62.3 kB). View file
 
additional_modules/deep3dfacerecon/BFM/similarity_Lm3D_all.mat ADDED
Binary file (994 Bytes). View file
 
additional_modules/deep3dfacerecon/BFM/std_exp.txt ADDED
@@ -0,0 +1 @@
1
+ 453980 257264 263068 211890 135873 184721 47055.6 72732 62787.4 106226 56708.5 51439.8 34887.1 44378.7 51813.4 31030.7 23354.9 23128.1 19400 21827.6 22767.7 22057.4 19894.3 16172.8 17142.7 10035.3 14727.5 12972.5 10763.8 8953.93 8682.62 8941.81 6342.3 5205.3 7065.65 6083.35 6678.88 4666.63 5082.89 5134.76 4908.16 3964.93 3739.95 3180.09 2470.45 1866.62 1624.71 2423.74 1668.53 1471.65 1194.52 782.102 815.044 835.782 834.937 744.496 575.146 633.76 705.685 753.409 620.306 673.326 766.189 619.866 559.93 357.264 396.472 556.849 455.048 460.592 400.735 326.702 279.428 291.535 326.584 305.664 287.816 283.642 276.19
additional_modules/deep3dfacerecon/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2022 Sicheng Xu
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
additional_modules/deep3dfacerecon/README.md ADDED
@@ -0,0 +1,268 @@
1
+ ## Accurate 3D Face Reconstruction with Weakly-Supervised Learning: From Single Image to Image Set —— PyTorch implementation ##
2
+
3
+ <p align="center">
4
+ <img src="images/example.gif">
5
+ </p>
6
+
7
+ This is an unofficial official pytorch implementation of the following paper:
8
+
9
+ Y. Deng, J. Yang, S. Xu, D. Chen, Y. Jia, and X. Tong, [Accurate 3D Face Reconstruction with Weakly-Supervised Learning: From Single Image to Image Set](https://arxiv.org/abs/1903.08527), IEEE Computer Vision and Pattern Recognition Workshop (CVPRW) on Analysis and Modeling of Faces and Gestures (AMFG), 2019. (**_Best Paper Award!_**)
10
+
11
+ The method enforces a hybrid-level weakly-supervised training for CNN-based 3D face reconstruction. It is fast, accurate, and robust to pose and occlusions. It achieves state-of-the-art performance on multiple datasets such as FaceWarehouse, MICC Florence and NoW Challenge.
12
+
13
+
14
+ For the original tensorflow implementation, check this [repo](https://github.com/microsoft/Deep3DFaceReconstruction).
15
+
16
+ This implementation is written by S. Xu.
17
+ ## 04/25/2023 Update
18
+ We released an updated model that improves results on "closed eye" images. We collected ~2K facial images with closed eyes and included them in the training data. The updated model has reconstruction accuracy similar to the previous one on the benchmarks, but produces better results for faces with closed eyes (see below). Here's the [link (google drive)](https://drive.google.com/drive/folders/1grs8J4vu7gOhEClyKjWU-SNxfonGue5F?usp=share_link) to the new model.
19
+ ### ● Reconstruction accuracy
20
+
21
+ |Method|FaceWareHouse|MICC Florence
22
+ |:----:|:-----------:|:-----------:|
23
+ |Deep3DFace_PyTorch_20230425|1.60±0.44|1.54±0.49|
24
+
25
+ ### ● Visual quality
26
+ <p align="center">
27
+ <img src="images/20230425_compare.png">
28
+ </p>
29
+
30
+ ## Performance
31
+
32
+ ### ● Reconstruction accuracy
33
+
34
+ The pytorch implementation achieves lower shape reconstruction error (9% improvement) compared to the [original tensorflow implementation](https://github.com/microsoft/Deep3DFaceReconstruction). Quantitative evaluation (average shape errors in mm) on several benchmarks is as follows:
35
+
36
+ |Method|FaceWareHouse|MICC Florence | NoW Challenge |
37
+ |:----:|:-----------:|:-----------:|:-----------:|
38
+ |Deep3DFace Tensorflow | 1.81±0.50 | 1.67±0.50 | 1.54±1.29 |
39
+ |**Deep3DFace PyTorch** |**1.64±0.50**|**1.53±0.45**| **1.41±1.21** |
40
+
41
+ The comparison result with state-of-the-art public 3D face reconstruction methods on the NoW face benchmark is as follows:
42
+ |Rank|Method|Median(mm) | Mean(mm) | Std(mm) |
43
+ |:----:|:-----------:|:-----------:|:-----------:|:-----------:|
44
+ | 1. | [DECA\[Feng et al., SIGGRAPH 2021\]](https://github.com/YadiraF/DECA)|1.09|1.38|1.18|
45
+ | **2.** | **Deep3DFace PyTorch**|**1.11**|**1.41**|**1.21**|
46
+ | 3. | [RingNet [Sanyal et al., CVPR 2019]](https://github.com/soubhiksanyal/RingNet) | 1.21 | 1.53 | 1.31 |
47
+ | 4. | [Deep3DFace [Deng et al., CVPRW 2019]](https://github.com/microsoft/Deep3DFaceReconstruction) | 1.23 | 1.54 | 1.29 |
48
+ | 5. | [3DDFA-V2 [Guo et al., ECCV 2020]](https://github.com/cleardusk/3DDFA_V2) | 1.23 | 1.57 | 1.39 |
49
+ | 6. | [MGCNet [Shang et al., ECCV 2020]](https://github.com/jiaxiangshang/MGCNet) | 1.31 | 1.87 | 2.63 |
50
+ | 7. | [PRNet [Feng et al., ECCV 2018]](https://github.com/YadiraF/PRNet) | 1.50 | 1.98 | 1.88 |
51
+ | 8. | [3DMM-CNN [Tran et al., CVPR 2017]](https://github.com/anhttran/3dmm_cnn) | 1.84 | 2.33 | 2.05 |
52
+
53
+ For more details about the evaluation, check [Now Challenge](https://ringnet.is.tue.mpg.de/challenge.html) website.
54
+
55
+ **_A recent benchmark, [REALY](https://www.realy3dface.com/), indicates that our method still achieves SOTA performance! You can check their paper and website for more details._**
56
+
57
+ ### ● Visual quality
58
+ The pytorch implementation achieves better visual consistency with the input images compared to the original tensorflow version.
59
+
60
+ <p align="center">
61
+ <img src="images/compare.png">
62
+ </p>
63
+
64
+ ### ● Speed
65
+ The training speed is on par with the original tensorflow implementation. For more information, see [here](https://github.com/sicxu/Deep3DFaceRecon_pytorch#train-the-face-reconstruction-network).
66
+
67
+ ## Major changes
68
+
69
+ ### ● Differentiable renderer
70
+
71
+ We use [Nvdiffrast](https://nvlabs.github.io/nvdiffrast/) which is a pytorch library that provides high-performance primitive operations for rasterization-based differentiable rendering. The original tensorflow implementation used [tf_mesh_renderer](https://github.com/google/tf_mesh_renderer) instead.
72
+
73
+ ### ● Face recognition model
74
+
75
+ We use [Arcface](https://github.com/deepinsight/insightface/tree/master/recognition/arcface_torch), a state-of-the-art face recognition model, for perceptual loss computation. By contrast, the original tensorflow implementation used [Facenet](https://github.com/davidsandberg/facenet).
76
+
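+ For intuition, a minimal sketch of such an identity-based perceptual loss (not the exact loss used in this repo; `recog_net` stands in for any face-embedding network such as an ArcFace backbone):
+ ```python
+ import torch.nn.functional as F
+
+ def identity_loss(recog_net, rendered, target):
+     # embed both images and penalize the cosine distance between the embeddings
+     feat_r = F.normalize(recog_net(rendered), dim=-1)
+     feat_t = F.normalize(recog_net(target), dim=-1)
+     return (1.0 - (feat_r * feat_t).sum(dim=-1)).mean()
+ ```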
77
+ ### ● Training configuration
78
+
79
+ Data augmentation is used in the training process, including random image shifting, scaling, rotation, and flipping. We also enlarge the training batch size from 5 to 32 to stabilize the training process.
80
+
81
+ ### ● Training data
82
+
83
+ We use an additional high-quality face image dataset, [FFHQ](https://github.com/NVlabs/ffhq-dataset), to increase the diversity of the training data.
84
+
85
+ ## Requirements
86
+ **This implementation has only been tested on Ubuntu with Nvidia GPUs and CUDA installed.** It should also work on Windows with proper library configuration.
87
+
88
+ ## Installation
89
+ 1. Clone the repository and set up a conda environment with all dependencies as follows:
90
+ ```
91
+ git clone https://github.com/sicxu/Deep3DFaceRecon_pytorch.git
92
+ cd Deep3DFaceRecon_pytorch
93
+ conda env create -f environment.yml
94
+ source activate deep3d_pytorch
95
+ ```
96
+
97
+ 2. Install Nvdiffrast library:
98
+ ```
99
+ git clone https://github.com/NVlabs/nvdiffrast
100
+ cd nvdiffrast # ./Deep3DFaceRecon_pytorch/nvdiffrast
101
+ pip install .
102
+ ```
103
+
104
+ 3. Install Arcface Pytorch:
105
+ ```
106
+ cd .. # ./Deep3DFaceRecon_pytorch
107
+ git clone https://github.com/deepinsight/insightface.git
108
+ cp -r ./insightface/recognition/arcface_torch ./models/
109
+ ```
110
+ ## Inference with a pre-trained model
111
+
112
+ ### Prepare prerequisite models
113
+ 1. Our method uses [Basel Face Model 2009 (BFM09)](https://faces.dmi.unibas.ch/bfm/main.php?nav=1-0&id=basel_face_model) to represent 3d faces. Request access to BFM09 using this [link](https://faces.dmi.unibas.ch/bfm/main.php?nav=1-2&id=downloads). After access is granted, download "01_MorphableModel.mat". In addition, we use an Expression Basis provided by [Guo et al.](https://github.com/Juyong/3DFace). Download the Expression Basis (Exp_Pca.bin) using this [link (google drive)](https://drive.google.com/file/d/1bw5Xf8C12pWmcMhNEu6PtsYVZkVucEN6/view?usp=sharing). Organize all files into the following structure:
114
+ ```
115
+ Deep3DFaceRecon_pytorch
116
+
117
+ └─── BFM
118
+
119
+ └─── 01_MorphableModel.mat
120
+
121
+ └─── Exp_Pca.bin
122
+ |
123
+ └─── ...
124
+ ```
125
+ 2. We provide a model trained on a combination of [CelebA](http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html),
126
+ [LFW](http://vis-www.cs.umass.edu/lfw/), [300WLP](http://www.cbsr.ia.ac.cn/users/xiangyuzhu/projects/3DDFA/main.htm),
127
+ [IJB-A](https://www.nist.gov/programs-projects/face-challenges), [LS3D-W](https://www.adrianbulat.com/face-alignment), and [FFHQ](https://github.com/NVlabs/ffhq-dataset) datasets. Download the pre-trained model using this [link (google drive)](https://drive.google.com/drive/folders/1liaIxn9smpudjjqMaWWRpP0mXRW_qRPP?usp=sharing) and organize the directory into the following structure:
128
+ ```
129
+ Deep3DFaceRecon_pytorch
130
+
131
+ └─── checkpoints
132
+
133
+ └─── <model_name>
134
+
135
+ └─── epoch_20.pth
136
+
137
+ ```
138
+
139
+ ### Test with custom images
140
+ To reconstruct 3d faces from test images, organize the test image folder as follows:
141
+ ```
142
+ Deep3DFaceRecon_pytorch
143
+
144
+ └─── <folder_to_test_images>
145
+
146
+ └─── *.jpg/*.png
147
+ |
148
+ └─── detections
149
+ |
150
+ └─── *.txt
151
+ ```
152
+ The \*.jpg/\*.png files are test images. Each \*.txt file contains the 5 detected facial landmarks (a 5x2 array) and has the same name as its corresponding image. Check [./datasets/examples](datasets/examples) for a reference.
153
+
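+ As a quick format check, a landmark file can be loaded with numpy (a minimal sketch; replace the placeholder path):
+ ```python
+ import numpy as np
+
+ # each detections/*.txt stores the 5 landmarks as five "x y" rows
+ lm = np.loadtxt("<folder_to_test_images>/detections/<image_name>.txt")
+ assert lm.shape == (5, 2), f"expected a 5x2 array, got {lm.shape}"
+ ```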
154
+ Then, run the test script:
155
+ ```
156
+ # get reconstruction results of your custom images
157
+ python test.py --name=<model_name> --epoch=20 --img_folder=<folder_to_test_images>
158
+
159
+ # get reconstruction results of example images
160
+ python test.py --name=<model_name> --epoch=20 --img_folder=./datasets/examples
161
+ ```
162
+ **_Following [#108](https://github.com/sicxu/Deep3DFaceRecon_pytorch/issues/108), if you don't have an OpenGL environment, you can simply add "--use_opengl False" to use the CUDA context. Make sure you have updated nvdiffrast to the latest version._**
163
+
164
+ Results will be saved into ./checkpoints/<model_name>/results/<folder_to_test_images>, which contains the following files:
165
+ | \*.png | A combination of cropped input image, reconstructed image, and visualization of projected landmarks.
166
+ |:----|:-----------|
167
+ | \*.obj | Reconstructed 3d face mesh with predicted color (texture+illumination) in the world coordinate space. Best viewed in Meshlab. |
168
+ | \*.mat | Predicted 257-dimensional coefficients and 68 projected 2d facial landmarks. Best viewed in Matlab.
169
+
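+ A small sketch for inspecting one of the saved \*.mat files from Python (the variable names stored inside depend on the model output, so the sketch only lists them):
+ ```python
+ from scipy.io import loadmat
+
+ result = loadmat("checkpoints/<model_name>/results/<folder_to_test_images>/<image_name>.mat")
+ for key, value in result.items():
+     if not key.startswith("__"):  # skip scipy's metadata entries
+         print(key, getattr(value, "shape", type(value)))
+ ```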
170
+ ## Training a model from scratch
171
+ ### Prepare prerequisite models
172
+ 1. We rely on [Arcface](https://github.com/deepinsight/insightface/tree/master/recognition/arcface_torch) to extract identity features for loss computation. Download the pre-trained model from Arcface using this [link](https://github.com/deepinsight/insightface/tree/master/recognition/arcface_torch#ms1mv3). By default, we use the resnet50 backbone ([ms1mv3_arcface_r50_fp16](https://onedrive.live.com/?authkey=%21AFZjr283nwZHqbA&id=4A83B6B633B029CC%215583&cid=4A83B6B633B029CC)), organize the download files into the following structure:
173
+ ```
174
+ Deep3DFaceRecon_pytorch
175
+
176
+ └─── checkpoints
177
+
178
+ └─── recog_model
179
+
180
+ └─── ms1mv3_arcface_r50_fp16
181
+ |
182
+ └─── backbone.pth
183
+ ```
184
+ 2. We initialize R-Net using the weights trained on [ImageNet](https://image-net.org/). Download the weights provided by PyTorch using this [link](https://download.pytorch.org/models/resnet50-0676ba61.pth), and organize the file as the following structure:
185
+ ```
186
+ Deep3DFaceRecon_pytorch
187
+
188
+ └─── checkpoints
189
+
190
+ └─── init_model
191
+
192
+ └─── resnet50-0676ba61.pth
193
+ ```
194
+ 3. We provide a landmark detector (tensorflow model) to extract 68 facial landmarks for loss computation. The detector is trained on [300WLP](http://www.cbsr.ia.ac.cn/users/xiangyuzhu/projects/3DDFA/main.htm), [LFW](http://vis-www.cs.umass.edu/lfw/), and [LS3D-W](https://www.adrianbulat.com/face-alignment) datasets. Download the trained model using this [link (google drive)](https://drive.google.com/file/d/1Jl1yy2v7lIJLTRVIpgg2wvxYITI8Dkmw/view?usp=sharing) and organize the file as follows:
195
+ ```
196
+ Deep3DFaceRecon_pytorch
197
+
198
+ └─── checkpoints
199
+
200
+ └─── lm_model
201
+
202
+ └─── 68lm_detector.pb
203
+ ```
204
+ ### Data preparation
205
+ 1. To train a model with custom images, 5 facial landmarks of each image are needed in advance for an image pre-alignment process. We recommend using [dlib](http://dlib.net/) or [MTCNN](https://github.com/ipazc/mtcnn) to detect these landmarks (a hedged detection sketch follows this list). Then, organize all files into the following structure:
206
+ ```
207
+ Deep3DFaceRecon_pytorch
208
+
209
+ └─── datasets
210
+
211
+ └─── <folder_to_training_images>
212
+
213
+ └─── *.png/*.jpg
214
+ |
215
+ └─── detections
216
+ |
217
+ └─── *.txt
218
+ ```
219
+ The \*.txt files contain 5 facial landmarks with a shape of 5x2, and should have the same name as their corresponding images.
220
+
221
+ 2. Generate 68 landmarks and skin attention mask for images using the following script:
222
+ ```
223
+ # preprocess training images
224
+ python data_preparation.py --img_folder <folder_to_training_images>
225
+
226
+ # alternatively, you can preprocess multiple image folders simultaneously
227
+ python data_preparation.py --img_folder <folder_to_training_images1> <folder_to_training_images2> <folder_to_training_images3>
228
+
229
+ # preprocess validation images
230
+ python data_preparation.py --img_folder <folder_to_validation_images> --mode=val
231
+ ```
232
+ The script will generate landmark files and skin masks, and save them into ./datasets/<folder_to_training_images>. In addition, it generates a file listing the paths of all training data into ./datalist, which is then used by the training script.
233
+
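+ As referenced in step 1 above, here is a hedged sketch of producing the 5-landmark `detections/*.txt` files with the `mtcnn` pip package (the package, its keypoint names, and the landmark order are assumptions; dlib or any other detector works as long as the output is a 5x2 text file):
+ ```python
+ # Hedged sketch: write 5 facial landmarks per image into detections/*.txt.
+ import os
+ import cv2
+ import numpy as np
+ from mtcnn import MTCNN  # assumed dependency (pip install mtcnn)
+
+ detector = MTCNN()
+ img_folder = "datasets/<folder_to_training_images>"
+ os.makedirs(os.path.join(img_folder, "detections"), exist_ok=True)
+
+ for name in os.listdir(img_folder):
+     if not name.lower().endswith((".jpg", ".jpeg", ".png")):
+         continue
+     img = cv2.cvtColor(cv2.imread(os.path.join(img_folder, name)), cv2.COLOR_BGR2RGB)
+     faces = detector.detect_faces(img)
+     if not faces:
+         continue
+     kp = faces[0]["keypoints"]
+     # assumed order: left eye, right eye, nose, left mouth corner, right mouth corner
+     lm = np.array([kp["left_eye"], kp["right_eye"], kp["nose"],
+                    kp["mouth_left"], kp["mouth_right"]], dtype=np.float32)
+     out = os.path.join(img_folder, "detections", os.path.splitext(name)[0] + ".txt")
+     np.savetxt(out, lm)
+ ```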
234
+ ### Train the face reconstruction network
235
+ Run the following script to train a face reconstruction model using the pre-processed data:
236
+ ```
237
+ # train with single GPU
238
+ python train.py --name=<custom_experiment_name> --gpu_ids=0
239
+
240
+ # train with multiple GPUs
241
+ python train.py --name=<custom_experiment_name> --gpu_ids=0,1
242
+
243
+ # train with other custom settings
244
+ python train.py --name=<custom_experiment_name> --gpu_ids=0 --batch_size=32 --n_epochs=20
245
+ ```
246
+ Training logs and model parameters will be saved into ./checkpoints/<custom_experiment_name>.
247
+
248
+ By default, the script uses a batch size of 32 and trains the model for 20 epochs. For reference, the pre-trained model in this repo was trained with the default settings on a collection of 300k images. A single iteration takes 0.8~0.9s on a single Tesla M40 GPU, and the total training process takes around two days.
249
+
250
+ To use a trained model, see [Inference](https://github.com/sicxu/Deep3DFaceRecon_pytorch#inference-with-a-pre-trained-model) section.
251
+ ## Contact
252
+ If you have any questions, please contact the paper authors.
253
+
254
+ ## Citation
255
+
256
+ Please cite the following paper if this model helps your research:
257
+
258
+ @inproceedings{deng2019accurate,
259
+ title={Accurate 3D Face Reconstruction with Weakly-Supervised Learning: From Single Image to Image Set},
260
+ author={Yu Deng and Jiaolong Yang and Sicheng Xu and Dong Chen and Yunde Jia and Xin Tong},
261
+ booktitle={IEEE Computer Vision and Pattern Recognition Workshops},
262
+ year={2019}
263
+ }
264
+ ##
265
+ The face images on this page are from the public [CelebA](http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html) dataset released by MMLab, CUHK.
266
+
267
+ Part of the code in this implementation takes [CUT](https://github.com/taesungp/contrastive-unpaired-translation) as a reference.
268
+
additional_modules/deep3dfacerecon/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """
2
+ Modified from https://github.com/sicxu/Deep3DFaceRecon_pytorch
3
+ """
additional_modules/deep3dfacerecon/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (270 Bytes). View file
 
additional_modules/deep3dfacerecon/data/__init__.py ADDED
@@ -0,0 +1,116 @@
1
+ """This package includes all the modules related to data loading and preprocessing
2
+
3
+ To add a custom dataset class called 'dummy', you need to add a file called 'dummy_dataset.py' and define a subclass 'DummyDataset' inherited from BaseDataset.
4
+ You need to implement four functions:
5
+ -- <__init__>: initialize the class, first call BaseDataset.__init__(self, opt).
6
+ -- <__len__>: return the size of dataset.
7
+ -- <__getitem__>: get a data point from data loader.
8
+ -- <modify_commandline_options>: (optionally) add dataset-specific options and set default options.
9
+
10
+ Now you can use the dataset class by specifying flag '--dataset_mode dummy'.
11
+ See our template dataset class 'template_dataset.py' for more details.
12
+ """
13
+ import numpy as np
14
+ import importlib
15
+ import torch.utils.data
16
+ from data.base_dataset import BaseDataset
17
+
18
+
19
+ def find_dataset_using_name(dataset_name):
20
+ """Import the module "data/[dataset_name]_dataset.py".
21
+
22
+ In the file, the class called DatasetNameDataset() will
23
+ be instantiated. It has to be a subclass of BaseDataset,
24
+ and it is case-insensitive.
25
+ """
26
+ dataset_filename = "data." + dataset_name + "_dataset"
27
+ datasetlib = importlib.import_module(dataset_filename)
28
+
29
+ dataset = None
30
+ target_dataset_name = dataset_name.replace('_', '') + 'dataset'
31
+ for name, cls in datasetlib.__dict__.items():
32
+ if name.lower() == target_dataset_name.lower() \
33
+ and issubclass(cls, BaseDataset):
34
+ dataset = cls
35
+
36
+ if dataset is None:
37
+ raise NotImplementedError("In %s.py, there should be a subclass of BaseDataset with class name that matches %s in lowercase." % (dataset_filename, target_dataset_name))
38
+
39
+ return dataset
40
+
41
+
42
+ def get_option_setter(dataset_name):
43
+ """Return the static method <modify_commandline_options> of the dataset class."""
44
+ dataset_class = find_dataset_using_name(dataset_name)
45
+ return dataset_class.modify_commandline_options
46
+
47
+
48
+ def create_dataset(opt, rank=0):
49
+ """Create a dataset given the option.
50
+
51
+ This function wraps the class CustomDatasetDataLoader.
52
+ This is the main interface between this package and 'train.py'/'test.py'
53
+
54
+ Example:
55
+ >>> from data import create_dataset
56
+ >>> dataset = create_dataset(opt)
57
+ """
58
+ data_loader = CustomDatasetDataLoader(opt, rank=rank)
59
+ dataset = data_loader.load_data()
60
+ return dataset
61
+
62
+ class CustomDatasetDataLoader():
63
+ """Wrapper class of Dataset class that performs multi-threaded data loading"""
64
+
65
+ def __init__(self, opt, rank=0):
66
+ """Initialize this class
67
+
68
+ Step 1: create a dataset instance given the name [dataset_mode]
69
+ Step 2: create a multi-threaded data loader.
70
+ """
71
+ self.opt = opt
72
+ dataset_class = find_dataset_using_name(opt.dataset_mode)
73
+ self.dataset = dataset_class(opt)
74
+ self.sampler = None
75
+ print("rank %d %s dataset [%s] was created" % (rank, self.dataset.name, type(self.dataset).__name__))
76
+ if opt.use_ddp and opt.isTrain:
77
+ world_size = opt.world_size
78
+ self.sampler = torch.utils.data.distributed.DistributedSampler(
79
+ self.dataset,
80
+ num_replicas=world_size,
81
+ rank=rank,
82
+ shuffle=not opt.serial_batches
83
+ )
84
+ self.dataloader = torch.utils.data.DataLoader(
85
+ self.dataset,
86
+ sampler=self.sampler,
87
+ num_workers=int(opt.num_threads / world_size),
88
+ batch_size=int(opt.batch_size / world_size),
89
+ drop_last=True)
90
+ else:
91
+ self.dataloader = torch.utils.data.DataLoader(
92
+ self.dataset,
93
+ batch_size=opt.batch_size,
94
+ shuffle=(not opt.serial_batches) and opt.isTrain,
95
+ num_workers=int(opt.num_threads),
96
+ drop_last=True
97
+ )
98
+
99
+ def set_epoch(self, epoch):
100
+ self.dataset.current_epoch = epoch
101
+ if self.sampler is not None:
102
+ self.sampler.set_epoch(epoch)
103
+
104
+ def load_data(self):
105
+ return self
106
+
107
+ def __len__(self):
108
+ """Return the number of data in the dataset"""
109
+ return min(len(self.dataset), self.opt.max_dataset_size)
110
+
111
+ def __iter__(self):
112
+ """Return a batch of data"""
113
+ for i, data in enumerate(self.dataloader):
114
+ if i * self.opt.batch_size >= self.opt.max_dataset_size:
115
+ break
116
+ yield data
additional_modules/deep3dfacerecon/data/base_dataset.py ADDED
@@ -0,0 +1,131 @@
1
+ """This module implements an abstract base class (ABC) 'BaseDataset' for datasets.
2
+
3
+ It also includes common transformation functions (e.g., get_transform, __scale_width), which can be later used in subclasses.
4
+ """
5
+ import random
6
+ import numpy as np
7
+ import torch.utils.data as data
8
+ from PIL import Image
9
+ try:
10
+ from PIL.Image import Resampling
11
+ RESAMPLING_METHOD = Resampling.BICUBIC
12
+ except ImportError:
13
+ from PIL.Image import BICUBIC
14
+ RESAMPLING_METHOD = BICUBIC
15
+ import torchvision.transforms as transforms
16
+ from abc import ABC, abstractmethod
17
+
18
+
19
+ class BaseDataset(data.Dataset, ABC):
20
+ """This class is an abstract base class (ABC) for datasets.
21
+
22
+ To create a subclass, you need to implement the following four functions:
23
+ -- <__init__>: initialize the class, first call BaseDataset.__init__(self, opt).
24
+ -- <__len__>: return the size of dataset.
25
+ -- <__getitem__>: get a data point.
26
+ -- <modify_commandline_options>: (optionally) add dataset-specific options and set default options.
27
+ """
28
+
29
+ def __init__(self, opt):
30
+ """Initialize the class; save the options in the class
31
+
32
+ Parameters:
33
+ opt (Option class)-- stores all the experiment flags; needs to be a subclass of BaseOptions
34
+ """
35
+ self.opt = opt
36
+ # self.root = opt.dataroot
37
+ self.current_epoch = 0
38
+
39
+ @staticmethod
40
+ def modify_commandline_options(parser, is_train):
41
+ """Add new dataset-specific options, and rewrite default values for existing options.
42
+
43
+ Parameters:
44
+ parser -- original option parser
45
+ is_train (bool) -- whether training phase or test phase. You can use this flag to add training-specific or test-specific options.
46
+
47
+ Returns:
48
+ the modified parser.
49
+ """
50
+ return parser
51
+
52
+ @abstractmethod
53
+ def __len__(self):
54
+ """Return the total number of images in the dataset."""
55
+ return 0
56
+
57
+ @abstractmethod
58
+ def __getitem__(self, index):
59
+ """Return a data point and its metadata information.
60
+
61
+ Parameters:
62
+ index - - a random integer for data indexing
63
+
64
+ Returns:
65
+ a dictionary of data with their names. It usually contains the data itself and its metadata information.
66
+ """
67
+ pass
68
+
69
+
70
+ def get_transform(grayscale=False):
71
+ transform_list = []
72
+ if grayscale:
73
+ transform_list.append(transforms.Grayscale(1))
74
+ transform_list += [transforms.ToTensor()]
75
+ return transforms.Compose(transform_list)
76
+
77
+ def get_affine_mat(opt, size):
78
+ shift_x, shift_y, scale, rot_angle, flip = 0., 0., 1., 0., False
79
+ w, h = size
80
+
81
+ if 'shift' in opt.preprocess:
82
+ shift_pixs = int(opt.shift_pixs)
83
+ shift_x = random.randint(-shift_pixs, shift_pixs)
84
+ shift_y = random.randint(-shift_pixs, shift_pixs)
85
+ if 'scale' in opt.preprocess:
86
+ scale = 1 + opt.scale_delta * (2 * random.random() - 1)
87
+ if 'rot' in opt.preprocess:
88
+ rot_angle = opt.rot_angle * (2 * random.random() - 1)
89
+ rot_rad = -rot_angle * np.pi/180
90
+ if 'flip' in opt.preprocess:
91
+ flip = random.random() > 0.5
92
+
93
+ shift_to_origin = np.array([1, 0, -w//2, 0, 1, -h//2, 0, 0, 1]).reshape([3, 3])
94
+ flip_mat = np.array([-1 if flip else 1, 0, 0, 0, 1, 0, 0, 0, 1]).reshape([3, 3])
95
+ shift_mat = np.array([1, 0, shift_x, 0, 1, shift_y, 0, 0, 1]).reshape([3, 3])
96
+ rot_mat = np.array([np.cos(rot_rad), np.sin(rot_rad), 0, -np.sin(rot_rad), np.cos(rot_rad), 0, 0, 0, 1]).reshape([3, 3])
97
+ scale_mat = np.array([scale, 0, 0, 0, scale, 0, 0, 0, 1]).reshape([3, 3])
98
+ shift_to_center = np.array([1, 0, w//2, 0, 1, h//2, 0, 0, 1]).reshape([3, 3])
99
+
100
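+ # compose right-to-left: move the image center to the origin, flip, shift, rotate, scale, then move back to the center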
+ affine = shift_to_center @ scale_mat @ rot_mat @ shift_mat @ flip_mat @ shift_to_origin
101
+ affine_inv = np.linalg.inv(affine)
102
+ return affine, affine_inv, flip
103
+
104
+ def apply_img_affine(img, affine_inv, method=RESAMPLING_METHOD):
105
+ return img.transform(img.size, Image.AFFINE, data=affine_inv.flatten()[:6], resample=method)
106
+
107
+ def apply_lm_affine(landmark, affine, flip, size):
108
+ _, h = size
109
+ lm = landmark.copy()
110
+ lm[:, 1] = h - 1 - lm[:, 1]
111
+ lm = np.concatenate((lm, np.ones([lm.shape[0], 1])), -1)
112
+ lm = lm @ np.transpose(affine)
113
+ lm[:, :2] = lm[:, :2] / lm[:, 2:]
114
+ lm = lm[:, :2]
115
+ lm[:, 1] = h - 1 - lm[:, 1]
116
+ if flip:
117
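+ # re-index the 68 landmarks after a horizontal flip: reverse the jaw line, swap left/right brows and eyes, and mirror the nose base and mouth contours (standard iBUG ordering assumed)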
+ lm_ = lm.copy()
118
+ lm_[:17] = lm[16::-1]
119
+ lm_[17:22] = lm[26:21:-1]
120
+ lm_[22:27] = lm[21:16:-1]
121
+ lm_[31:36] = lm[35:30:-1]
122
+ lm_[36:40] = lm[45:41:-1]
123
+ lm_[40:42] = lm[47:45:-1]
124
+ lm_[42:46] = lm[39:35:-1]
125
+ lm_[46:48] = lm[41:39:-1]
126
+ lm_[48:55] = lm[54:47:-1]
127
+ lm_[55:60] = lm[59:54:-1]
128
+ lm_[60:65] = lm[64:59:-1]
129
+ lm_[65:68] = lm[67:64:-1]
130
+ lm = lm_
131
+ return lm
additional_modules/deep3dfacerecon/data/flist_dataset.py ADDED
@@ -0,0 +1,125 @@
1
+ """This script defines the custom dataset for Deep3DFaceRecon_pytorch
2
+ """
3
+
4
+ import os.path
5
+ from data.base_dataset import BaseDataset, get_transform, get_affine_mat, apply_img_affine, apply_lm_affine
6
+ from data.image_folder import make_dataset
7
+ from PIL import Image
8
+ import random
9
+ import util.util as util
10
+ import numpy as np
11
+ import json
12
+ import torch
13
+ from scipy.io import loadmat, savemat
14
+ import pickle
15
+ from util.preprocess import align_img, estimate_norm
16
+ from util.load_mats import load_lm3d
17
+
18
+
19
+ def default_flist_reader(flist):
20
+ """
21
+ flist format: impath label\nimpath label\n ... (same as caffe's filelist)
22
+ """
23
+ imlist = []
24
+ with open(flist, 'r') as rf:
25
+ for line in rf.readlines():
26
+ impath = line.strip()
27
+ imlist.append(impath)
28
+
29
+ return imlist
30
+
31
+ def jason_flist_reader(flist):
32
+ with open(flist, 'r') as fp:
33
+ info = json.load(fp)
34
+ return info
35
+
36
+ def parse_label(label):
37
+ return torch.tensor(np.array(label).astype(np.float32))
38
+
39
+
40
+ class FlistDataset(BaseDataset):
41
+ """
42
+ It requires one directory to host the training images, e.g., '/path/to/data/train'.
43
+ You can train the model with the dataset flag '--dataroot /path/to/data'.
44
+ """
45
+
46
+ def __init__(self, opt):
47
+ """Initialize this dataset class.
48
+
49
+ Parameters:
50
+ opt (Option class) -- stores all the experiment flags; needs to be a subclass of BaseOptions
51
+ """
52
+ BaseDataset.__init__(self, opt)
53
+
54
+ self.lm3d_std = load_lm3d(opt.bfm_folder)
55
+
56
+ msk_names = default_flist_reader(opt.flist)
57
+ self.msk_paths = [os.path.join(opt.data_root, i) for i in msk_names]
58
+
59
+ self.size = len(self.msk_paths)
60
+ self.opt = opt
61
+
62
+ self.name = 'train' if opt.isTrain else 'val'
63
+ if '_' in opt.flist:
64
+ self.name += '_' + opt.flist.split(os.sep)[-1].split('_')[0]
65
+
66
+
67
+ def __getitem__(self, index):
68
+ """Return a data point and its metadata information.
69
+
70
+ Parameters:
71
+ index (int) -- a random integer for data indexing
72
+
73
+ Returns a dictionary that contains the following items:
74
+ img (tensor) -- an image in the input domain
75
+ msk (tensor) -- its corresponding attention mask
76
+ lm (tensor) -- its corresponding 3d landmarks
77
+ im_paths (str) -- image paths
78
+ aug_flag (bool) -- a flag used to tell whether it's raw or augmented
79
+ """
80
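+ # path convention (see data_preparation.py): masks live in <folder>/mask/, images in <folder>/, and landmark files in <folder>/landmarks/*.txt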
+ msk_path = self.msk_paths[index % self.size] # make sure index is within the range
81
+ img_path = msk_path.replace('mask/', '')
82
+ lm_path = '.'.join(msk_path.replace('mask', 'landmarks').split('.')[:-1]) + '.txt'
83
+
84
+ raw_img = Image.open(img_path).convert('RGB')
85
+ raw_msk = Image.open(msk_path).convert('RGB')
86
+ raw_lm = np.loadtxt(lm_path).astype(np.float32)
87
+
88
+ _, img, lm, msk = align_img(raw_img, raw_lm, self.lm3d_std, raw_msk)
89
+
90
+ aug_flag = self.opt.use_aug and self.opt.isTrain
91
+ if aug_flag:
92
+ img, lm, msk = self._augmentation(img, lm, self.opt, msk)
93
+
94
+ _, H = img.size
95
+ M = estimate_norm(lm, H)
96
+ transform = get_transform()
97
+ img_tensor = transform(img)
98
+ msk_tensor = transform(msk)[:1, ...]
99
+ lm_tensor = parse_label(lm)
100
+ M_tensor = parse_label(M)
101
+
102
+
103
+ return {'imgs': img_tensor,
104
+ 'lms': lm_tensor,
105
+ 'msks': msk_tensor,
106
+ 'M': M_tensor,
107
+ 'im_paths': img_path,
108
+ 'aug_flag': aug_flag,
109
+ 'dataset': self.name}
110
+
111
+ def _augmentation(self, img, lm, opt, msk=None):
112
+ affine, affine_inv, flip = get_affine_mat(opt, img.size)
113
+ img = apply_img_affine(img, affine_inv)
114
+ lm = apply_lm_affine(lm, affine, flip, img.size)
115
+ if msk is not None:
116
+ msk = apply_img_affine(msk, affine_inv, method=Image.BILINEAR)
117
+ return img, lm, msk
118
+
119
+
120
+
121
+
122
+ def __len__(self):
123
+ """Return the total number of images in the dataset.
124
+ """
125
+ return self.size
additional_modules/deep3dfacerecon/data/image_folder.py ADDED
@@ -0,0 +1,66 @@
1
+ """A modified image folder class
2
+
3
+ We modify the official PyTorch image folder (https://github.com/pytorch/vision/blob/master/torchvision/datasets/folder.py)
4
+ so that this class can load images from both current directory and its subdirectories.
5
+ """
6
+ import numpy as np
7
+ import torch.utils.data as data
8
+
9
+ from PIL import Image
10
+ import os
11
+ import os.path
12
+
13
+ IMG_EXTENSIONS = [
14
+ '.jpg', '.JPG', '.jpeg', '.JPEG',
15
+ '.png', '.PNG', '.ppm', '.PPM', '.bmp', '.BMP',
16
+ '.tif', '.TIF', '.tiff', '.TIFF',
17
+ ]
18
+
19
+
20
+ def is_image_file(filename):
21
+ return any(filename.endswith(extension) for extension in IMG_EXTENSIONS)
22
+
23
+
24
+ def make_dataset(dir, max_dataset_size=float("inf")):
25
+ images = []
26
+ assert os.path.isdir(dir) or os.path.islink(dir), '%s is not a valid directory' % dir
27
+
28
+ for root, _, fnames in sorted(os.walk(dir, followlinks=True)):
29
+ for fname in fnames:
30
+ if is_image_file(fname):
31
+ path = os.path.join(root, fname)
32
+ images.append(path)
33
+ return images[:min(max_dataset_size, len(images))]
34
+
35
+
36
+ def default_loader(path):
37
+ return Image.open(path).convert('RGB')
38
+
39
+
40
+ class ImageFolder(data.Dataset):
41
+
42
+ def __init__(self, root, transform=None, return_paths=False,
43
+ loader=default_loader):
44
+ imgs = make_dataset(root)
45
+ if len(imgs) == 0:
46
+ raise(RuntimeError("Found 0 images in: " + root + "\n"
47
+ "Supported image extensions are: " + ",".join(IMG_EXTENSIONS)))
48
+
49
+ self.root = root
50
+ self.imgs = imgs
51
+ self.transform = transform
52
+ self.return_paths = return_paths
53
+ self.loader = loader
54
+
55
+ def __getitem__(self, index):
56
+ path = self.imgs[index]
57
+ img = self.loader(path)
58
+ if self.transform is not None:
59
+ img = self.transform(img)
60
+ if self.return_paths:
61
+ return img, path
62
+ else:
63
+ return img
64
+
65
+ def __len__(self):
66
+ return len(self.imgs)
additional_modules/deep3dfacerecon/data/template_dataset.py ADDED
@@ -0,0 +1,75 @@
1
+ """Dataset class template
2
+
3
+ This module provides a template for users to implement custom datasets.
4
+ You can specify '--dataset_mode template' to use this dataset.
5
+ The class name should be consistent with both the filename and its dataset_mode option.
6
+ The filename should be <dataset_mode>_dataset.py
7
+ The class name should be <Dataset_mode>Dataset
8
+ You need to implement the following functions:
9
+ -- <modify_commandline_options>: Add dataset-specific options and rewrite default values for existing options.
10
+ -- <__init__>: Initialize this dataset class.
11
+ -- <__getitem__>: Return a data point and its metadata information.
12
+ -- <__len__>: Return the number of images.
13
+ """
14
+ from data.base_dataset import BaseDataset, get_transform
15
+ # from data.image_folder import make_dataset
16
+ # from PIL import Image
17
+
18
+
19
+ class TemplateDataset(BaseDataset):
20
+ """A template dataset class for you to implement custom datasets."""
21
+ @staticmethod
22
+ def modify_commandline_options(parser, is_train):
23
+ """Add new dataset-specific options, and rewrite default values for existing options.
24
+
25
+ Parameters:
26
+ parser -- original option parser
27
+ is_train (bool) -- whether training phase or test phase. You can use this flag to add training-specific or test-specific options.
28
+
29
+ Returns:
30
+ the modified parser.
31
+ """
32
+ parser.add_argument('--new_dataset_option', type=float, default=1.0, help='new dataset option')
33
+ parser.set_defaults(max_dataset_size=10, new_dataset_option=2.0) # specify dataset-specific default values
34
+ return parser
35
+
36
+ def __init__(self, opt):
37
+ """Initialize this dataset class.
38
+
39
+ Parameters:
40
+ opt (Option class) -- stores all the experiment flags; needs to be a subclass of BaseOptions
41
+
42
+ A few things can be done here.
43
+ - save the options (have been done in BaseDataset)
44
+ - get image paths and meta information of the dataset.
45
+ - define the image transformation.
46
+ """
47
+ # save the option and dataset root
48
+ BaseDataset.__init__(self, opt)
49
+ # get the image paths of your dataset;
50
+ self.image_paths = [] # You can call sorted(make_dataset(self.root, opt.max_dataset_size)) to get all the image paths under the directory self.root
51
+ # define the default transform function. You can use <base_dataset.get_transform>; You can also define your custom transform function
52
+ self.transform = get_transform(opt)
53
+
54
+ def __getitem__(self, index):
55
+ """Return a data point and its metadata information.
56
+
57
+ Parameters:
58
+ index -- a random integer for data indexing
59
+
60
+ Returns:
61
+ a dictionary of data with their names. It usually contains the data itself and its metadata information.
62
+
63
+ Step 1: get a random image path: e.g., path = self.image_paths[index]
64
+ Step 2: load your data from the disk: e.g., image = Image.open(path).convert('RGB').
65
+ Step 3: convert your data to a PyTorch tensor. You can use helper functions such as self.transform. e.g., data = self.transform(image)
66
+ Step 4: return a data point as a dictionary.
67
+ """
68
+ path = 'temp' # needs to be a string
69
+ data_A = None # needs to be a tensor
70
+ data_B = None # needs to be a tensor
71
+ return {'data_A': data_A, 'data_B': data_B, 'path': path}
72
+
73
+ def __len__(self):
74
+ """Return the total number of images."""
75
+ return len(self.image_paths)
additional_modules/deep3dfacerecon/data_preparation.py ADDED
@@ -0,0 +1,45 @@
1
+ """This script is the data preparation script for Deep3DFaceRecon_pytorch
2
+ """
3
+
4
+ import os
5
+ import numpy as np
6
+ import argparse
7
+ from util.detect_lm68 import detect_68p,load_lm_graph
8
+ from util.skin_mask import get_skin_mask
9
+ from util.generate_list import check_list, write_list
10
+ import warnings
11
+ warnings.filterwarnings("ignore")
12
+
13
+ parser = argparse.ArgumentParser()
14
+ parser.add_argument('--data_root', type=str, default='datasets', help='root directory for training data')
15
+ parser.add_argument('--img_folder', nargs="+", required=True, help='folders of training images')
16
+ parser.add_argument('--mode', type=str, default='train', help='train or val')
17
+ opt = parser.parse_args()
18
+
19
+ os.environ['CUDA_VISIBLE_DEVICES'] = '0'
20
+
21
+ def data_prepare(folder_list,mode):
22
+
23
+ lm_sess,input_op,output_op = load_lm_graph('./checkpoints/lm_model/68lm_detector.pb') # load a tensorflow version 68-landmark detector
24
+
25
+ for img_folder in folder_list:
26
+ detect_68p(img_folder,lm_sess,input_op,output_op) # detect landmarks for images
27
+ get_skin_mask(img_folder) # generate skin attention mask for images
28
+
29
+ # create files that record path to all training data
30
+ msks_list = []
31
+ for img_folder in folder_list:
32
+ path = os.path.join(img_folder, 'mask')
33
+ msks_list += ['/'.join([img_folder, 'mask', i]) for i in sorted(os.listdir(path)) if 'jpg' in i or
34
+ 'png' in i or 'jpeg' in i or 'PNG' in i]
35
+
36
+ imgs_list = [i.replace('mask/', '') for i in msks_list]
37
+ lms_list = [i.replace('mask', 'landmarks') for i in msks_list]
38
+ lms_list = ['.'.join(i.split('.')[:-1]) + '.txt' for i in lms_list]
39
+
40
+ lms_list_final, imgs_list_final, msks_list_final = check_list(lms_list, imgs_list, msks_list) # check if the path is valid
41
+ write_list(lms_list_final, imgs_list_final, msks_list_final, mode=mode) # save files
42
+
43
+ if __name__ == '__main__':
44
+ print('Datasets:',opt.img_folder)
45
+ data_prepare([os.path.join(opt.data_root,folder) for folder in opt.img_folder],opt.mode)
additional_modules/deep3dfacerecon/environment.yml ADDED
@@ -0,0 +1,24 @@
1
+ name: deep3d_pytorch
2
+ channels:
3
+ - pytorch
4
+ - conda-forge
5
+ - defaults
6
+ dependencies:
7
+ - python=3.6
8
+ - pytorch=1.6.0
9
+ - torchvision=0.7.0
10
+ - numpy=1.18.1
11
+ - scikit-image=0.16.2
12
+ - scipy=1.4.1
13
+ - pillow=6.2.1
14
+ - pip=20.0.2
15
+ - ipython=7.13.0
16
+ - yaml=0.1.7
17
+ - pip:
18
+ - matplotlib==2.2.5
19
+ - opencv-python==3.4.9.33
20
+ - tensorboard==1.15.0
21
+ - tensorflow==1.15.0
22
+ - kornia==0.5.5
23
+ - dominate==2.6.0
24
+ - trimesh==3.9.20
additional_modules/deep3dfacerecon/models/__init__.py ADDED
File without changes
additional_modules/deep3dfacerecon/models/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (201 Bytes). View file
 
additional_modules/deep3dfacerecon/models/__pycache__/base_model.cpython-310.pyc ADDED
Binary file (3.25 kB). View file
 
additional_modules/deep3dfacerecon/models/__pycache__/facerecon_model.cpython-310.pyc ADDED
Binary file (2.62 kB). View file
 
additional_modules/deep3dfacerecon/models/__pycache__/networks.cpython-310.pyc ADDED
Binary file (15.7 kB). View file
 
additional_modules/deep3dfacerecon/models/arcface_torch/README.md ADDED
@@ -0,0 +1,218 @@
1
+ # Distributed Arcface Training in Pytorch
2
+
3
+ The "arcface_torch" repository is the official implementation of the ArcFace algorithm. It supports distributed and sparse training with multiple distributed training examples, including several memory-saving techniques such as mixed precision training and gradient checkpointing. It also supports training for ViT models and datasets including WebFace42M and Glint360K, two of the largest open-source datasets. Additionally, the repository comes with a built-in tool for converting to ONNX format, making it easy to submit to MFR evaluation systems.
4
+
5
+ [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/killing-two-birds-with-one-stone-efficient/face-verification-on-ijb-c)](https://paperswithcode.com/sota/face-verification-on-ijb-c?p=killing-two-birds-with-one-stone-efficient)
6
+ [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/killing-two-birds-with-one-stone-efficient/face-verification-on-ijb-b)](https://paperswithcode.com/sota/face-verification-on-ijb-b?p=killing-two-birds-with-one-stone-efficient)
7
+ [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/killing-two-birds-with-one-stone-efficient/face-verification-on-agedb-30)](https://paperswithcode.com/sota/face-verification-on-agedb-30?p=killing-two-birds-with-one-stone-efficient)
8
+ [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/killing-two-birds-with-one-stone-efficient/face-verification-on-cfp-fp)](https://paperswithcode.com/sota/face-verification-on-cfp-fp?p=killing-two-birds-with-one-stone-efficient)
9
+
10
+ ## Requirements
11
+
12
+ To take advantage of the latest PyTorch features, we have upgraded to version 1.12.0.
13
+
14
+ - Install [PyTorch](https://pytorch.org/get-started/previous-versions/) (torch>=1.12.0).
15
+ - (Optional) Install [DALI](https://docs.nvidia.com/deeplearning/dali/user-guide/docs/), our doc for [install_dali.md](docs/install_dali.md).
16
+ - `pip install -r requirement.txt`.
17
+
18
+ ## How to Train
19
+
20
+ To train a model, execute the `train_v2.py` script with the path to the configuration files. The sample commands provided below demonstrate the process of conducting distributed training.
21
+
22
+ ### 1. To run on one GPU:
23
+
24
+ ```shell
25
+ python train_v2.py configs/ms1mv3_r50_onegpu
26
+ ```
27
+
28
+ Note:
29
+ It is not recommended to use a single GPU for training, as this may result in longer training times and suboptimal performance. For best results, we suggest using multiple GPUs or a GPU cluster.
30
+
31
+
32
+ ### 2. To run on a machine with 8 GPUs:
33
+
34
+ ```shell
35
+ torchrun --nproc_per_node=8 train_v2.py configs/ms1mv3_r50
36
+ ```
37
+
38
+ ### 3. To run on 2 machines with 8 GPUs each:
39
+
40
+ Node 0:
41
+
42
+ ```shell
43
+ torchrun --nproc_per_node=8 --nnodes=2 --node_rank=0 --master_addr="ip1" --master_port=12581 train_v2.py configs/wf42m_pfc02_16gpus_r100
44
+ ```
45
+
46
+ Node 1:
47
+
48
+ ```shell
49
+ torchrun --nproc_per_node=8 --nnodes=2 --node_rank=1 --master_addr="ip1" --master_port=12581 train_v2.py configs/wf42m_pfc02_16gpus_r100
50
+ ```
51
+
52
+ ### 4. Run ViT-B on a machine with 24k batchsize:
53
+
54
+ ```shell
55
+ torchrun --nproc_per_node=8 train_v2.py configs/wf42m_pfc03_40epoch_8gpu_vit_b
56
+ ```
57
+
58
+
59
+ ## Download Datasets or Prepare Datasets
60
+ - [MS1MV2](https://github.com/deepinsight/insightface/tree/master/recognition/_datasets_#ms1m-arcface-85k-ids58m-images-57) (87k IDs, 5.8M images)
61
+ - [MS1MV3](https://github.com/deepinsight/insightface/tree/master/recognition/_datasets_#ms1m-retinaface) (93k IDs, 5.2M images)
62
+ - [Glint360K](https://github.com/deepinsight/insightface/tree/master/recognition/partial_fc#4-download) (360k IDs, 17.1M images)
63
+ - [WebFace42M](docs/prepare_webface42m.md) (2M IDs, 42.5M images)
64
+ - [Your Dataset, Click Here!](docs/prepare_custom_dataset.md)
65
+
66
+ Note:
67
+ If you want to use DALI for data reading, please use the script 'scripts/shuffle_rec.py' to shuffle the InsightFace style rec before using it.
68
+ Example:
69
+
70
+ `python scripts/shuffle_rec.py ms1m-retinaface-t1`
71
+
72
+ You will get the "shuffled_ms1m-retinaface-t1" folder, where the samples in the "train.rec" file are shuffled.
73
+
74
+
75
+ ## Model Zoo
76
+
77
+ - The models are available for non-commercial research purposes only.
78
+ - All models can be found here:
79
+ - [Baidu Yun Pan](https://pan.baidu.com/s/1CL-l4zWqsI1oDuEEYVhj-g): e8pw
80
+ - [OneDrive](https://1drv.ms/u/s!AswpsDO2toNKq0lWY69vN58GR6mw?e=p9Ov5d)
81
+
82
+ ### Performance on IJB-C and [**ICCV2021-MFR**](https://github.com/deepinsight/insightface/blob/master/challenges/mfr/README.md)
83
+
84
+ The ICCV2021-MFR test set consists of non-celebrities, so we can ensure that it has very little overlap with publicly available face
85
+ recognition training sets such as MS1M and CASIA, which are mostly collected from online celebrities.
86
+ As a result, we can evaluate the fair performance of different algorithms.
87
+
88
+ For the **ICCV2021-MFR-ALL** set, TAR is measured under the all-to-all 1:1 protocol, with FAR less than 0.000001 (1e-6). The
89
+ globalised multi-racial test set contains 242,143 identities and 1,624,305 images.
90
+
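+ As a minimal sketch (not the official evaluation code), TAR at a fixed FAR can be computed from 1:1 verification scores like this:
+ ```python
+ import numpy as np
+
+ def tar_at_far(genuine_scores, impostor_scores, far=1e-6):
+     # threshold that lets only `far` of impostor pairs through,
+     # then the fraction of genuine pairs above that threshold
+     threshold = np.quantile(impostor_scores, 1.0 - far)
+     return float(np.mean(np.asarray(genuine_scores) >= threshold))
+ ```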
91
+
92
+ #### 1. Training on Single-Host GPU
93
+
94
+ | Datasets | Backbone | **MFR-ALL** | IJB-C(1E-4) | IJB-C(1E-5) | log |
95
+ |:---------------|:--------------------|:------------|:------------|:------------|:------------------------------------------------------------------------------------------------------------------------------------|
96
+ | MS1MV2 | mobilefacenet-0.45G | 62.07 | 93.61 | 90.28 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/ms1mv2_mbf/training.log) |
97
+ | MS1MV2 | r50 | 75.13 | 95.97 | 94.07 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/ms1mv2_r50/training.log) |
98
+ | MS1MV2 | r100 | 78.12 | 96.37 | 94.27 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/ms1mv2_r100/training.log) |
99
+ | MS1MV3 | mobilefacenet-0.45G | 63.78 | 94.23 | 91.33 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/ms1mv3_mbf/training.log) |
100
+ | MS1MV3 | r50 | 79.14 | 96.37 | 94.47 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/ms1mv3_r50/training.log) |
101
+ | MS1MV3 | r100 | 81.97 | 96.85 | 95.02 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/ms1mv3_r100/training.log) |
102
+ | Glint360K | mobilefacenet-0.45G | 70.18 | 95.04 | 92.62 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/glint360k_mbf/training.log) |
103
+ | Glint360K | r50 | 86.34 | 97.16 | 95.81 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/glint360k_r50/training.log) |
104
+ | Glint360k | r100 | 89.52 | 97.55 | 96.38 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/glint360k_r100/training.log) |
105
+ | WF4M | r100 | 89.87 | 97.19 | 95.48 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/wf4m_r100/training.log) |
106
+ | WF12M-PFC-0.2 | r100 | 94.75 | 97.60 | 95.90 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/wf12m_pfc02_r100/training.log) |
107
+ | WF12M-PFC-0.3 | r100 | 94.71 | 97.64 | 96.01 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/wf12m_pfc03_r100/training.log) |
108
+ | WF12M | r100 | 94.69 | 97.59 | 95.97 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/wf12m_r100/training.log) |
109
+ | WF42M-PFC-0.2 | r100 | 96.27 | 97.70 | 96.31 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/wf42m_pfc02_r100/training.log) |
110
+ | WF42M-PFC-0.2 | ViT-T-1.5G | 92.04 | 97.27 | 95.68 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/wf42m_pfc02_40epoch_8gpu_vit_t/training.log) |
111
+ | WF42M-PFC-0.3 | ViT-B-11G | 97.16 | 97.91 | 97.05 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/pfc03_wf42m_vit_b_8gpu/training.log) |
112
+
113
+ #### 2. Training on Multi-Host GPU
114
+
115
+ | Datasets | Backbone(bs*gpus) | **MFR-ALL** | IJB-C(1E-4) | IJB-C(1E-5) | Throughout | log |
116
+ |:-----------------|:------------------|:------------|:------------|:------------|:-----------|:-------------------------------------------------------------------------------------------------------------------------------------------|
117
+ | WF42M-PFC-0.2 | r50(512*8) | 93.83 | 97.53 | 96.16 | ~5900 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/webface42m_r50_bs4k_pfc02/training.log) |
118
+ | WF42M-PFC-0.2 | r50(512*16) | 93.96 | 97.46 | 96.12 | ~11000 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/webface42m_r50_lr01_pfc02_bs8k_16gpus/training.log) |
119
+ | WF42M-PFC-0.2 | r50(128*32) | 94.04 | 97.48 | 95.94 | ~17000 | click me |
120
+ | WF42M-PFC-0.2 | r100(128*16) | 96.28 | 97.80 | 96.57 | ~5200 | click me |
121
+ | WF42M-PFC-0.2 | r100(256*16) | 96.69 | 97.85 | 96.63 | ~5200 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/webface42m_r100_bs4k_pfc02/training.log) |
122
+ | WF42M-PFC-0.0018 | r100(512*32) | 93.08 | 97.51 | 95.88 | ~10000 | click me |
123
+ | WF42M-PFC-0.2 | r100(128*32) | 96.57 | 97.83 | 96.50 | ~9800 | click me |
124
+
125
+ `r100(128*32)` means the backbone is r100, the batch size per GPU is 128, and the number of GPUs is 32.
126
+
+
+
+ #### 3. ViT For Face Recognition
+
+ | Datasets | Backbone(bs*gpus) | FLOPs (G) | **MFR-ALL** | IJB-C(1E-4) | IJB-C(1E-5) | Throughput | log |
+ |:--------------|:------------------|:----------|:------------|:------------|:------------|:-----------|:----------|
+ | WF42M-PFC-0.3 | r18(128*32) | 2.6 | 79.13 | 95.77 | 93.36 | - | click me |
+ | WF42M-PFC-0.3 | r50(128*32) | 6.3 | 94.03 | 97.48 | 95.94 | - | click me |
+ | WF42M-PFC-0.3 | r100(128*32) | 12.1 | 96.69 | 97.82 | 96.45 | - | click me |
+ | WF42M-PFC-0.3 | r200(128*32) | 23.5 | 97.70 | 97.97 | 96.93 | - | click me |
+ | WF42M-PFC-0.3 | VIT-T(384*64) | 1.5 | 92.24 | 97.31 | 95.97 | ~35000 | click me |
+ | WF42M-PFC-0.3 | VIT-S(384*64) | 5.7 | 95.87 | 97.73 | 96.57 | ~25000 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/pfc03_wf42m_vit_s_64gpu/training.log) |
+ | WF42M-PFC-0.3 | VIT-B(384*64) | 11.4 | 97.42 | 97.90 | 97.04 | ~13800 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/pfc03_wf42m_vit_b_64gpu/training.log) |
+ | WF42M-PFC-0.3 | VIT-L(384*64) | 25.3 | 97.85 | 98.00 | 97.23 | ~9406 | [click me](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/pfc03_wf42m_vit_l_64gpu/training.log) |
+
+ `WF42M` means WebFace42M, and `PFC-0.3` means the sampling rate of negative class centers is 0.3.
+
+ #### 4. Noisy Datasets
+
+ | Datasets | Backbone | **MFR-ALL** | IJB-C(1E-4) | IJB-C(1E-5) | log |
+ |:-------------------------|:---------|:------------|:------------|:------------|:---------|
+ | WF12M-Flip(40%) | r50 | 43.87 | 88.35 | 80.78 | click me |
+ | WF12M-Flip(40%)-PFC-0.1* | r50 | 80.20 | 96.11 | 93.79 | click me |
+ | WF12M-Conflict | r50 | 79.93 | 95.30 | 91.56 | click me |
+ | WF12M-Conflict-PFC-0.3* | r50 | 91.68 | 97.28 | 95.75 | click me |
+
+ `WF12M` means WebFace12M; the `*` suffix denotes PFC training with additional abnormal inter-class filtering.
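+ The exact filtering step lives in the training code; the snippet below is only a rough sketch of the idea (it is not the repository's implementation, and the threshold value, tensor names, and masking strategy are assumptions): a negative class whose cosine logit is suspiciously high for a sample is treated as a likely inter-class conflict and removed from that sample's softmax.
+
+ ```python
+ import torch
+
+ def filter_abnormal_negatives(cos_logits: torch.Tensor,
+                               labels: torch.Tensor,
+                               threshold: float = 0.4) -> torch.Tensor:
+     """Illustrative only: drop negative classes that look like inter-class conflicts."""
+     batch_index = torch.arange(cos_logits.size(0), device=cos_logits.device)
+     is_positive = torch.zeros_like(cos_logits, dtype=torch.bool)
+     is_positive[batch_index, labels] = True
+     # A negative class with a very high cosine similarity is probably the same identity
+     # registered under a different label, so exclude it from this sample's loss.
+     conflict = (cos_logits > threshold) & ~is_positive
+     return cos_logits.masked_fill(conflict, float("-inf"))
+ ```
+
+ In the table above, the rows marked with `*` combine PFC sampling with this kind of filtering.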
+
+
+
+ ## Speed Benchmark
+ <div><img src="https://github.com/anxiangsir/insightface_arcface_log/blob/master/pfc_exp.png" width = "90%" /></div>
+
+
+ **Arcface-Torch** is an efficient tool for training on large-scale face recognition datasets. When the number of classes in the training set exceeds one million, the partial FC sampling strategy maintains the same accuracy while providing several times faster training and lower GPU memory utilization. Partial FC is a sparse variant of the model-parallel architecture for large-scale face recognition: it uses a sparse softmax that dynamically samples a subset of class centers for each training batch. During each iteration, only this sparse portion of the parameters is updated, which significantly reduces GPU memory requirements and computational cost. With partial FC, it is possible to train on datasets with up to 29 million identities, the largest reported to date. Partial FC also supports multi-machine distributed training and mixed-precision training.
+
+
+
+ For more details, see [speed_benchmark.md](docs/speed_benchmark.md) in the docs.
+
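+ To make the sampling idea concrete, here is a minimal single-GPU sketch of drawing a subset of class centers each step (illustrative only, not the repository's distributed Partial FC module; the function name, toy sizes, and the plain cross-entropy loss are assumptions):
+
+ ```python
+ import torch
+ import torch.nn.functional as F
+
+ def sample_class_centers(weight, labels, sample_rate):
+     """Keep the centers of the classes in this batch, plus enough randomly
+     chosen negative centers to reach sample_rate * num_classes in total."""
+     num_classes = weight.size(0)
+     num_sample = max(int(sample_rate * num_classes), 1)
+     positive = torch.unique(labels)                        # classes present in the batch
+     if num_sample <= positive.numel():
+         index = positive
+     else:
+         scores = torch.rand(num_classes, device=weight.device)
+         scores[positive] = 2.0                             # force positives into the top-k
+         index = torch.topk(scores, k=num_sample)[1].sort()[0]
+     new_labels = torch.searchsorted(index, labels)         # remap labels into the subset
+     return weight[index], new_labels
+
+ # Toy example: 100k identities, sample 10% of the class centers per step.
+ weight = torch.randn(100_000, 512)                         # full class-center matrix
+ feats = F.normalize(torch.randn(64, 512), dim=1)
+ labels = torch.randint(0, 100_000, (64,))
+ sub_weight, sub_labels = sample_class_centers(weight, labels, sample_rate=0.1)
+ logits = F.linear(feats, F.normalize(sub_weight, dim=1))   # cosine logits over the subset
+ loss = F.cross_entropy(logits, sub_labels)                 # margin terms omitted for brevity
+ ```
+
+ Because only the sampled rows of the class-center matrix (and their gradients and optimizer state) participate in a step, the memory and compute of the classification layer scale with the sample rate instead of with the total number of identities.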
+
+ > 1. Training Speed of Various Parallel Techniques (Samples per Second) on a Tesla V100 32GB x 8 System (Higher is Better)
+
+ `-` means training failed because of GPU memory limitations.
+
+ | Number of Identities in Dataset | Data Parallel | Model Parallel | Partial FC 0.1 |
+ |:--------------------------------|:--------------|:---------------|:---------------|
+ | 125000 | 4681 | 4824 | 5004 |
+ | 1400000 | **1672** | 3043 | 4738 |
+ | 5500000 | **-** | **1389** | 3975 |
+ | 8000000 | **-** | **-** | 3565 |
+ | 16000000 | **-** | **-** | 2679 |
+ | 29000000 | **-** | **-** | **1855** |
+
+ > 2. GPU Memory Utilization of Various Parallel Techniques (MB per GPU) on a Tesla V100 32GB x 8 System (Lower is Better)
+
+ | Number of Identities in Dataset | Data Parallel | Model Parallel | Partial FC 0.1 |
+ |:--------------------------------|:--------------|:---------------|:---------------|
+ | 125000 | 7358 | 5306 | 4868 |
+ | 1400000 | 32252 | 11178 | 6056 |
+ | 5500000 | **-** | 32188 | 9854 |
+ | 8000000 | **-** | **-** | 12310 |
+ | 16000000 | **-** | **-** | 19950 |
+ | 29000000 | **-** | **-** | 32324 |
+
+
+ ## Citations
+
+ ```
+ @inproceedings{deng2019arcface,
+ title={Arcface: Additive angular margin loss for deep face recognition},
+ author={Deng, Jiankang and Guo, Jia and Xue, Niannan and Zafeiriou, Stefanos},
+ booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
+ pages={4690--4699},
+ year={2019}
+ }
+ @inproceedings{An_2022_CVPR,
+ author={An, Xiang and Deng, Jiankang and Guo, Jia and Feng, Ziyong and Zhu, XuHan and Yang, Jing and Liu, Tongliang},
+ title={Killing Two Birds With One Stone: Efficient and Robust Training of Face Recognition CNNs by Partial FC},
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
+ month={June},
+ year={2022},
+ pages={4042--4051}
+ }
+ @inproceedings{zhu2021webface260m,
+ title={Webface260m: A benchmark unveiling the power of million-scale deep face recognition},
+ author={Zhu, Zheng and Huang, Guan and Deng, Jiankang and Ye, Yun and Huang, Junjie and Chen, Xinze and Zhu, Jiagang and Yang, Tian and Lu, Jiwen and Du, Dalong and Zhou, Jie},
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+ pages={10492--10502},
+ year={2021}
+ }
+ ```
additional_modules/deep3dfacerecon/models/arcface_torch/backbones/__init__.py ADDED
@@ -0,0 +1,92 @@
1
+ from .iresnet import iresnet18, iresnet34, iresnet50, iresnet100, iresnet200
2
+ from .mobilefacenet import get_mbf
3
+
4
+
5
+ def get_model(name, **kwargs):
6
+ # resnet
7
+ if name == "r18":
8
+ return iresnet18(False, **kwargs)
9
+ elif name == "r34":
10
+ return iresnet34(False, **kwargs)
11
+ elif name == "r50":
12
+ return iresnet50(False, **kwargs)
13
+ elif name == "r100":
14
+ return iresnet100(False, **kwargs)
15
+ elif name == "r200":
16
+ return iresnet200(False, **kwargs)
17
+ elif name == "r2060":
18
+ from .iresnet2060 import iresnet2060
19
+ return iresnet2060(False, **kwargs)
20
+
21
+ elif name == "mbf":
22
+ fp16 = kwargs.get("fp16", False)
23
+ num_features = kwargs.get("num_features", 512)
24
+ return get_mbf(fp16=fp16, num_features=num_features)
25
+
26
+ elif name == "mbf_large":
27
+ from .mobilefacenet import get_mbf_large
28
+ fp16 = kwargs.get("fp16", False)
29
+ num_features = kwargs.get("num_features", 512)
30
+ return get_mbf_large(fp16=fp16, num_features=num_features)
31
+
32
+ elif name == "vit_t":
33
+ num_features = kwargs.get("num_features", 512)
34
+ from .vit import VisionTransformer
35
+ return VisionTransformer(
36
+ img_size=112, patch_size=9, num_classes=num_features, embed_dim=256, depth=12,
37
+ num_heads=8, drop_path_rate=0.1, norm_layer="ln", mask_ratio=0.1)
38
+
39
+ elif name == "vit_t_dp005_mask0": # For WebFace42M
40
+ num_features = kwargs.get("num_features", 512)
41
+ from .vit import VisionTransformer
42
+ return VisionTransformer(
43
+ img_size=112, patch_size=9, num_classes=num_features, embed_dim=256, depth=12,
44
+ num_heads=8, drop_path_rate=0.05, norm_layer="ln", mask_ratio=0.0)
45
+
46
+ elif name == "vit_s":
47
+ num_features = kwargs.get("num_features", 512)
48
+ from .vit import VisionTransformer
49
+ return VisionTransformer(
50
+ img_size=112, patch_size=9, num_classes=num_features, embed_dim=512, depth=12,
51
+ num_heads=8, drop_path_rate=0.1, norm_layer="ln", mask_ratio=0.1)
52
+
53
+ elif name == "vit_s_dp005_mask_0": # For WebFace42M
54
+ num_features = kwargs.get("num_features", 512)
55
+ from .vit import VisionTransformer
56
+ return VisionTransformer(
57
+ img_size=112, patch_size=9, num_classes=num_features, embed_dim=512, depth=12,
58
+ num_heads=8, drop_path_rate=0.05, norm_layer="ln", mask_ratio=0.0)
59
+
60
+ elif name == "vit_b":
61
+ # this is a feature
62
+ num_features = kwargs.get("num_features", 512)
63
+ from .vit import VisionTransformer
64
+ return VisionTransformer(
65
+ img_size=112, patch_size=9, num_classes=num_features, embed_dim=512, depth=24,
66
+ num_heads=8, drop_path_rate=0.1, norm_layer="ln", mask_ratio=0.1, using_checkpoint=True)
67
+
68
+ elif name == "vit_b_dp005_mask_005": # For WebFace42M
69
+ # this is a feature
70
+ num_features = kwargs.get("num_features", 512)
71
+ from .vit import VisionTransformer
72
+ return VisionTransformer(
73
+ img_size=112, patch_size=9, num_classes=num_features, embed_dim=512, depth=24,
74
+ num_heads=8, drop_path_rate=0.05, norm_layer="ln", mask_ratio=0.05, using_checkpoint=True)
75
+
76
+ elif name == "vit_l_dp005_mask_005": # For WebFace42M
77
+ # this is a feature
78
+ num_features = kwargs.get("num_features", 512)
79
+ from .vit import VisionTransformer
80
+ return VisionTransformer(
81
+ img_size=112, patch_size=9, num_classes=num_features, embed_dim=768, depth=24,
82
+ num_heads=8, drop_path_rate=0.05, norm_layer="ln", mask_ratio=0.05, using_checkpoint=True)
83
+
84
+ elif name == "vit_h": # For WebFace42M
85
+ num_features = kwargs.get("num_features", 512)
86
+ from .vit import VisionTransformer
87
+ return VisionTransformer(
88
+ img_size=112, patch_size=9, num_classes=num_features, embed_dim=1024, depth=48,
89
+ num_heads=8, drop_path_rate=0.1, norm_layer="ln", mask_ratio=0, using_checkpoint=True)
90
+
91
+ else:
92
+ raise ValueError()
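The `get_model` dispatcher above is the single entry point for building a backbone from its string name. A minimal usage sketch, assuming the `arcface_torch` directory is the working directory so that `backbones` is importable (the input size and keyword arguments follow the defaults shown above):

```python
import torch

from backbones import get_model  # assumes arcface_torch/ is on sys.path

# Build an r50 backbone that produces 512-D embeddings (fp16 autocast disabled here).
net = get_model("r50", fp16=False, num_features=512)
net.eval()

# ArcFace backbones expect 112x112 aligned face crops.
dummy = torch.randn(1, 3, 112, 112)
with torch.no_grad():
    embedding = net(dummy)

print(embedding.shape)  # torch.Size([1, 512])
```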
additional_modules/deep3dfacerecon/models/arcface_torch/backbones/iresnet.py ADDED
@@ -0,0 +1,194 @@
1
+ import torch
2
+ from torch import nn
3
+ from torch.utils.checkpoint import checkpoint
4
+
5
+ __all__ = ['iresnet18', 'iresnet34', 'iresnet50', 'iresnet100', 'iresnet200']
6
+ using_ckpt = False
7
+
8
+ def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
9
+ """3x3 convolution with padding"""
10
+ return nn.Conv2d(in_planes,
11
+ out_planes,
12
+ kernel_size=3,
13
+ stride=stride,
14
+ padding=dilation,
15
+ groups=groups,
16
+ bias=False,
17
+ dilation=dilation)
18
+
19
+
20
+ def conv1x1(in_planes, out_planes, stride=1):
21
+ """1x1 convolution"""
22
+ return nn.Conv2d(in_planes,
23
+ out_planes,
24
+ kernel_size=1,
25
+ stride=stride,
26
+ bias=False)
27
+
28
+
29
+ class IBasicBlock(nn.Module):
30
+ expansion = 1
31
+ def __init__(self, inplanes, planes, stride=1, downsample=None,
32
+ groups=1, base_width=64, dilation=1):
33
+ super(IBasicBlock, self).__init__()
34
+ if groups != 1 or base_width != 64:
35
+ raise ValueError('BasicBlock only supports groups=1 and base_width=64')
36
+ if dilation > 1:
37
+ raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
38
+ self.bn1 = nn.BatchNorm2d(inplanes, eps=1e-05,)
39
+ self.conv1 = conv3x3(inplanes, planes)
40
+ self.bn2 = nn.BatchNorm2d(planes, eps=1e-05,)
41
+ self.prelu = nn.PReLU(planes)
42
+ self.conv2 = conv3x3(planes, planes, stride)
43
+ self.bn3 = nn.BatchNorm2d(planes, eps=1e-05,)
44
+ self.downsample = downsample
45
+ self.stride = stride
46
+
47
+ def forward_impl(self, x):
48
+ identity = x
49
+ out = self.bn1(x)
50
+ out = self.conv1(out)
51
+ out = self.bn2(out)
52
+ out = self.prelu(out)
53
+ out = self.conv2(out)
54
+ out = self.bn3(out)
55
+ if self.downsample is not None:
56
+ identity = self.downsample(x)
57
+ out += identity
58
+ return out
59
+
60
+ def forward(self, x):
61
+ if self.training and using_ckpt:
62
+ return checkpoint(self.forward_impl, x)
63
+ else:
64
+ return self.forward_impl(x)
65
+
66
+
67
+ class IResNet(nn.Module):
68
+ fc_scale = 7 * 7
69
+ def __init__(self,
70
+ block, layers, dropout=0, num_features=512, zero_init_residual=False,
71
+ groups=1, width_per_group=64, replace_stride_with_dilation=None, fp16=False):
72
+ super(IResNet, self).__init__()
73
+ self.extra_gflops = 0.0
74
+ self.fp16 = fp16
75
+ self.inplanes = 64
76
+ self.dilation = 1
77
+ if replace_stride_with_dilation is None:
78
+ replace_stride_with_dilation = [False, False, False]
79
+ if len(replace_stride_with_dilation) != 3:
80
+ raise ValueError("replace_stride_with_dilation should be None "
81
+ "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
82
+ self.groups = groups
83
+ self.base_width = width_per_group
84
+ self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=3, stride=1, padding=1, bias=False)
85
+ self.bn1 = nn.BatchNorm2d(self.inplanes, eps=1e-05)
86
+ self.prelu = nn.PReLU(self.inplanes)
87
+ self.layer1 = self._make_layer(block, 64, layers[0], stride=2)
88
+ self.layer2 = self._make_layer(block,
89
+ 128,
90
+ layers[1],
91
+ stride=2,
92
+ dilate=replace_stride_with_dilation[0])
93
+ self.layer3 = self._make_layer(block,
94
+ 256,
95
+ layers[2],
96
+ stride=2,
97
+ dilate=replace_stride_with_dilation[1])
98
+ self.layer4 = self._make_layer(block,
99
+ 512,
100
+ layers[3],
101
+ stride=2,
102
+ dilate=replace_stride_with_dilation[2])
103
+ self.bn2 = nn.BatchNorm2d(512 * block.expansion, eps=1e-05,)
104
+ self.dropout = nn.Dropout(p=dropout, inplace=True)
105
+ self.fc = nn.Linear(512 * block.expansion * self.fc_scale, num_features)
106
+ self.features = nn.BatchNorm1d(num_features, eps=1e-05)
107
+ nn.init.constant_(self.features.weight, 1.0)
108
+ self.features.weight.requires_grad = False
109
+
110
+ for m in self.modules():
111
+ if isinstance(m, nn.Conv2d):
112
+ nn.init.normal_(m.weight, 0, 0.1)
113
+ elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
114
+ nn.init.constant_(m.weight, 1)
115
+ nn.init.constant_(m.bias, 0)
116
+
117
+ if zero_init_residual:
118
+ for m in self.modules():
119
+ if isinstance(m, IBasicBlock):
120
+ nn.init.constant_(m.bn2.weight, 0)
121
+
122
+ def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
123
+ downsample = None
124
+ previous_dilation = self.dilation
125
+ if dilate:
126
+ self.dilation *= stride
127
+ stride = 1
128
+ if stride != 1 or self.inplanes != planes * block.expansion:
129
+ downsample = nn.Sequential(
130
+ conv1x1(self.inplanes, planes * block.expansion, stride),
131
+ nn.BatchNorm2d(planes * block.expansion, eps=1e-05, ),
132
+ )
133
+ layers = []
134
+ layers.append(
135
+ block(self.inplanes, planes, stride, downsample, self.groups,
136
+ self.base_width, previous_dilation))
137
+ self.inplanes = planes * block.expansion
138
+ for _ in range(1, blocks):
139
+ layers.append(
140
+ block(self.inplanes,
141
+ planes,
142
+ groups=self.groups,
143
+ base_width=self.base_width,
144
+ dilation=self.dilation))
145
+
146
+ return nn.Sequential(*layers)
147
+
148
+ def forward(self, x):
149
+ with torch.cuda.amp.autocast(self.fp16):
150
+ x = self.conv1(x)
151
+ x = self.bn1(x)
152
+ x = self.prelu(x)
153
+ x = self.layer1(x)
154
+ x = self.layer2(x)
155
+ x = self.layer3(x)
156
+ x = self.layer4(x)
157
+ x = self.bn2(x)
158
+ x = torch.flatten(x, 1)
159
+ x = self.dropout(x)
160
+ x = self.fc(x.float() if self.fp16 else x)
161
+ x = self.features(x)
162
+ return x
163
+
164
+
165
+ def _iresnet(arch, block, layers, pretrained, progress, **kwargs):
166
+ model = IResNet(block, layers, **kwargs)
167
+ if pretrained:
168
+ raise ValueError()
169
+ return model
170
+
171
+
172
+ def iresnet18(pretrained=False, progress=True, **kwargs):
173
+ return _iresnet('iresnet18', IBasicBlock, [2, 2, 2, 2], pretrained,
174
+ progress, **kwargs)
175
+
176
+
177
+ def iresnet34(pretrained=False, progress=True, **kwargs):
178
+ return _iresnet('iresnet34', IBasicBlock, [3, 4, 6, 3], pretrained,
179
+ progress, **kwargs)
180
+
181
+
182
+ def iresnet50(pretrained=False, progress=True, **kwargs):
183
+ return _iresnet('iresnet50', IBasicBlock, [3, 4, 14, 3], pretrained,
184
+ progress, **kwargs)
185
+
186
+
187
+ def iresnet100(pretrained=False, progress=True, **kwargs):
188
+ return _iresnet('iresnet100', IBasicBlock, [3, 13, 30, 3], pretrained,
189
+ progress, **kwargs)
190
+
191
+
192
+ def iresnet200(pretrained=False, progress=True, **kwargs):
193
+ return _iresnet('iresnet200', IBasicBlock, [6, 26, 60, 6], pretrained,
194
+ progress, **kwargs)
additional_modules/deep3dfacerecon/models/arcface_torch/backbones/iresnet2060.py ADDED
@@ -0,0 +1,176 @@
1
+ import torch
2
+ from torch import nn
3
+
4
+ assert torch.__version__ >= "1.8.1"
5
+ from torch.utils.checkpoint import checkpoint_sequential
6
+
7
+ __all__ = ['iresnet2060']
8
+
9
+
10
+ def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
11
+ """3x3 convolution with padding"""
12
+ return nn.Conv2d(in_planes,
13
+ out_planes,
14
+ kernel_size=3,
15
+ stride=stride,
16
+ padding=dilation,
17
+ groups=groups,
18
+ bias=False,
19
+ dilation=dilation)
20
+
21
+
22
+ def conv1x1(in_planes, out_planes, stride=1):
23
+ """1x1 convolution"""
24
+ return nn.Conv2d(in_planes,
25
+ out_planes,
26
+ kernel_size=1,
27
+ stride=stride,
28
+ bias=False)
29
+
30
+
31
+ class IBasicBlock(nn.Module):
32
+ expansion = 1
33
+
34
+ def __init__(self, inplanes, planes, stride=1, downsample=None,
35
+ groups=1, base_width=64, dilation=1):
36
+ super(IBasicBlock, self).__init__()
37
+ if groups != 1 or base_width != 64:
38
+ raise ValueError('BasicBlock only supports groups=1 and base_width=64')
39
+ if dilation > 1:
40
+ raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
41
+ self.bn1 = nn.BatchNorm2d(inplanes, eps=1e-05, )
42
+ self.conv1 = conv3x3(inplanes, planes)
43
+ self.bn2 = nn.BatchNorm2d(planes, eps=1e-05, )
44
+ self.prelu = nn.PReLU(planes)
45
+ self.conv2 = conv3x3(planes, planes, stride)
46
+ self.bn3 = nn.BatchNorm2d(planes, eps=1e-05, )
47
+ self.downsample = downsample
48
+ self.stride = stride
49
+
50
+ def forward(self, x):
51
+ identity = x
52
+ out = self.bn1(x)
53
+ out = self.conv1(out)
54
+ out = self.bn2(out)
55
+ out = self.prelu(out)
56
+ out = self.conv2(out)
57
+ out = self.bn3(out)
58
+ if self.downsample is not None:
59
+ identity = self.downsample(x)
60
+ out += identity
61
+ return out
62
+
63
+
64
+ class IResNet(nn.Module):
65
+ fc_scale = 7 * 7
66
+
67
+ def __init__(self,
68
+ block, layers, dropout=0, num_features=512, zero_init_residual=False,
69
+ groups=1, width_per_group=64, replace_stride_with_dilation=None, fp16=False):
70
+ super(IResNet, self).__init__()
71
+ self.fp16 = fp16
72
+ self.inplanes = 64
73
+ self.dilation = 1
74
+ if replace_stride_with_dilation is None:
75
+ replace_stride_with_dilation = [False, False, False]
76
+ if len(replace_stride_with_dilation) != 3:
77
+ raise ValueError("replace_stride_with_dilation should be None "
78
+ "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
79
+ self.groups = groups
80
+ self.base_width = width_per_group
81
+ self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=3, stride=1, padding=1, bias=False)
82
+ self.bn1 = nn.BatchNorm2d(self.inplanes, eps=1e-05)
83
+ self.prelu = nn.PReLU(self.inplanes)
84
+ self.layer1 = self._make_layer(block, 64, layers[0], stride=2)
85
+ self.layer2 = self._make_layer(block,
86
+ 128,
87
+ layers[1],
88
+ stride=2,
89
+ dilate=replace_stride_with_dilation[0])
90
+ self.layer3 = self._make_layer(block,
91
+ 256,
92
+ layers[2],
93
+ stride=2,
94
+ dilate=replace_stride_with_dilation[1])
95
+ self.layer4 = self._make_layer(block,
96
+ 512,
97
+ layers[3],
98
+ stride=2,
99
+ dilate=replace_stride_with_dilation[2])
100
+ self.bn2 = nn.BatchNorm2d(512 * block.expansion, eps=1e-05, )
101
+ self.dropout = nn.Dropout(p=dropout, inplace=True)
102
+ self.fc = nn.Linear(512 * block.expansion * self.fc_scale, num_features)
103
+ self.features = nn.BatchNorm1d(num_features, eps=1e-05)
104
+ nn.init.constant_(self.features.weight, 1.0)
105
+ self.features.weight.requires_grad = False
106
+
107
+ for m in self.modules():
108
+ if isinstance(m, nn.Conv2d):
109
+ nn.init.normal_(m.weight, 0, 0.1)
110
+ elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
111
+ nn.init.constant_(m.weight, 1)
112
+ nn.init.constant_(m.bias, 0)
113
+
114
+ if zero_init_residual:
115
+ for m in self.modules():
116
+ if isinstance(m, IBasicBlock):
117
+ nn.init.constant_(m.bn2.weight, 0)
118
+
119
+ def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
120
+ downsample = None
121
+ previous_dilation = self.dilation
122
+ if dilate:
123
+ self.dilation *= stride
124
+ stride = 1
125
+ if stride != 1 or self.inplanes != planes * block.expansion:
126
+ downsample = nn.Sequential(
127
+ conv1x1(self.inplanes, planes * block.expansion, stride),
128
+ nn.BatchNorm2d(planes * block.expansion, eps=1e-05, ),
129
+ )
130
+ layers = []
131
+ layers.append(
132
+ block(self.inplanes, planes, stride, downsample, self.groups,
133
+ self.base_width, previous_dilation))
134
+ self.inplanes = planes * block.expansion
135
+ for _ in range(1, blocks):
136
+ layers.append(
137
+ block(self.inplanes,
138
+ planes,
139
+ groups=self.groups,
140
+ base_width=self.base_width,
141
+ dilation=self.dilation))
142
+
143
+ return nn.Sequential(*layers)
144
+
145
+ def checkpoint(self, func, num_seg, x):
146
+ if self.training:
147
+ return checkpoint_sequential(func, num_seg, x)
148
+ else:
149
+ return func(x)
150
+
151
+ def forward(self, x):
152
+ with torch.cuda.amp.autocast(self.fp16):
153
+ x = self.conv1(x)
154
+ x = self.bn1(x)
155
+ x = self.prelu(x)
156
+ x = self.layer1(x)
157
+ x = self.checkpoint(self.layer2, 20, x)
158
+ x = self.checkpoint(self.layer3, 100, x)
159
+ x = self.layer4(x)
160
+ x = self.bn2(x)
161
+ x = torch.flatten(x, 1)
162
+ x = self.dropout(x)
163
+ x = self.fc(x.float() if self.fp16 else x)
164
+ x = self.features(x)
165
+ return x
166
+
167
+
168
+ def _iresnet(arch, block, layers, pretrained, progress, **kwargs):
169
+ model = IResNet(block, layers, **kwargs)
170
+ if pretrained:
171
+ raise ValueError()
172
+ return model
173
+
174
+
175
+ def iresnet2060(pretrained=False, progress=True, **kwargs):
176
+ return _iresnet('iresnet2060', IBasicBlock, [3, 128, 1024 - 128, 3], pretrained, progress, **kwargs)
additional_modules/deep3dfacerecon/models/arcface_torch/backbones/mobilefacenet.py ADDED
@@ -0,0 +1,147 @@
1
+ '''
2
+ Adapted from https://github.com/cavalleria/cavaface.pytorch/blob/master/backbone/mobilefacenet.py
3
+ Original author cavalleria
4
+ '''
5
+
6
+ import torch.nn as nn
7
+ from torch.nn import Linear, Conv2d, BatchNorm1d, BatchNorm2d, PReLU, Sequential, Module
8
+ import torch
9
+
10
+
11
+ class Flatten(Module):
12
+ def forward(self, x):
13
+ return x.view(x.size(0), -1)
14
+
15
+
16
+ class ConvBlock(Module):
17
+ def __init__(self, in_c, out_c, kernel=(1, 1), stride=(1, 1), padding=(0, 0), groups=1):
18
+ super(ConvBlock, self).__init__()
19
+ self.layers = nn.Sequential(
20
+ Conv2d(in_c, out_c, kernel, groups=groups, stride=stride, padding=padding, bias=False),
21
+ BatchNorm2d(num_features=out_c),
22
+ PReLU(num_parameters=out_c)
23
+ )
24
+
25
+ def forward(self, x):
26
+ return self.layers(x)
27
+
28
+
29
+ class LinearBlock(Module):
30
+ def __init__(self, in_c, out_c, kernel=(1, 1), stride=(1, 1), padding=(0, 0), groups=1):
31
+ super(LinearBlock, self).__init__()
32
+ self.layers = nn.Sequential(
33
+ Conv2d(in_c, out_c, kernel, stride, padding, groups=groups, bias=False),
34
+ BatchNorm2d(num_features=out_c)
35
+ )
36
+
37
+ def forward(self, x):
38
+ return self.layers(x)
39
+
40
+
41
+ class DepthWise(Module):
42
+ def __init__(self, in_c, out_c, residual=False, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=1):
43
+ super(DepthWise, self).__init__()
44
+ self.residual = residual
45
+ self.layers = nn.Sequential(
46
+ ConvBlock(in_c, out_c=groups, kernel=(1, 1), padding=(0, 0), stride=(1, 1)),
47
+ ConvBlock(groups, groups, groups=groups, kernel=kernel, padding=padding, stride=stride),
48
+ LinearBlock(groups, out_c, kernel=(1, 1), padding=(0, 0), stride=(1, 1))
49
+ )
50
+
51
+ def forward(self, x):
52
+ short_cut = None
53
+ if self.residual:
54
+ short_cut = x
55
+ x = self.layers(x)
56
+ if self.residual:
57
+ output = short_cut + x
58
+ else:
59
+ output = x
60
+ return output
61
+
62
+
63
+ class Residual(Module):
64
+ def __init__(self, c, num_block, groups, kernel=(3, 3), stride=(1, 1), padding=(1, 1)):
65
+ super(Residual, self).__init__()
66
+ modules = []
67
+ for _ in range(num_block):
68
+ modules.append(DepthWise(c, c, True, kernel, stride, padding, groups))
69
+ self.layers = Sequential(*modules)
70
+
71
+ def forward(self, x):
72
+ return self.layers(x)
73
+
74
+
75
+ class GDC(Module):
76
+ def __init__(self, embedding_size):
77
+ super(GDC, self).__init__()
78
+ self.layers = nn.Sequential(
79
+ LinearBlock(512, 512, groups=512, kernel=(7, 7), stride=(1, 1), padding=(0, 0)),
80
+ Flatten(),
81
+ Linear(512, embedding_size, bias=False),
82
+ BatchNorm1d(embedding_size))
83
+
84
+ def forward(self, x):
85
+ return self.layers(x)
86
+
87
+
88
+ class MobileFaceNet(Module):
89
+ def __init__(self, fp16=False, num_features=512, blocks=(1, 4, 6, 2), scale=2):
90
+ super(MobileFaceNet, self).__init__()
91
+ self.scale = scale
92
+ self.fp16 = fp16
93
+ self.layers = nn.ModuleList()
94
+ self.layers.append(
95
+ ConvBlock(3, 64 * self.scale, kernel=(3, 3), stride=(2, 2), padding=(1, 1))
96
+ )
97
+ if blocks[0] == 1:
98
+ self.layers.append(
99
+ ConvBlock(64 * self.scale, 64 * self.scale, kernel=(3, 3), stride=(1, 1), padding=(1, 1), groups=64)
100
+ )
101
+ else:
102
+ self.layers.append(
103
+ Residual(64 * self.scale, num_block=blocks[0], groups=128, kernel=(3, 3), stride=(1, 1), padding=(1, 1)),
104
+ )
105
+
106
+ self.layers.extend(
107
+ [
108
+ DepthWise(64 * self.scale, 64 * self.scale, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=128),
109
+ Residual(64 * self.scale, num_block=blocks[1], groups=128, kernel=(3, 3), stride=(1, 1), padding=(1, 1)),
110
+ DepthWise(64 * self.scale, 128 * self.scale, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=256),
111
+ Residual(128 * self.scale, num_block=blocks[2], groups=256, kernel=(3, 3), stride=(1, 1), padding=(1, 1)),
112
+ DepthWise(128 * self.scale, 128 * self.scale, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=512),
113
+ Residual(128 * self.scale, num_block=blocks[3], groups=256, kernel=(3, 3), stride=(1, 1), padding=(1, 1)),
114
+ ])
115
+
116
+ self.conv_sep = ConvBlock(128 * self.scale, 512, kernel=(1, 1), stride=(1, 1), padding=(0, 0))
117
+ self.features = GDC(num_features)
118
+ self._initialize_weights()
119
+
120
+ def _initialize_weights(self):
121
+ for m in self.modules():
122
+ if isinstance(m, nn.Conv2d):
123
+ nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
124
+ if m.bias is not None:
125
+ m.bias.data.zero_()
126
+ elif isinstance(m, nn.BatchNorm2d):
127
+ m.weight.data.fill_(1)
128
+ m.bias.data.zero_()
129
+ elif isinstance(m, nn.Linear):
130
+ nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
131
+ if m.bias is not None:
132
+ m.bias.data.zero_()
133
+
134
+ def forward(self, x):
135
+ with torch.cuda.amp.autocast(self.fp16):
136
+ for func in self.layers:
137
+ x = func(x)
138
+ x = self.conv_sep(x.float() if self.fp16 else x)
139
+ x = self.features(x)
140
+ return x
141
+
142
+
143
+ def get_mbf(fp16, num_features, blocks=(1, 4, 6, 2), scale=2):
144
+ return MobileFaceNet(fp16, num_features, blocks, scale=scale)
145
+
146
+ def get_mbf_large(fp16, num_features, blocks=(2, 8, 12, 4), scale=4):
147
+ return MobileFaceNet(fp16, num_features, blocks, scale=scale)
additional_modules/deep3dfacerecon/models/arcface_torch/backbones/vit.py ADDED
@@ -0,0 +1,280 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ from timm.models.layers import DropPath, to_2tuple, trunc_normal_
4
+ from typing import Optional, Callable
5
+
6
+ class Mlp(nn.Module):
7
+ def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.ReLU6, drop=0.):
8
+ super().__init__()
9
+ out_features = out_features or in_features
10
+ hidden_features = hidden_features or in_features
11
+ self.fc1 = nn.Linear(in_features, hidden_features)
12
+ self.act = act_layer()
13
+ self.fc2 = nn.Linear(hidden_features, out_features)
14
+ self.drop = nn.Dropout(drop)
15
+
16
+ def forward(self, x):
17
+ x = self.fc1(x)
18
+ x = self.act(x)
19
+ x = self.drop(x)
20
+ x = self.fc2(x)
21
+ x = self.drop(x)
22
+ return x
23
+
24
+
25
+ class VITBatchNorm(nn.Module):
26
+ def __init__(self, num_features):
27
+ super().__init__()
28
+ self.num_features = num_features
29
+ self.bn = nn.BatchNorm1d(num_features=num_features)
30
+
31
+ def forward(self, x):
32
+ return self.bn(x)
33
+
34
+
35
+ class Attention(nn.Module):
36
+ def __init__(self,
37
+ dim: int,
38
+ num_heads: int = 8,
39
+ qkv_bias: bool = False,
40
+ qk_scale: Optional[None] = None,
41
+ attn_drop: float = 0.,
42
+ proj_drop: float = 0.):
43
+ super().__init__()
44
+ self.num_heads = num_heads
45
+ head_dim = dim // num_heads
46
+ # NOTE scale factor was wrong in my original version, can set manually to be compat with prev weights
47
+ self.scale = qk_scale or head_dim ** -0.5
48
+
49
+ self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
50
+ self.attn_drop = nn.Dropout(attn_drop)
51
+ self.proj = nn.Linear(dim, dim)
52
+ self.proj_drop = nn.Dropout(proj_drop)
53
+
54
+ def forward(self, x):
55
+
56
+ with torch.cuda.amp.autocast(True):
57
+ batch_size, num_token, embed_dim = x.shape
58
+ #qkv is [3,batch_size,num_heads,num_token, embed_dim//num_heads]
59
+ qkv = self.qkv(x).reshape(
60
+ batch_size, num_token, 3, self.num_heads, embed_dim // self.num_heads).permute(2, 0, 3, 1, 4)
61
+ with torch.cuda.amp.autocast(False):
62
+ q, k, v = qkv[0].float(), qkv[1].float(), qkv[2].float()
63
+ attn = (q @ k.transpose(-2, -1)) * self.scale
64
+ attn = attn.softmax(dim=-1)
65
+ attn = self.attn_drop(attn)
66
+ x = (attn @ v).transpose(1, 2).reshape(batch_size, num_token, embed_dim)
67
+ with torch.cuda.amp.autocast(True):
68
+ x = self.proj(x)
69
+ x = self.proj_drop(x)
70
+ return x
71
+
72
+
73
+ class Block(nn.Module):
74
+
75
+ def __init__(self,
76
+ dim: int,
77
+ num_heads: int,
78
+ num_patches: int,
79
+ mlp_ratio: float = 4.,
80
+ qkv_bias: bool = False,
81
+ qk_scale: Optional[None] = None,
82
+ drop: float = 0.,
83
+ attn_drop: float = 0.,
84
+ drop_path: float = 0.,
85
+ act_layer: Callable = nn.ReLU6,
86
+ norm_layer: str = "ln",
87
+ patch_n: int = 144):
88
+ super().__init__()
89
+
90
+ if norm_layer == "bn":
91
+ self.norm1 = VITBatchNorm(num_features=num_patches)
92
+ self.norm2 = VITBatchNorm(num_features=num_patches)
93
+ elif norm_layer == "ln":
94
+ self.norm1 = nn.LayerNorm(dim)
95
+ self.norm2 = nn.LayerNorm(dim)
96
+
97
+ self.attn = Attention(
98
+ dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=drop)
99
+ # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
100
+ self.drop_path = DropPath(
101
+ drop_path) if drop_path > 0. else nn.Identity()
102
+ mlp_hidden_dim = int(dim * mlp_ratio)
103
+ self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim,
104
+ act_layer=act_layer, drop=drop)
105
+ self.extra_gflops = (num_heads * patch_n * (dim//num_heads)*patch_n * 2) / (1000**3)
106
+
107
+ def forward(self, x):
108
+ x = x + self.drop_path(self.attn(self.norm1(x)))
109
+ with torch.cuda.amp.autocast(True):
110
+ x = x + self.drop_path(self.mlp(self.norm2(x)))
111
+ return x
112
+
113
+
114
+ class PatchEmbed(nn.Module):
115
+ def __init__(self, img_size=108, patch_size=9, in_channels=3, embed_dim=768):
116
+ super().__init__()
117
+ img_size = to_2tuple(img_size)
118
+ patch_size = to_2tuple(patch_size)
119
+ num_patches = (img_size[1] // patch_size[1]) * \
120
+ (img_size[0] // patch_size[0])
121
+ self.img_size = img_size
122
+ self.patch_size = patch_size
123
+ self.num_patches = num_patches
124
+ self.proj = nn.Conv2d(in_channels, embed_dim,
125
+ kernel_size=patch_size, stride=patch_size)
126
+
127
+ def forward(self, x):
128
+ batch_size, channels, height, width = x.shape
129
+ assert height == self.img_size[0] and width == self.img_size[1], \
130
+ f"Input image size ({height}*{width}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})."
131
+ x = self.proj(x).flatten(2).transpose(1, 2)
132
+ return x
133
+
134
+
135
+ class VisionTransformer(nn.Module):
136
+ """ Vision Transformer with support for patch or hybrid CNN input stage
137
+ """
138
+
139
+ def __init__(self,
140
+ img_size: int = 112,
141
+ patch_size: int = 16,
142
+ in_channels: int = 3,
143
+ num_classes: int = 1000,
144
+ embed_dim: int = 768,
145
+ depth: int = 12,
146
+ num_heads: int = 12,
147
+ mlp_ratio: float = 4.,
148
+ qkv_bias: bool = False,
149
+ qk_scale: Optional[None] = None,
150
+ drop_rate: float = 0.,
151
+ attn_drop_rate: float = 0.,
152
+ drop_path_rate: float = 0.,
153
+ hybrid_backbone: Optional[None] = None,
154
+ norm_layer: str = "ln",
155
+ mask_ratio = 0.1,
156
+ using_checkpoint = False,
157
+ ):
158
+ super().__init__()
159
+ self.num_classes = num_classes
160
+ # num_features for consistency with other models
161
+ self.num_features = self.embed_dim = embed_dim
162
+
163
+ if hybrid_backbone is not None:
164
+ raise ValueError
165
+ else:
166
+ self.patch_embed = PatchEmbed(img_size=img_size, patch_size=patch_size, in_channels=in_channels, embed_dim=embed_dim)
167
+ self.mask_ratio = mask_ratio
168
+ self.using_checkpoint = using_checkpoint
169
+ num_patches = self.patch_embed.num_patches
170
+ self.num_patches = num_patches
171
+
172
+ self.pos_embed = nn.Parameter(torch.zeros(1, num_patches, embed_dim))
173
+ self.pos_drop = nn.Dropout(p=drop_rate)
174
+
175
+ # stochastic depth decay rule
176
+ dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)]
177
+ patch_n = (img_size//patch_size)**2
178
+ self.blocks = nn.ModuleList(
179
+ [
180
+ Block(dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale,
181
+ drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i], norm_layer=norm_layer,
182
+ num_patches=num_patches, patch_n=patch_n)
183
+ for i in range(depth)]
184
+ )
185
+ self.extra_gflops = 0.0
186
+ for _block in self.blocks:
187
+ self.extra_gflops += _block.extra_gflops
188
+
189
+ if norm_layer == "ln":
190
+ self.norm = nn.LayerNorm(embed_dim)
191
+ elif norm_layer == "bn":
192
+ self.norm = VITBatchNorm(self.num_patches)
193
+
194
+ # features head
195
+ self.feature = nn.Sequential(
196
+ nn.Linear(in_features=embed_dim * num_patches, out_features=embed_dim, bias=False),
197
+ nn.BatchNorm1d(num_features=embed_dim, eps=2e-5),
198
+ nn.Linear(in_features=embed_dim, out_features=num_classes, bias=False),
199
+ nn.BatchNorm1d(num_features=num_classes, eps=2e-5)
200
+ )
201
+
202
+ self.mask_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
203
+ torch.nn.init.normal_(self.mask_token, std=.02)
204
+ trunc_normal_(self.pos_embed, std=.02)
205
+ # trunc_normal_(self.cls_token, std=.02)
206
+ self.apply(self._init_weights)
207
+
208
+ def _init_weights(self, m):
209
+ if isinstance(m, nn.Linear):
210
+ trunc_normal_(m.weight, std=.02)
211
+ if isinstance(m, nn.Linear) and m.bias is not None:
212
+ nn.init.constant_(m.bias, 0)
213
+ elif isinstance(m, nn.LayerNorm):
214
+ nn.init.constant_(m.bias, 0)
215
+ nn.init.constant_(m.weight, 1.0)
216
+
217
+ @torch.jit.ignore
218
+ def no_weight_decay(self):
219
+ return {'pos_embed', 'cls_token'}
220
+
221
+ def get_classifier(self):
222
+ return self.head
223
+
224
+ def random_masking(self, x, mask_ratio=0.1):
225
+ """
226
+ Perform per-sample random masking by per-sample shuffling.
227
+ Per-sample shuffling is done by argsort random noise.
228
+ x: [N, L, D], sequence
229
+ """
230
+ N, L, D = x.size() # batch, length, dim
231
+ len_keep = int(L * (1 - mask_ratio))
232
+
233
+ noise = torch.rand(N, L, device=x.device) # noise in [0, 1]
234
+
235
+ # sort noise for each sample
236
+ # ascend: small is keep, large is remove
237
+ ids_shuffle = torch.argsort(noise, dim=1)
238
+ ids_restore = torch.argsort(ids_shuffle, dim=1)
239
+
240
+ # keep the first subset
241
+ ids_keep = ids_shuffle[:, :len_keep]
242
+ x_masked = torch.gather(
243
+ x, dim=1, index=ids_keep.unsqueeze(-1).repeat(1, 1, D))
244
+
245
+ # generate the binary mask: 0 is keep, 1 is remove
246
+ mask = torch.ones([N, L], device=x.device)
247
+ mask[:, :len_keep] = 0
248
+ # unshuffle to get the binary mask
249
+ mask = torch.gather(mask, dim=1, index=ids_restore)
250
+
251
+ return x_masked, mask, ids_restore
252
+
253
+ def forward_features(self, x):
254
+ B = x.shape[0]
255
+ x = self.patch_embed(x)
256
+ x = x + self.pos_embed
257
+ x = self.pos_drop(x)
258
+
259
+ if self.training and self.mask_ratio > 0:
260
+ x, _, ids_restore = self.random_masking(x)
261
+
262
+ for func in self.blocks:
263
+ if self.using_checkpoint and self.training:
264
+ from torch.utils.checkpoint import checkpoint
265
+ x = checkpoint(func, x)
266
+ else:
267
+ x = func(x)
268
+ x = self.norm(x.float())
269
+
270
+ if self.training and self.mask_ratio > 0:
271
+ mask_tokens = self.mask_token.repeat(x.shape[0], ids_restore.shape[1] - x.shape[1], 1)
272
+ x_ = torch.cat([x[:, :, :], mask_tokens], dim=1) # no cls token
273
+ x_ = torch.gather(x_, dim=1, index=ids_restore.unsqueeze(-1).repeat(1, 1, x.shape[2])) # unshuffle
274
+ x = x_
275
+ return torch.reshape(x, (B, self.num_patches * self.embed_dim))
276
+
277
+ def forward(self, x):
278
+ x = self.forward_features(x)
279
+ x = self.feature(x)
280
+ return x
additional_modules/deep3dfacerecon/models/arcface_torch/configs/3millions.py ADDED
@@ -0,0 +1,23 @@
1
+ from easydict import EasyDict as edict
2
+
3
+ # configs for test speed
4
+
5
+ config = edict()
6
+ config.margin_list = (1.0, 0.0, 0.4)
7
+ config.network = "mbf"
8
+ config.resume = False
9
+ config.output = None
10
+ config.embedding_size = 512
11
+ config.sample_rate = 0.1
12
+ config.fp16 = True
13
+ config.momentum = 0.9
14
+ config.weight_decay = 5e-4
15
+ config.batch_size = 512 # total_batch_size = batch_size * num_gpus
16
+ config.lr = 0.1 # batch size is 512
17
+
18
+ config.rec = "synthetic"
19
+ config.num_classes = 30 * 10000
20
+ config.num_image = 100000
21
+ config.num_epoch = 30
22
+ config.warmup_epoch = -1
23
+ config.val_targets = []
additional_modules/deep3dfacerecon/models/arcface_torch/configs/__init__.py ADDED
File without changes
additional_modules/deep3dfacerecon/models/arcface_torch/configs/base.py ADDED
@@ -0,0 +1,60 @@
1
+ from easydict import EasyDict as edict
2
+
3
+ # make training faster
4
+ # our RAM is 256G
5
+ # mount -t tmpfs -o size=140G tmpfs /train_tmp
6
+
7
+ config = edict()
8
+
9
+ # Margin Base Softmax
10
+ config.margin_list = (1.0, 0.5, 0.0)
11
+ config.network = "r50"
12
+ config.resume = False
13
+ config.save_all_states = False
14
+ config.output = "ms1mv3_arcface_r50"
15
+
16
+ config.embedding_size = 512
17
+
18
+ # Partial FC
19
+ config.sample_rate = 1
20
+ config.interclass_filtering_threshold = 0
21
+
22
+ config.fp16 = False
23
+ config.batch_size = 128
24
+
25
+ # For SGD
26
+ config.optimizer = "sgd"
27
+ config.lr = 0.1
28
+ config.momentum = 0.9
29
+ config.weight_decay = 5e-4
30
+
31
+ # For AdamW
32
+ # config.optimizer = "adamw"
33
+ # config.lr = 0.001
34
+ # config.weight_decay = 0.1
35
+
36
+ config.verbose = 2000
37
+ config.frequent = 10
38
+
39
+ # For large-scale datasets, such as WebFace42M
40
+ config.dali = False
41
+ config.dali_aug = False
42
+
43
+ # Gradient ACC
44
+ config.gradient_acc = 1
45
+
46
+ # setup seed
47
+ config.seed = 2048
48
+
49
+ # dataloader num_workers
50
+ config.num_workers = 2
51
+
52
+ # WandB Logger
53
+ config.wandb_key = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
54
+ config.suffix_run_name = None
55
+ config.using_wandb = False
56
+ config.wandb_entity = "entity"
57
+ config.wandb_project = "project"
58
+ config.wandb_log_all = True
59
+ config.save_artifacts = False
60
+ config.wandb_resume = False # resume the wandb run: only if you want to resume the last run that was interrupted
additional_modules/deep3dfacerecon/models/arcface_torch/configs/glint360k_mbf.py ADDED
@@ -0,0 +1,27 @@
1
+ from easydict import EasyDict as edict
2
+
3
+ # make training faster
4
+ # our RAM is 256G
5
+ # mount -t tmpfs -o size=140G tmpfs /train_tmp
6
+
7
+ config = edict()
8
+ config.margin_list = (1.0, 0.0, 0.4)
9
+ config.network = "mbf"
10
+ config.resume = False
11
+ config.output = None
12
+ config.embedding_size = 512
13
+ config.sample_rate = 1.0
14
+ config.fp16 = True
15
+ config.momentum = 0.9
16
+ config.weight_decay = 1e-4
17
+ config.batch_size = 128
18
+ config.lr = 0.1
19
+ config.verbose = 2000
20
+ config.dali = False
21
+
22
+ config.rec = "/train_tmp/glint360k"
23
+ config.num_classes = 360232
24
+ config.num_image = 17091657
25
+ config.num_epoch = 20
26
+ config.warmup_epoch = 0
27
+ config.val_targets = ['lfw', 'cfp_fp', "agedb_30"]
additional_modules/deep3dfacerecon/models/arcface_torch/configs/glint360k_r100.py ADDED
@@ -0,0 +1,27 @@
1
+ from easydict import EasyDict as edict
2
+
3
+ # make training faster
4
+ # our RAM is 256G
5
+ # mount -t tmpfs -o size=140G tmpfs /train_tmp
6
+
7
+ config = edict()
8
+ config.margin_list = (1.0, 0.0, 0.4)
9
+ config.network = "r100"
10
+ config.resume = False
11
+ config.output = None
12
+ config.embedding_size = 512
13
+ config.sample_rate = 1.0
14
+ config.fp16 = True
15
+ config.momentum = 0.9
16
+ config.weight_decay = 1e-4
17
+ config.batch_size = 128
18
+ config.lr = 0.1
19
+ config.verbose = 2000
20
+ config.dali = False
21
+
22
+ config.rec = "/train_tmp/glint360k"
23
+ config.num_classes = 360232
24
+ config.num_image = 17091657
25
+ config.num_epoch = 20
26
+ config.warmup_epoch = 0
27
+ config.val_targets = ['lfw', 'cfp_fp', "agedb_30"]
additional_modules/deep3dfacerecon/models/arcface_torch/configs/glint360k_r50.py ADDED
@@ -0,0 +1,27 @@
1
+ from easydict import EasyDict as edict
2
+
3
+ # make training faster
4
+ # our RAM is 256G
5
+ # mount -t tmpfs -o size=140G tmpfs /train_tmp
6
+
7
+ config = edict()
8
+ config.margin_list = (1.0, 0.0, 0.4)
9
+ config.network = "r50"
10
+ config.resume = False
11
+ config.output = None
12
+ config.embedding_size = 512
13
+ config.sample_rate = 1.0
14
+ config.fp16 = True
15
+ config.momentum = 0.9
16
+ config.weight_decay = 1e-4
17
+ config.batch_size = 128
18
+ config.lr = 0.1
19
+ config.verbose = 2000
20
+ config.dali = False
21
+
22
+ config.rec = "/train_tmp/glint360k"
23
+ config.num_classes = 360232
24
+ config.num_image = 17091657
25
+ config.num_epoch = 20
26
+ config.warmup_epoch = 0
27
+ config.val_targets = ['lfw', 'cfp_fp', "agedb_30"]
additional_modules/deep3dfacerecon/models/arcface_torch/configs/ms1mv2_mbf.py ADDED
@@ -0,0 +1,27 @@
1
+ from easydict import EasyDict as edict
2
+
3
+ # make training faster
4
+ # our RAM is 256G
5
+ # mount -t tmpfs -o size=140G tmpfs /train_tmp
6
+
7
+ config = edict()
8
+ config.margin_list = (1.0, 0.5, 0.0)
9
+ config.network = "mbf"
10
+ config.resume = False
11
+ config.output = None
12
+ config.embedding_size = 512
13
+ config.sample_rate = 1.0
14
+ config.fp16 = True
15
+ config.momentum = 0.9
16
+ config.weight_decay = 1e-4
17
+ config.batch_size = 128
18
+ config.lr = 0.1
19
+ config.verbose = 2000
20
+ config.dali = False
21
+
22
+ config.rec = "/train_tmp/faces_emore"
23
+ config.num_classes = 85742
24
+ config.num_image = 5822653
25
+ config.num_epoch = 40
26
+ config.warmup_epoch = 0
27
+ config.val_targets = ['lfw', 'cfp_fp', "agedb_30"]
additional_modules/deep3dfacerecon/models/arcface_torch/configs/ms1mv2_r100.py ADDED
@@ -0,0 +1,27 @@
1
+ from easydict import EasyDict as edict
2
+
3
+ # make training faster
4
+ # our RAM is 256G
5
+ # mount -t tmpfs -o size=140G tmpfs /train_tmp
6
+
7
+ config = edict()
8
+ config.margin_list = (1.0, 0.5, 0.0)
9
+ config.network = "r100"
10
+ config.resume = False
11
+ config.output = None
12
+ config.embedding_size = 512
13
+ config.sample_rate = 1.0
14
+ config.fp16 = True
15
+ config.momentum = 0.9
16
+ config.weight_decay = 5e-4
17
+ config.batch_size = 128
18
+ config.lr = 0.1
19
+ config.verbose = 2000
20
+ config.dali = False
21
+
22
+ config.rec = "/train_tmp/faces_emore"
23
+ config.num_classes = 85742
24
+ config.num_image = 5822653
25
+ config.num_epoch = 20
26
+ config.warmup_epoch = 0
27
+ config.val_targets = ['lfw', 'cfp_fp', "agedb_30"]
additional_modules/deep3dfacerecon/models/arcface_torch/configs/ms1mv2_r50.py ADDED
@@ -0,0 +1,27 @@
1
+ from easydict import EasyDict as edict
2
+
3
+ # make training faster
4
+ # our RAM is 256G
5
+ # mount -t tmpfs -o size=140G tmpfs /train_tmp
6
+
7
+ config = edict()
8
+ config.margin_list = (1.0, 0.5, 0.0)
9
+ config.network = "r50"
10
+ config.resume = False
11
+ config.output = None
12
+ config.embedding_size = 512
13
+ config.sample_rate = 1.0
14
+ config.fp16 = True
15
+ config.momentum = 0.9
16
+ config.weight_decay = 5e-4
17
+ config.batch_size = 128
18
+ config.lr = 0.1
19
+ config.verbose = 2000
20
+ config.dali = False
21
+
22
+ config.rec = "/train_tmp/faces_emore"
23
+ config.num_classes = 85742
24
+ config.num_image = 5822653
25
+ config.num_epoch = 20
26
+ config.warmup_epoch = 0
27
+ config.val_targets = ['lfw', 'cfp_fp', "agedb_30"]
additional_modules/deep3dfacerecon/models/arcface_torch/configs/ms1mv3_mbf.py ADDED
@@ -0,0 +1,27 @@
1
+ from easydict import EasyDict as edict
2
+
3
+ # make training faster
4
+ # our RAM is 256G
5
+ # mount -t tmpfs -o size=140G tmpfs /train_tmp
6
+
7
+ config = edict()
8
+ config.margin_list = (1.0, 0.5, 0.0)
9
+ config.network = "mbf"
10
+ config.resume = False
11
+ config.output = None
12
+ config.embedding_size = 512
13
+ config.sample_rate = 1.0
14
+ config.fp16 = True
15
+ config.momentum = 0.9
16
+ config.weight_decay = 1e-4
17
+ config.batch_size = 128
18
+ config.lr = 0.1
19
+ config.verbose = 2000
20
+ config.dali = False
21
+
22
+ config.rec = "/train_tmp/ms1m-retinaface-t1"
23
+ config.num_classes = 93431
24
+ config.num_image = 5179510
25
+ config.num_epoch = 40
26
+ config.warmup_epoch = 0
27
+ config.val_targets = ['lfw', 'cfp_fp', "agedb_30"]
additional_modules/deep3dfacerecon/models/arcface_torch/configs/ms1mv3_r100.py ADDED
@@ -0,0 +1,27 @@
1
+ from easydict import EasyDict as edict
2
+
3
+ # make training faster
4
+ # our RAM is 256G
5
+ # mount -t tmpfs -o size=140G tmpfs /train_tmp
6
+
7
+ config = edict()
8
+ config.margin_list = (1.0, 0.5, 0.0)
9
+ config.network = "r100"
10
+ config.resume = False
11
+ config.output = None
12
+ config.embedding_size = 512
13
+ config.sample_rate = 1.0
14
+ config.fp16 = True
15
+ config.momentum = 0.9
16
+ config.weight_decay = 5e-4
17
+ config.batch_size = 128
18
+ config.lr = 0.1
19
+ config.verbose = 2000
20
+ config.dali = False
21
+
22
+ config.rec = "/train_tmp/ms1m-retinaface-t1"
23
+ config.num_classes = 93431
24
+ config.num_image = 5179510
25
+ config.num_epoch = 20
26
+ config.warmup_epoch = 0
27
+ config.val_targets = ['lfw', 'cfp_fp', "agedb_30"]
additional_modules/deep3dfacerecon/models/arcface_torch/configs/ms1mv3_r50.py ADDED
@@ -0,0 +1,27 @@
1
+ from easydict import EasyDict as edict
2
+
3
+ # make training faster
4
+ # our RAM is 256G
5
+ # mount -t tmpfs -o size=140G tmpfs /train_tmp
6
+
7
+ config = edict()
8
+ config.margin_list = (1.0, 0.5, 0.0)
9
+ config.network = "r50"
10
+ config.resume = False
11
+ config.output = None
12
+ config.embedding_size = 512
13
+ config.sample_rate = 1.0
14
+ config.fp16 = True
15
+ config.momentum = 0.9
16
+ config.weight_decay = 5e-4
17
+ config.batch_size = 128
18
+ config.lr = 0.1
19
+ config.verbose = 2000
20
+ config.dali = False
21
+
22
+ config.rec = "/train_tmp/ms1m-retinaface-t1"
23
+ config.num_classes = 93431
24
+ config.num_image = 5179510
25
+ config.num_epoch = 20
26
+ config.warmup_epoch = 0
27
+ config.val_targets = ['lfw', 'cfp_fp', "agedb_30"]
additional_modules/deep3dfacerecon/models/arcface_torch/configs/ms1mv3_r50_onegpu.py ADDED
@@ -0,0 +1,27 @@
1
+ from easydict import EasyDict as edict
2
+
3
+ # make training faster
4
+ # our RAM is 256G
5
+ # mount -t tmpfs -o size=140G tmpfs /train_tmp
6
+
7
+ config = edict()
8
+ config.margin_list = (1.0, 0.5, 0.0)
9
+ config.network = "r50"
10
+ config.resume = False
11
+ config.output = None
12
+ config.embedding_size = 512
13
+ config.sample_rate = 1.0
14
+ config.fp16 = True
15
+ config.momentum = 0.9
16
+ config.weight_decay = 5e-4
17
+ config.batch_size = 128
18
+ config.lr = 0.02
19
+ config.verbose = 2000
20
+ config.dali = False
21
+
22
+ config.rec = "/train_tmp/ms1m-retinaface-t1"
23
+ config.num_classes = 93431
24
+ config.num_image = 5179510
25
+ config.num_epoch = 20
26
+ config.warmup_epoch = 0
27
+ config.val_targets = ['lfw', 'cfp_fp', "agedb_30"]