urikxx committed on
Commit
7555f39
·
verified ·
1 Parent(s): b89060c

Delete EchoMimic

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. EchoMimic/.gitignore +0 -4
  2. EchoMimic/.ipynb_checkpoints/Untitled-checkpoint.ipynb +0 -6
  3. EchoMimic/LICENSE +0 -201
  4. EchoMimic/README.md +0 -285
  5. EchoMimic/Untitled.ipynb +0 -315
  6. EchoMimic/assets/.ipynb_checkpoints/echomimic-checkpoint.png +0 -0
  7. EchoMimic/assets/driven_videos/a.mp4 +0 -3
  8. EchoMimic/assets/driven_videos/b.mp4 +0 -3
  9. EchoMimic/assets/driven_videos/c.mp4 +0 -3
  10. EchoMimic/assets/echomimic.png +0 -0
  11. EchoMimic/assets/test_audios/chunnuanhuakai.wav +0 -0
  12. EchoMimic/assets/test_audios/chunwang.wav +0 -0
  13. EchoMimic/assets/test_audios/echomimic.wav +0 -0
  14. EchoMimic/assets/test_audios/echomimic_en.wav +0 -0
  15. EchoMimic/assets/test_audios/echomimic_en_girl.wav +0 -0
  16. EchoMimic/assets/test_audios/echomimic_girl.wav +0 -0
  17. EchoMimic/assets/test_audios/jane.wav +0 -0
  18. EchoMimic/assets/test_audios/mei.wav +0 -0
  19. EchoMimic/assets/test_audios/walden.wav +0 -0
  20. EchoMimic/assets/test_audios/yun.wav +0 -0
  21. EchoMimic/assets/test_imgs/.ipynb_checkpoints/b-checkpoint.png +0 -0
  22. EchoMimic/assets/test_imgs/a.png +0 -0
  23. EchoMimic/assets/test_imgs/b.png +0 -0
  24. EchoMimic/assets/test_imgs/c.png +0 -3
  25. EchoMimic/assets/test_imgs/d.png +0 -3
  26. EchoMimic/assets/test_imgs/e.png +0 -3
  27. EchoMimic/assets/test_pose_demo/d.jpg +0 -0
  28. EchoMimic/assets/test_pose_demo_audios/movie_0_clip_0.wav +0 -3
  29. EchoMimic/assets/test_pose_demo_pose/0.pkl +0 -3
  30. EchoMimic/assets/test_pose_demo_pose/1.pkl +0 -3
  31. EchoMimic/assets/test_pose_demo_pose/10.pkl +0 -3
  32. EchoMimic/assets/test_pose_demo_pose/100.pkl +0 -3
  33. EchoMimic/assets/test_pose_demo_pose/101.pkl +0 -3
  34. EchoMimic/assets/test_pose_demo_pose/102.pkl +0 -3
  35. EchoMimic/assets/test_pose_demo_pose/103.pkl +0 -3
  36. EchoMimic/assets/test_pose_demo_pose/104.pkl +0 -3
  37. EchoMimic/assets/test_pose_demo_pose/105.pkl +0 -3
  38. EchoMimic/assets/test_pose_demo_pose/106.pkl +0 -3
  39. EchoMimic/assets/test_pose_demo_pose/107.pkl +0 -3
  40. EchoMimic/assets/test_pose_demo_pose/108.pkl +0 -3
  41. EchoMimic/assets/test_pose_demo_pose/109.pkl +0 -3
  42. EchoMimic/assets/test_pose_demo_pose/11.pkl +0 -3
  43. EchoMimic/assets/test_pose_demo_pose/110.pkl +0 -3
  44. EchoMimic/assets/test_pose_demo_pose/111.pkl +0 -3
  45. EchoMimic/assets/test_pose_demo_pose/112.pkl +0 -3
  46. EchoMimic/assets/test_pose_demo_pose/113.pkl +0 -3
  47. EchoMimic/assets/test_pose_demo_pose/114.pkl +0 -3
  48. EchoMimic/assets/test_pose_demo_pose/115.pkl +0 -3
  49. EchoMimic/assets/test_pose_demo_pose/116.pkl +0 -3
  50. EchoMimic/assets/test_pose_demo_pose/117.pkl +0 -3
EchoMimic/.gitignore DELETED
@@ -1,4 +0,0 @@
1
- ffmpeg-4.4-amd64-static
2
- pretrained_weights
3
- output
4
- __pycache__
 
 
 
 
 
EchoMimic/.ipynb_checkpoints/Untitled-checkpoint.ipynb DELETED
@@ -1,6 +0,0 @@
1
- {
2
- "cells": [],
3
- "metadata": {},
4
- "nbformat": 4,
5
- "nbformat_minor": 5
6
- }
 
 
 
 
 
 
 
EchoMimic/LICENSE DELETED
@@ -1,201 +0,0 @@
1
- Apache License
2
- Version 2.0, January 2004
3
- http://www.apache.org/licenses/
4
-
5
- TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
-
7
- 1. Definitions.
8
-
9
- "License" shall mean the terms and conditions for use, reproduction,
10
- and distribution as defined by Sections 1 through 9 of this document.
11
-
12
- "Licensor" shall mean the copyright owner or entity authorized by
13
- the copyright owner that is granting the License.
14
-
15
- "Legal Entity" shall mean the union of the acting entity and all
16
- other entities that control, are controlled by, or are under common
17
- control with that entity. For the purposes of this definition,
18
- "control" means (i) the power, direct or indirect, to cause the
19
- direction or management of such entity, whether by contract or
20
- otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
- outstanding shares, or (iii) beneficial ownership of such entity.
22
-
23
- "You" (or "Your") shall mean an individual or Legal Entity
24
- exercising permissions granted by this License.
25
-
26
- "Source" form shall mean the preferred form for making modifications,
27
- including but not limited to software source code, documentation
28
- source, and configuration files.
29
-
30
- "Object" form shall mean any form resulting from mechanical
31
- transformation or translation of a Source form, including but
32
- not limited to compiled object code, generated documentation,
33
- and conversions to other media types.
34
-
35
- "Work" shall mean the work of authorship, whether in Source or
36
- Object form, made available under the License, as indicated by a
37
- copyright notice that is included in or attached to the work
38
- (an example is provided in the Appendix below).
39
-
40
- "Derivative Works" shall mean any work, whether in Source or Object
41
- form, that is based on (or derived from) the Work and for which the
42
- editorial revisions, annotations, elaborations, or other modifications
43
- represent, as a whole, an original work of authorship. For the purposes
44
- of this License, Derivative Works shall not include works that remain
45
- separable from, or merely link (or bind by name) to the interfaces of,
46
- the Work and Derivative Works thereof.
47
-
48
- "Contribution" shall mean any work of authorship, including
49
- the original version of the Work and any modifications or additions
50
- to that Work or Derivative Works thereof, that is intentionally
51
- submitted to Licensor for inclusion in the Work by the copyright owner
52
- or by an individual or Legal Entity authorized to submit on behalf of
53
- the copyright owner. For the purposes of this definition, "submitted"
54
- means any form of electronic, verbal, or written communication sent
55
- to the Licensor or its representatives, including but not limited to
56
- communication on electronic mailing lists, source code control systems,
57
- and issue tracking systems that are managed by, or on behalf of, the
58
- Licensor for the purpose of discussing and improving the Work, but
59
- excluding communication that is conspicuously marked or otherwise
60
- designated in writing by the copyright owner as "Not a Contribution."
61
-
62
- "Contributor" shall mean Licensor and any individual or Legal Entity
63
- on behalf of whom a Contribution has been received by Licensor and
64
- subsequently incorporated within the Work.
65
-
66
- 2. Grant of Copyright License. Subject to the terms and conditions of
67
- this License, each Contributor hereby grants to You a perpetual,
68
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
- copyright license to reproduce, prepare Derivative Works of,
70
- publicly display, publicly perform, sublicense, and distribute the
71
- Work and such Derivative Works in Source or Object form.
72
-
73
- 3. Grant of Patent License. Subject to the terms and conditions of
74
- this License, each Contributor hereby grants to You a perpetual,
75
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
- (except as stated in this section) patent license to make, have made,
77
- use, offer to sell, sell, import, and otherwise transfer the Work,
78
- where such license applies only to those patent claims licensable
79
- by such Contributor that are necessarily infringed by their
80
- Contribution(s) alone or by combination of their Contribution(s)
81
- with the Work to which such Contribution(s) was submitted. If You
82
- institute patent litigation against any entity (including a
83
- cross-claim or counterclaim in a lawsuit) alleging that the Work
84
- or a Contribution incorporated within the Work constitutes direct
85
- or contributory patent infringement, then any patent licenses
86
- granted to You under this License for that Work shall terminate
87
- as of the date such litigation is filed.
88
-
89
- 4. Redistribution. You may reproduce and distribute copies of the
90
- Work or Derivative Works thereof in any medium, with or without
91
- modifications, and in Source or Object form, provided that You
92
- meet the following conditions:
93
-
94
- (a) You must give any other recipients of the Work or
95
- Derivative Works a copy of this License; and
96
-
97
- (b) You must cause any modified files to carry prominent notices
98
- stating that You changed the files; and
99
-
100
- (c) You must retain, in the Source form of any Derivative Works
101
- that You distribute, all copyright, patent, trademark, and
102
- attribution notices from the Source form of the Work,
103
- excluding those notices that do not pertain to any part of
104
- the Derivative Works; and
105
-
106
- (d) If the Work includes a "NOTICE" text file as part of its
107
- distribution, then any Derivative Works that You distribute must
108
- include a readable copy of the attribution notices contained
109
- within such NOTICE file, excluding those notices that do not
110
- pertain to any part of the Derivative Works, in at least one
111
- of the following places: within a NOTICE text file distributed
112
- as part of the Derivative Works; within the Source form or
113
- documentation, if provided along with the Derivative Works; or,
114
- within a display generated by the Derivative Works, if and
115
- wherever such third-party notices normally appear. The contents
116
- of the NOTICE file are for informational purposes only and
117
- do not modify the License. You may add Your own attribution
118
- notices within Derivative Works that You distribute, alongside
119
- or as an addendum to the NOTICE text from the Work, provided
120
- that such additional attribution notices cannot be construed
121
- as modifying the License.
122
-
123
- You may add Your own copyright statement to Your modifications and
124
- may provide additional or different license terms and conditions
125
- for use, reproduction, or distribution of Your modifications, or
126
- for any such Derivative Works as a whole, provided Your use,
127
- reproduction, and distribution of the Work otherwise complies with
128
- the conditions stated in this License.
129
-
130
- 5. Submission of Contributions. Unless You explicitly state otherwise,
131
- any Contribution intentionally submitted for inclusion in the Work
132
- by You to the Licensor shall be under the terms and conditions of
133
- this License, without any additional terms or conditions.
134
- Notwithstanding the above, nothing herein shall supersede or modify
135
- the terms of any separate license agreement you may have executed
136
- with Licensor regarding such Contributions.
137
-
138
- 6. Trademarks. This License does not grant permission to use the trade
139
- names, trademarks, service marks, or product names of the Licensor,
140
- except as required for reasonable and customary use in describing the
141
- origin of the Work and reproducing the content of the NOTICE file.
142
-
143
- 7. Disclaimer of Warranty. Unless required by applicable law or
144
- agreed to in writing, Licensor provides the Work (and each
145
- Contributor provides its Contributions) on an "AS IS" BASIS,
146
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
- implied, including, without limitation, any warranties or conditions
148
- of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
- PARTICULAR PURPOSE. You are solely responsible for determining the
150
- appropriateness of using or redistributing the Work and assume any
151
- risks associated with Your exercise of permissions under this License.
152
-
153
- 8. Limitation of Liability. In no event and under no legal theory,
154
- whether in tort (including negligence), contract, or otherwise,
155
- unless required by applicable law (such as deliberate and grossly
156
- negligent acts) or agreed to in writing, shall any Contributor be
157
- liable to You for damages, including any direct, indirect, special,
158
- incidental, or consequential damages of any character arising as a
159
- result of this License or out of the use or inability to use the
160
- Work (including but not limited to damages for loss of goodwill,
161
- work stoppage, computer failure or malfunction, or any and all
162
- other commercial damages or losses), even if such Contributor
163
- has been advised of the possibility of such damages.
164
-
165
- 9. Accepting Warranty or Additional Liability. While redistributing
166
- the Work or Derivative Works thereof, You may choose to offer,
167
- and charge a fee for, acceptance of support, warranty, indemnity,
168
- or other liability obligations and/or rights consistent with this
169
- License. However, in accepting such obligations, You may act only
170
- on Your own behalf and on Your sole responsibility, not on behalf
171
- of any other Contributor, and only if You agree to indemnify,
172
- defend, and hold each Contributor harmless for any liability
173
- incurred by, or claims asserted against, such Contributor by reason
174
- of your accepting any such warranty or additional liability.
175
-
176
- END OF TERMS AND CONDITIONS
177
-
178
- APPENDIX: How to apply the Apache License to your work.
179
-
180
- To apply the Apache License to your work, attach the following
181
- boilerplate notice, with the fields enclosed by brackets "[]"
182
- replaced with your own identifying information. (Don't include
183
- the brackets!) The text should be enclosed in the appropriate
184
- comment syntax for the file format. We also recommend that a
185
- file or class name and description of purpose be included on the
186
- same "printed page" as the copyright notice for easier
187
- identification within third-party archives.
188
-
189
- Copyright [yyyy] [name of copyright owner]
190
-
191
- Licensed under the Apache License, Version 2.0 (the "License");
192
- you may not use this file except in compliance with the License.
193
- You may obtain a copy of the License at
194
-
195
- http://www.apache.org/licenses/LICENSE-2.0
196
-
197
- Unless required by applicable law or agreed to in writing, software
198
- distributed under the License is distributed on an "AS IS" BASIS,
199
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
- See the License for the specific language governing permissions and
201
- limitations under the License.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
EchoMimic/README.md DELETED
@@ -1,285 +0,0 @@
1
- <h1 align='center'>EchoMimic: Lifelike Audio-Driven Portrait Animations through Editable Landmark Conditioning</h1>
2
-
3
- <div align='center'>
4
- <a href='https://github.com/yuange250' target='_blank'>Zhiyuan Chen</a><sup>*</sup>&emsp;
5
- <a href='https://github.com/JoeFannie' target='_blank'>Jiajiong Cao</a><sup>*</sup>&emsp;
6
- <a href='https://github.com/octavianChen' target='_blank'>Zhiquan Chen</a><sup></sup>&emsp;
7
- <a href='https://github.com/lymhust' target='_blank'>Yuming Li</a><sup></sup>&emsp;
8
- <a href='https://github.com/' target='_blank'>Chenguang Ma</a><sup></sup>
9
- </div>
10
- <div align='center'>
11
- *Equal Contribution.
12
- </div>
13
-
14
- <div align='center'>
15
- Terminal Technology Department, Alipay, Ant Group.
16
- </div>
17
- <br>
18
- <div align='center'>
19
- <a href='https://badtobest.github.io/echomimic.html'><img src='https://img.shields.io/badge/Project-Page-blue'></a>
20
- <a href='https://huggingface.co/BadToBest/EchoMimic'><img src='https://img.shields.io/badge/%F0%9F%A4%97%20HuggingFace-Model-yellow'></a>
21
- <a href='https://huggingface.co/spaces/BadToBest/EchoMimic'><img src='https://img.shields.io/badge/%F0%9F%A4%97%20HuggingFace-Demo-yellow'></a>
22
- <a href='https://www.modelscope.cn/models/BadToBest/EchoMimic'><img src='https://img.shields.io/badge/ModelScope-Model-purple'></a>
23
- <a href='https://www.modelscope.cn/studios/BadToBest/BadToBest'><img src='https://img.shields.io/badge/ModelScope-Demo-purple'></a>
24
- <a href='https://arxiv.org/abs/2407.08136'><img src='https://img.shields.io/badge/Paper-Arxiv-red'></a>
25
- <a href='assets/echomimic.png'><img src='https://badges.aleen42.com/src/wechat.svg'></a>
26
- </div>
27
-
28
- ## &#x1F4E3; &#x1F4E3; Updates
29
- * [2024.08.02] 🔥 EchoMimic is now available on [huggingface](https://huggingface.co/spaces/BadToBest/EchoMimic) with A100 GPU. Thanks Wenmeng Zhou@ModelScope.
30
- * [2024.07.25] 🔥🔥🔥 Accelerated models and pipe on **Audio Driven** are released. The inference speed can be improved by **10x** (from ~7mins/240frames to ~50s/240frames on V100 GPU)
31
- * [2024.07.23] 🔥 EchoMimic gradio demo on [modelscope](https://www.modelscope.cn/studios/BadToBest/BadToBest) is ready.
32
- * [2024.07.23] 🔥 EchoMimic gradio demo on [huggingface](https://huggingface.co/spaces/fffiloni/EchoMimic) is ready. Thanks Sylvain Filoni@fffiloni.
33
- * [2024.07.17] 🔥🔥🔥 Accelerated models and pipe on **Audio + Selected Landmarks** are released. The inference speed can be improved by **10x** (from ~7mins/240frames to ~50s/240frames on V100 GPU)
34
- * [2024.07.14] 🔥 [ComfyUI](https://github.com/smthemex/ComfyUI_EchoMimic) is now available. Thanks @smthemex for the contribution.
35
- * [2024.07.13] 🔥 Thanks [NewGenAI](https://www.youtube.com/@StableAIHub) for the [video installation tutorial](https://www.youtube.com/watch?v=8R0lTIY7tfI).
36
- * [2024.07.13] 🔥 We release our pose&audio driven codes and models.
37
- * [2024.07.12] 🔥 WebUI and GradioUI versions are released. We thank @greengerong @Robin021 and @O-O1024 for their contributions.
38
- * [2024.07.12] 🔥 Our [paper](https://arxiv.org/abs/2407.08136) is now public on arXiv.
39
- * [2024.07.09] 🔥 We release our audio driven codes and models.
40
-
41
- ## Gallery
42
- ### Audio Driven (Sing)
43
-
44
- <table class="center">
45
-
46
- <tr>
47
- <td width=30% style="border: none">
48
- <video controls loop src="https://github.com/BadToBest/EchoMimic/assets/11451501/d014d921-9f94-4640-97ad-035b00effbfe" muted="false"></video>
49
- </td>
50
- <td width=30% style="border: none">
51
- <video controls loop src="https://github.com/BadToBest/EchoMimic/assets/11451501/877603a5-a4f9-4486-a19f-8888422daf78" muted="false"></video>
52
- </td>
53
- <td width=30% style="border: none">
54
- <video controls loop src="https://github.com/BadToBest/EchoMimic/assets/11451501/e0cb5afb-40a6-4365-84f8-cb2834c4cfe7" muted="false"></video>
55
- </td>
56
- </tr>
57
-
58
- </table>
59
-
60
- ### Audio Driven (English)
61
-
62
- <table class="center">
63
-
64
- <tr>
65
- <td width=30% style="border: none">
66
- <video controls loop src="https://github.com/BadToBest/EchoMimic/assets/11451501/386982cd-3ff8-470d-a6d9-b621e112f8a5" muted="false"></video>
67
- </td>
68
- <td width=30% style="border: none">
69
- <video controls loop src="https://github.com/BadToBest/EchoMimic/assets/11451501/5c60bb91-1776-434e-a720-8857a00b1501" muted="false"></video>
70
- </td>
71
- <td width=30% style="border: none">
72
- <video controls loop src="https://github.com/BadToBest/EchoMimic/assets/11451501/1f15adc5-0f33-4afa-b96a-2011886a4a06" muted="false"></video>
73
- </td>
74
- </tr>
75
-
76
- </table>
77
-
78
- ### Audio Driven (Chinese)
79
-
80
- <table class="center">
81
-
82
- <tr>
83
- <td width=30% style="border: none">
84
- <video controls loop src="https://github.com/BadToBest/EchoMimic/assets/11451501/a8092f9a-a5dc-4cd6-95be-1831afaccf00" muted="false"></video>
85
- </td>
86
- <td width=30% style="border: none">
87
- <video controls loop src="https://github.com/BadToBest/EchoMimic/assets/11451501/c8b5c59f-0483-42ef-b3ee-4cffae6c7a52" muted="false"></video>
88
- </td>
89
- <td width=30% style="border: none">
90
- <video controls loop src="https://github.com/BadToBest/EchoMimic/assets/11451501/532a3e60-2bac-4039-a06c-ff6bf06cb4a4" muted="false"></video>
91
- </td>
92
- </tr>
93
-
94
- </table>
95
-
96
- ### Landmark Driven
97
-
98
- <table class="center">
99
-
100
- <tr>
101
- <td width=30% style="border: none">
102
- <video controls loop src="https://github.com/BadToBest/EchoMimic/assets/11451501/1da6c46f-4532-4375-a0dc-0a4d6fd30a39" muted="false"></video>
103
- </td>
104
- <td width=30% style="border: none">
105
- <video controls loop src="https://github.com/BadToBest/EchoMimic/assets/11451501/d4f4d5c1-e228-463a-b383-27fb90ed6172" muted="false"></video>
106
- </td>
107
- <td width=30% style="border: none">
108
- <video controls loop src="https://github.com/BadToBest/EchoMimic/assets/11451501/18bd2c93-319e-4d1c-8255-3f02ba717475" muted="false"></video>
109
- </td>
110
- </tr>
111
-
112
- </table>
113
-
114
- ### Audio + Selected Landmark Driven
115
-
116
- <table class="center">
117
-
118
- <tr>
119
- <td width=30% style="border: none">
120
- <video controls loop src="https://github.com/BadToBest/EchoMimic/assets/11451501/4a29d735-ec1b-474d-b843-3ff0bdf85f55" muted="false"></video>
121
- </td>
122
- <td width=30% style="border: none">
123
- <video controls loop src="https://github.com/BadToBest/EchoMimic/assets/11451501/b994c8f5-8dae-4dd8-870f-962b50dc091f" muted="false"></video>
124
- </td>
125
- <td width=30% style="border: none">
126
- <video controls loop src="https://github.com/BadToBest/EchoMimic/assets/11451501/955c1d51-07b2-494d-ab93-895b9c43b896" muted="false"></video>
127
- </td>
128
- </tr>
129
-
130
- </table>
131
-
132
- **(Some demo images above are sourced from image websites. If there is any infringement, we will immediately remove them and apologize.)**
133
-
134
- ## Installation
135
-
136
- ### Download the Codes
137
-
138
- ```bash
139
- git clone https://github.com/BadToBest/EchoMimic
140
- cd EchoMimic
141
- ```
142
-
143
- ### Python Environment Setup
144
-
145
- - Tested System Environment: Centos 7.2/Ubuntu 22.04, Cuda >= 11.7
146
- - Tested GPUs: A100(80G) / RTX4090D (24G) / V100(16G)
147
- - Tested Python Version: 3.8 / 3.10 / 3.11
148
-
149
- Create conda environment (Recommended):
150
-
151
- ```bash
152
- conda create -n echomimic python=3.8
153
- conda activate echomimic
154
- ```
155
-
156
- Install packages with `pip`
157
- ```bash
158
- pip install -r requirements.txt
159
- ```
160
-
161
- ### Download ffmpeg-static
162
- Download and decompress [ffmpeg-static](https://www.johnvansickle.com/ffmpeg/old-releases/ffmpeg-4.4-amd64-static.tar.xz), then
163
- ```
164
- export FFMPEG_PATH=/path/to/ffmpeg-4.4-amd64-static
165
- ```
166
-
167
- ### Download pretrained weights
168
-
169
- ```shell
170
- git lfs install
171
- git clone https://huggingface.co/BadToBest/EchoMimic pretrained_weights
172
- ```
173
-
174
- The **pretrained_weights** is organized as follows.
175
-
176
- ```
177
- ./pretrained_weights/
178
- ├── denoising_unet.pth
179
- ├── reference_unet.pth
180
- ├── motion_module.pth
181
- ├── face_locator.pth
182
- ├── sd-vae-ft-mse
183
- │ └── ...
184
- ├── sd-image-variations-diffusers
185
- │ └── ...
186
- └── audio_processor
187
- └── whisper_tiny.pt
188
- ```
189
-
190
- Here, **denoising_unet.pth** / **reference_unet.pth** / **motion_module.pth** / **face_locator.pth** are the main checkpoints of **EchoMimic**. The other models in this hub can also be downloaded from their original hubs, thanks to their authors' brilliant work:
191
- - [sd-vae-ft-mse](https://huggingface.co/stabilityai/sd-vae-ft-mse)
192
- - [sd-image-variations-diffusers](https://huggingface.co/lambdalabs/sd-image-variations-diffusers)
193
- - [audio_processor(whisper)](https://openaipublic.azureedge.net/main/whisper/models/65147644a518d12f04e32d6f3b26facc3f8dd46e5390956a9424a650c0ce22b9/tiny.pt)
194
-
195
- ### Audio-Driven Algo Inference
196
- Run the python inference script:
197
-
198
- ```bash
199
- python -u infer_audio2vid.py
200
- python -u infer_audio2vid_pose.py
201
- ```
202
-
203
- ### Audio-Driven Algo Inference On Your Own Cases
204
-
205
- Edit the inference config file **./configs/prompts/animation.yaml**, and add your own case:
206
-
207
- ```bash
208
- test_cases:
209
- "path/to/your/image":
210
- - "path/to/your/audio"
211
- ```
212
-
213
- Then run the Python inference script:
214
- ```bash
215
- python -u infer_audio2vid.py
216
- ```
217
-
218
- ### Motion Alignment between Ref. Img. and Driven Vid.
219
-
220
- (First, download the checkpoints with the '_pose.pth' suffix from Hugging Face.)
221
-
222
- Edit driver_video and ref_image to your path in demo_motion_sync.py, then run
223
- ```bash
224
- python -u demo_motion_sync.py
225
- ```
226
-
227
- ### Audio&Pose-Driven Algo Inference
228
- Edit ./configs/prompts/animation_pose.yaml, then run
229
- ```bash
230
- python -u infer_audio2vid_pose.py
231
- ```
232
-
233
- ### Pose-Driven Algo Inference
234
- Set draw_mouse=True in line 135 of infer_audio2vid_pose.py. Edit ./configs/prompts/animation_pose.yaml, then run
235
- ```bash
236
- python -u infer_audio2vid_pose.py
237
- ```
238
-
239
- ### Run the Gradio UI
240
-
241
- Thanks to the contribution from @Robin021:
242
-
243
- ```bash
244
-
245
- python -u webgui.py --server_port=3000
246
-
247
- ```
248
-
249
- ## Release Plans
250
-
251
- | Status | Milestone | ETA |
252
- |:--------:|:-------------------------------------------------------------------------|:--:|
253
- | ✅ | The inference source code of the Audio-Driven algo is released on GitHub | 9th July, 2024 |
254
- | ✅ | Pretrained models trained on English and Mandarin Chinese to be released | 9th July, 2024 |
255
- | ✅ | The inference source code of the Pose-Driven algo is released on GitHub | 13th July, 2024 |
256
- | ✅ | Pretrained models with better pose control to be released | 13th July, 2024 |
257
- | ✅ | Accelerated models to be released | 17th July, 2024 |
258
- | 🚀 | Pretrained models with better sing performance to be released | TBD |
259
- | 🚀 | Large-Scale and High-resolution Chinese-Based Talking Head Dataset | TBD |
260
-
261
- ## Acknowledgements
262
-
263
- We would like to thank the contributors to the [AnimateDiff](https://github.com/guoyww/AnimateDiff), [Moore-AnimateAnyone](https://github.com/MooreThreads/Moore-AnimateAnyone) and [MuseTalk](https://github.com/TMElyralab/MuseTalk) repositories, for their open research and exploration.
264
-
265
- We are also grateful to [V-Express](https://github.com/tencent-ailab/V-Express) and [hallo](https://github.com/fudan-generative-vision/hallo) for their outstanding work in the area of diffusion-based talking heads.
266
-
267
- If we missed any open-source projects or related articles, we will promptly add them to the acknowledgements.
268
-
269
- ## Citation
270
-
271
- If you find our work useful for your research, please consider citing the paper:
272
-
273
- ```
274
- @misc{chen2024echomimic,
275
- title={EchoMimic: Lifelike Audio-Driven Portrait Animations through Editable Landmark Conditioning},
276
- author={Zhiyuan Chen and Jiajiong Cao and Zhiquan Chen and Yuming Li and Chenguang Ma},
277
- year={2024},
278
- archivePrefix={arXiv},
279
- primaryClass={cs.CV}
280
- }
281
- ```
282
-
283
- ## Star History
284
-
285
- [![Star History Chart](https://api.star-history.com/svg?repos=BadToBest/EchoMimic&type=Date)](https://star-history.com/?spm=5176.28103460.0.0.342a3da23STWrU#BadToBest/EchoMimic&Date)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
EchoMimic/Untitled.ipynb DELETED
@@ -1,315 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": 1,
6
- "id": "4a8767df-feea-4bda-8bfb-ab07f667cd11",
7
- "metadata": {},
8
- "outputs": [
9
- {
10
- "ename": "ModuleNotFoundError",
11
- "evalue": "No module named 'cv2'",
12
- "output_type": "error",
13
- "traceback": [
14
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
15
- "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
16
- "Cell \u001b[0;32mIn[1], line 12\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mdatetime\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m datetime\n\u001b[1;32m 10\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpathlib\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Path\n\u001b[0;32m---> 12\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mcv2\u001b[39;00m\n\u001b[1;32m 13\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mnumpy\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mnp\u001b[39;00m\n\u001b[1;32m 14\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\n",
17
- "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'cv2'"
18
- ]
19
- }
20
- ],
21
- "source": [
22
- "#!/usr/bin/env python\n",
23
- "# -*- coding: UTF-8 -*-\n",
24
- "'''\n",
25
- "webui\n",
26
- "'''\n",
27
- "\n",
28
- "import os\n",
29
- "import random\n",
30
- "from datetime import datetime\n",
31
- "from pathlib import Path\n",
32
- "\n",
33
- "import cv2\n",
34
- "import numpy as np\n",
35
- "import torch\n",
36
- "from diffusers import AutoencoderKL, DDIMScheduler\n",
37
- "from omegaconf import OmegaConf\n",
38
- "from PIL import Image\n",
39
- "from src.models.unet_2d_condition import UNet2DConditionModel\n",
40
- "from src.models.unet_3d_echo import EchoUNet3DConditionModel\n",
41
- "from src.models.whisper.audio2feature import load_audio_model\n",
42
- "from src.pipelines.pipeline_echo_mimic import Audio2VideoPipeline\n",
43
- "from src.utils.util import save_videos_grid, crop_and_pad\n",
44
- "from src.models.face_locator import FaceLocator\n",
45
- "from moviepy.editor import VideoFileClip, AudioFileClip\n",
46
- "from facenet_pytorch import MTCNN\n",
47
- "import argparse\n",
48
- "\n",
49
- "import gradio as gr\n",
50
- "\n",
51
- "default_values = {\n",
52
- " \"width\": 512,\n",
53
- " \"height\": 512,\n",
54
- " \"length\": 1200,\n",
55
- " \"seed\": 420,\n",
56
- " \"facemask_dilation_ratio\": 0.1,\n",
57
- " \"facecrop_dilation_ratio\": 0.5,\n",
58
- " \"context_frames\": 12,\n",
59
- " \"context_overlap\": 3,\n",
60
- " \"cfg\": 2.5,\n",
61
- " \"steps\": 30,\n",
62
- " \"sample_rate\": 16000,\n",
63
- " \"fps\": 24,\n",
64
- " \"device\": \"cuda\"\n",
65
- "}\n",
66
- "\n",
67
- "ffmpeg_path = os.getenv('FFMPEG_PATH')\n",
68
- "if ffmpeg_path is None:\n",
69
- " print(\"please download ffmpeg-static and export to FFMPEG_PATH. \\nFor example: export FFMPEG_PATH=/musetalk/ffmpeg-4.4-amd64-static\")\n",
70
- "elif ffmpeg_path not in os.getenv('PATH'):\n",
71
- " print(\"add ffmpeg to path\")\n",
72
- " os.environ[\"PATH\"] = f\"{ffmpeg_path}:{os.environ['PATH']}\"\n",
73
- "\n",
74
- "\n",
75
- "config_path = \"./configs/prompts/animation.yaml\"\n",
76
- "config = OmegaConf.load(config_path)\n",
77
- "if config.weight_dtype == \"fp16\":\n",
78
- " weight_dtype = torch.float16\n",
79
- "else:\n",
80
- " weight_dtype = torch.float32\n",
81
- "\n",
82
- "device = \"cuda\"\n",
83
- "if not torch.cuda.is_available():\n",
84
- " device = \"cpu\"\n",
85
- "\n",
86
- "inference_config_path = config.inference_config\n",
87
- "infer_config = OmegaConf.load(inference_config_path)\n",
88
- "\n",
89
- "############# model_init started #############\n",
90
- "## vae init\n",
91
- "vae = AutoencoderKL.from_pretrained(config.pretrained_vae_path).to(\"cuda\", dtype=weight_dtype)\n",
92
- "\n",
93
- "## reference net init\n",
94
- "reference_unet = UNet2DConditionModel.from_pretrained(\n",
95
- " config.pretrained_base_model_path,\n",
96
- " subfolder=\"unet\",\n",
97
- ").to(dtype=weight_dtype, device=device)\n",
98
- "reference_unet.load_state_dict(torch.load(config.reference_unet_path, map_location=\"cpu\"))\n",
99
- "\n",
100
- "## denoising net init\n",
101
- "if os.path.exists(config.motion_module_path):\n",
102
- " ### stage1 + stage2\n",
103
- " denoising_unet = EchoUNet3DConditionModel.from_pretrained_2d(\n",
104
- " config.pretrained_base_model_path,\n",
105
- " config.motion_module_path,\n",
106
- " subfolder=\"unet\",\n",
107
- " unet_additional_kwargs=infer_config.unet_additional_kwargs,\n",
108
- " ).to(dtype=weight_dtype, device=device)\n",
109
- "else:\n",
110
- " ### only stage1\n",
111
- " denoising_unet = EchoUNet3DConditionModel.from_pretrained_2d(\n",
112
- " config.pretrained_base_model_path,\n",
113
- " \"\",\n",
114
- " subfolder=\"unet\",\n",
115
- " unet_additional_kwargs={\n",
116
- " \"use_motion_module\": False,\n",
117
- " \"unet_use_temporal_attention\": False,\n",
118
- " \"cross_attention_dim\": infer_config.unet_additional_kwargs.cross_attention_dim\n",
119
- " }\n",
120
- " ).to(dtype=weight_dtype, device=device)\n",
121
- "\n",
122
- "denoising_unet.load_state_dict(torch.load(config.denoising_unet_path, map_location=\"cpu\"), strict=False)\n",
123
- "\n",
124
- "## face locator init\n",
125
- "face_locator = FaceLocator(320, conditioning_channels=1, block_out_channels=(16, 32, 96, 256)).to(dtype=weight_dtype, device=\"cuda\")\n",
126
- "face_locator.load_state_dict(torch.load(config.face_locator_path))\n",
127
- "\n",
128
- "## load audio processor params\n",
129
- "audio_processor = load_audio_model(model_path=config.audio_model_path, device=device)\n",
130
- "\n",
131
- "## load face detector params\n",
132
- "face_detector = MTCNN(image_size=320, margin=0, min_face_size=20, thresholds=[0.6, 0.7, 0.7], factor=0.709, post_process=True, device=device)\n",
133
- "\n",
134
- "############# model_init finished #############\n",
135
- "\n",
136
- "sched_kwargs = OmegaConf.to_container(infer_config.noise_scheduler_kwargs)\n",
137
- "scheduler = DDIMScheduler(**sched_kwargs)\n",
138
- "\n",
139
- "pipe = Audio2VideoPipeline(\n",
140
- " vae=vae,\n",
141
- " reference_unet=reference_unet,\n",
142
- " denoising_unet=denoising_unet,\n",
143
- " audio_guider=audio_processor,\n",
144
- " face_locator=face_locator,\n",
145
- " scheduler=scheduler,\n",
146
- ").to(\"cuda\", dtype=weight_dtype)\n",
147
- "\n",
148
- "def select_face(det_bboxes, probs):\n",
149
- " ## max face from faces that the prob is above 0.8\n",
150
- " ## box: xyxy\n",
151
- " if det_bboxes is None or probs is None:\n",
152
- " return None\n",
153
- " filtered_bboxes = []\n",
154
- " for bbox_i in range(len(det_bboxes)):\n",
155
- " if probs[bbox_i] > 0.8:\n",
156
- " filtered_bboxes.append(det_bboxes[bbox_i])\n",
157
- " if len(filtered_bboxes) == 0:\n",
158
- " return None\n",
159
- " sorted_bboxes = sorted(filtered_bboxes, key=lambda x:(x[3]-x[1]) * (x[2] - x[0]), reverse=True)\n",
160
- " return sorted_bboxes[0]\n",
161
- "\n",
162
- "def process_video(uploaded_img, uploaded_audio, width, height, length, seed, facemask_dilation_ratio, facecrop_dilation_ratio, context_frames, context_overlap, cfg, steps, sample_rate, fps, device):\n",
163
- "\n",
164
- " if seed is not None and seed > -1:\n",
165
- " generator = torch.manual_seed(seed)\n",
166
- " else:\n",
167
- " generator = torch.manual_seed(random.randint(100, 1000000))\n",
168
- "\n",
169
- " #### face musk prepare\n",
170
- " face_img = cv2.imread(uploaded_img)\n",
171
- " face_mask = np.zeros((face_img.shape[0], face_img.shape[1])).astype('uint8')\n",
172
- " det_bboxes, probs = face_detector.detect(face_img)\n",
173
- " select_bbox = select_face(det_bboxes, probs)\n",
174
- " if select_bbox is None:\n",
175
- " face_mask[:, :] = 255\n",
176
- " else:\n",
177
- " xyxy = select_bbox[:4]\n",
178
- " xyxy = np.round(xyxy).astype('int')\n",
179
- " rb, re, cb, ce = xyxy[1], xyxy[3], xyxy[0], xyxy[2]\n",
180
- " r_pad = int((re - rb) * facemask_dilation_ratio)\n",
181
- " c_pad = int((ce - cb) * facemask_dilation_ratio)\n",
182
- " face_mask[rb - r_pad : re + r_pad, cb - c_pad : ce + c_pad] = 255\n",
183
- " \n",
184
- " #### face crop\n",
185
- " r_pad_crop = int((re - rb) * facecrop_dilation_ratio)\n",
186
- " c_pad_crop = int((ce - cb) * facecrop_dilation_ratio)\n",
187
- " crop_rect = [max(0, cb - c_pad_crop), max(0, rb - r_pad_crop), min(ce + c_pad_crop, face_img.shape[1]), min(re + r_pad_crop, face_img.shape[0])]\n",
188
- " face_img = crop_and_pad(face_img, crop_rect)\n",
189
- " face_mask = crop_and_pad(face_mask, crop_rect)\n",
190
- " face_img = cv2.resize(face_img, (width, height))\n",
191
- " face_mask = cv2.resize(face_mask, (width, height))\n",
192
- "\n",
193
- " ref_image_pil = Image.fromarray(face_img[:, :, [2, 1, 0]])\n",
194
- " face_mask_tensor = torch.Tensor(face_mask).to(dtype=weight_dtype, device=\"cuda\").unsqueeze(0).unsqueeze(0).unsqueeze(0) / 255.0\n",
195
- " \n",
196
- " video = pipe(\n",
197
- " ref_image_pil,\n",
198
- " uploaded_audio,\n",
199
- " face_mask_tensor,\n",
200
- " width,\n",
201
- " height,\n",
202
- " length,\n",
203
- " steps,\n",
204
- " cfg,\n",
205
- " generator=generator,\n",
206
- " audio_sample_rate=sample_rate,\n",
207
- " context_frames=context_frames,\n",
208
- " fps=fps,\n",
209
- " context_overlap=context_overlap\n",
210
- " ).videos\n",
211
- "\n",
212
- " save_dir = Path(\"output/tmp\")\n",
213
- " save_dir.mkdir(exist_ok=True, parents=True)\n",
214
- " output_video_path = save_dir / \"output_video.mp4\"\n",
215
- " save_videos_grid(video, str(output_video_path), n_rows=1, fps=fps)\n",
216
- "\n",
217
- " video_clip = VideoFileClip(str(output_video_path))\n",
218
- " audio_clip = AudioFileClip(uploaded_audio)\n",
219
- " final_output_path = save_dir / \"output_video_with_audio.mp4\"\n",
220
- " video_clip = video_clip.set_audio(audio_clip)\n",
221
- " video_clip.write_videofile(str(final_output_path), codec=\"libx264\", audio_codec=\"aac\")\n",
222
- "\n",
223
- " return final_output_path\n",
224
- " \n",
225
- "with gr.Blocks() as demo:\n",
226
- " gr.Markdown('# EchoMimic')\n",
227
- " gr.Markdown('![]()')\n",
228
- " with gr.Row():\n",
229
- " with gr.Column():\n",
230
- " uploaded_img = gr.Image(type=\"filepath\", label=\"Reference Image\")\n",
231
- " uploaded_audio = gr.Audio(type=\"filepath\", label=\"Input Audio\")\n",
232
- " with gr.Column():\n",
233
- " output_video = gr.Video()\n",
234
- "\n",
235
- " with gr.Accordion(\"Configuration\", open=False):\n",
236
- " width = gr.Slider(label=\"Width\", minimum=128, maximum=1024, value=default_values[\"width\"])\n",
237
- " height = gr.Slider(label=\"Height\", minimum=128, maximum=1024, value=default_values[\"height\"])\n",
238
- " length = gr.Slider(label=\"Length\", minimum=100, maximum=5000, value=default_values[\"length\"])\n",
239
- " seed = gr.Slider(label=\"Seed\", minimum=0, maximum=10000, value=default_values[\"seed\"])\n",
240
- " facemask_dilation_ratio = gr.Slider(label=\"Facemask Dilation Ratio\", minimum=0.0, maximum=1.0, step=0.01, value=default_values[\"facemask_dilation_ratio\"])\n",
241
- " facecrop_dilation_ratio = gr.Slider(label=\"Facecrop Dilation Ratio\", minimum=0.0, maximum=1.0, step=0.01, value=default_values[\"facecrop_dilation_ratio\"])\n",
242
- " context_frames = gr.Slider(label=\"Context Frames\", minimum=0, maximum=50, step=1, value=default_values[\"context_frames\"])\n",
243
- " context_overlap = gr.Slider(label=\"Context Overlap\", minimum=0, maximum=10, step=1, value=default_values[\"context_overlap\"])\n",
244
- " cfg = gr.Slider(label=\"CFG\", minimum=0.0, maximum=10.0, step=0.1, value=default_values[\"cfg\"])\n",
245
- " steps = gr.Slider(label=\"Steps\", minimum=1, maximum=100, step=1, value=default_values[\"steps\"])\n",
246
- " sample_rate = gr.Slider(label=\"Sample Rate\", minimum=8000, maximum=48000, step=1000, value=default_values[\"sample_rate\"])\n",
247
- " fps = gr.Slider(label=\"FPS\", minimum=1, maximum=60, step=1, value=default_values[\"fps\"])\n",
248
- " device = gr.Radio(label=\"Device\", choices=[\"cuda\", \"cpu\"], value=default_values[\"device\"])\n",
249
- "\n",
250
- " generate_button = gr.Button(\"Generate Video\")\n",
251
- "\n",
252
- " def generate_video(uploaded_img, uploaded_audio, width, height, length, seed, facemask_dilation_ratio, facecrop_dilation_ratio, context_frames, context_overlap, cfg, steps, sample_rate, fps, device):\n",
253
- "\n",
254
- " final_output_path = process_video(\n",
255
- " uploaded_img, uploaded_audio, width, height, length, seed, facemask_dilation_ratio, facecrop_dilation_ratio, context_frames, context_overlap, cfg, steps, sample_rate, fps, device\n",
256
- " ) \n",
257
- " output_video= final_output_path\n",
258
- " return final_output_path\n",
259
- "\n",
260
- " generate_button.click(\n",
261
- " generate_video,\n",
262
- " inputs=[\n",
263
- " uploaded_img,\n",
264
- " uploaded_audio,\n",
265
- " width,\n",
266
- " height,\n",
267
- " length,\n",
268
- " seed,\n",
269
- " facemask_dilation_ratio,\n",
270
- " facecrop_dilation_ratio,\n",
271
- " context_frames,\n",
272
- " context_overlap,\n",
273
- " cfg,\n",
274
- " steps,\n",
275
- " sample_rate,\n",
276
- " fps,\n",
277
- " device\n",
278
- " ],\n",
279
- " outputs=output_video\n",
280
- " )\n",
281
- "parser = argparse.ArgumentParser(description='EchoMimic')\n",
282
- "parser.add_argument('--server_name', type=str, default='0.0.0.0', help='Server name')\n",
283
- "parser.add_argument('--server_port', type=int, default=7680, help='Server port')\n",
284
- "args = parser.parse_args()\n",
285
- "\n",
286
- "# demo.launch(server_name=args.server_name, server_port=args.server_port, inbrowser=True)\n",
287
- "\n",
288
- "if __name__ == '__main__':\n",
289
- " #demo.launch(server_name='0.0.0.0')\n",
290
- " demo.launch(server_name=args.server_name, server_port=args.server_port, inbrowser=True, share=True)"
291
- ]
292
- }
293
- ],
294
- "metadata": {
295
- "kernelspec": {
296
- "display_name": "Python 3 (ipykernel)",
297
- "language": "python",
298
- "name": "python3"
299
- },
300
- "language_info": {
301
- "codemirror_mode": {
302
- "name": "ipython",
303
- "version": 3
304
- },
305
- "file_extension": ".py",
306
- "mimetype": "text/x-python",
307
- "name": "python",
308
- "nbconvert_exporter": "python",
309
- "pygments_lexer": "ipython3",
310
- "version": "3.10.13"
311
- }
312
- },
313
- "nbformat": 4,
314
- "nbformat_minor": 5
315
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
EchoMimic/assets/.ipynb_checkpoints/echomimic-checkpoint.png DELETED
Binary file (677 kB)
 
EchoMimic/assets/driven_videos/a.mp4 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f4889c1ecc27aea2be958f67ad417bd0b11b6626d5ca1edd692c52a3fe1e73ee
3
- size 1798555
 
 
 
 
EchoMimic/assets/driven_videos/b.mp4 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:97c5cc480eecf21130b5d17807f23242b67861b58a36f308df7159aa5d0b8dc0
3
- size 2072709
 
 
 
 
EchoMimic/assets/driven_videos/c.mp4 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:228736987a683026e3220ed1bca8775ec44692e75cbddbb4c132903d90b022a5
3
- size 2340154
 
 
 
 
EchoMimic/assets/echomimic.png DELETED
Binary file (677 kB)
 
EchoMimic/assets/test_audios/chunnuanhuakai.wav DELETED
Binary file (511 kB)
 
EchoMimic/assets/test_audios/chunwang.wav DELETED
Binary file (236 kB)
 
EchoMimic/assets/test_audios/echomimic.wav DELETED
Binary file (217 kB)
 
EchoMimic/assets/test_audios/echomimic_en.wav DELETED
Binary file (251 kB)
 
EchoMimic/assets/test_audios/echomimic_en_girl.wav DELETED
Binary file (234 kB)
 
EchoMimic/assets/test_audios/echomimic_girl.wav DELETED
Binary file (253 kB)
 
EchoMimic/assets/test_audios/jane.wav DELETED
Binary file (756 kB)
 
EchoMimic/assets/test_audios/mei.wav DELETED
Binary file (270 kB)
 
EchoMimic/assets/test_audios/walden.wav DELETED
Binary file (391 kB)
 
EchoMimic/assets/test_audios/yun.wav DELETED
Binary file (661 kB)
 
EchoMimic/assets/test_imgs/.ipynb_checkpoints/b-checkpoint.png DELETED
Binary file (293 kB)
 
EchoMimic/assets/test_imgs/a.png DELETED
Binary file (84 kB)
 
EchoMimic/assets/test_imgs/b.png DELETED
Binary file (293 kB)
 
EchoMimic/assets/test_imgs/c.png DELETED

Git LFS Details

  • SHA256: c8d03b2d81b0338676e3d31534788c4b13d21988cb026ad26d9c74ec83267367
  • Pointer size: 132 Bytes
  • Size of remote file: 1.96 MB
EchoMimic/assets/test_imgs/d.png DELETED

Git LFS Details

  • SHA256: a89c0663a5cc4e5c872bbf0a9883b327080b855af4d54701a0c0abc0b2057fe5
  • Pointer size: 132 Bytes
  • Size of remote file: 1.24 MB
EchoMimic/assets/test_imgs/e.png DELETED

Git LFS Details

  • SHA256: b24837b7f1c3341fe7e3089314ee32428ae0aa0435df463c1087aa1ea3c644da
  • Pointer size: 132 Bytes
  • Size of remote file: 1.15 MB
EchoMimic/assets/test_pose_demo/d.jpg DELETED
Binary file (112 kB)
 
EchoMimic/assets/test_pose_demo_audios/movie_0_clip_0.wav DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:eb6994f9ce1788e7672d2489fb05e21459e0869e44202634a503e2a556ac939c
3
- size 1771130
 
 
 
 
EchoMimic/assets/test_pose_demo_pose/0.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:79f459214a9284b9a80b38c3fb64c9d24890fe5472a0220f6c0327742c0cc856
3
- size 11625
 
 
 
 
EchoMimic/assets/test_pose_demo_pose/1.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:acb82a39521d5d774f67546e94aaf2cf743e2f16e4894d7819241dd290c004ee
3
- size 11625
 
 
 
 
EchoMimic/assets/test_pose_demo_pose/10.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a30a8350432635fc65861c30c4433b03f0ddff615628356cd8e6eeb36bd0c985
3
- size 11625
 
 
 
 
EchoMimic/assets/test_pose_demo_pose/100.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:425a4c9428f09e1b73da0b31d4aa5df50421085b18dbc3eccfdf85ab40e97770
3
- size 11625
 
 
 
 
EchoMimic/assets/test_pose_demo_pose/101.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d06bc52421328959f16fc95e7495767c982870a88e1ec78d681cc06dc7bd4f09
3
- size 11625
 
 
 
 
EchoMimic/assets/test_pose_demo_pose/102.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:135ee941d0c7bf27c1808d19a2e56a89e4f96a5b89c3f9c8793682cf2a707bf0
3
- size 11625
 
 
 
 
EchoMimic/assets/test_pose_demo_pose/103.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:8f0a3b12714d02f933cca212689d5727fd5a87e3fdd29c7ed6d403a0367a2312
3
- size 11625
 
 
 
 
EchoMimic/assets/test_pose_demo_pose/104.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:43bec0c1442aaba6a20ec49aa472fbd35ce3b42f24567f698a92ade73de6fc50
3
- size 11625
 
 
 
 
EchoMimic/assets/test_pose_demo_pose/105.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:4143b52e0430a7ae75d2a9785bdea5505ae2936e47531273086961a2f63c2cac
3
- size 11625
 
 
 
 
EchoMimic/assets/test_pose_demo_pose/106.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:10396bf15c0fa9bd7e3d925aced0009ae74c3353daa5076dd74bacad37572603
3
- size 11625
 
 
 
 
EchoMimic/assets/test_pose_demo_pose/107.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:7ebcf965e910b130f48f1c1dd35b68176f13e1d7f546681eaf37a959b0325ebb
3
- size 11625
 
 
 
 
EchoMimic/assets/test_pose_demo_pose/108.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:25546b95ca775dc94bd9439ab748cacab1d13b8ddae2a35c931a92fc8669c22f
3
- size 11625
 
 
 
 
EchoMimic/assets/test_pose_demo_pose/109.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:16068bbde36d595f268b2ec37baa6b7dcea935b50345d989066f4d8d7d2aac1f
3
- size 11625
 
 
 
 
EchoMimic/assets/test_pose_demo_pose/11.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e534fe28d74fb88578fee00b40aa3b368065bb67e6e240664024f7bd5fc37b64
3
- size 11625
 
 
 
 
EchoMimic/assets/test_pose_demo_pose/110.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:49bd7320a4d2eb64bf04eff284eacc124a3bdfd3313f825d092788d42c3f6ad6
3
- size 11625
 
 
 
 
EchoMimic/assets/test_pose_demo_pose/111.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d6eea05553a2f4ef6ec9c51c5973ab576d86cd58c26a3777084b1baccfe61b44
3
- size 11625
 
 
 
 
EchoMimic/assets/test_pose_demo_pose/112.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:333655dcb1cabb37f217569690e09fa884e8c6634f62610b1234476a2cddd177
3
- size 11625
 
 
 
 
EchoMimic/assets/test_pose_demo_pose/113.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:338363e0f2b69e23d29be2aa9952e8de4a8d79ae27f0b015caded1a1231b74a4
3
- size 11625
 
 
 
 
EchoMimic/assets/test_pose_demo_pose/114.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c4099b77a8fb45d1e8f09d43af6e46d7bb59a60734f7f37f12e1cff31c07b91a
3
- size 11625
 
 
 
 
EchoMimic/assets/test_pose_demo_pose/115.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e5611bce0729c35305f3b67b0b7492786a109f071c06e26600009b6464238d81
3
- size 11625
 
 
 
 
EchoMimic/assets/test_pose_demo_pose/116.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a79cac90622ee10de3a4d1b2fe1373155ead1fe325bba1c5fa12763fa3800718
3
- size 11625
 
 
 
 
EchoMimic/assets/test_pose_demo_pose/117.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:b3ada492fe7572daff2a8ac567c86f771bf90b5bd3147e8ad799c73ea83dbfb4
3
- size 11625