Spaces:
Running
on
T4
Running
on
T4
Phil Sobrepena
committed on
Commit
·
f47eaa6
1
Parent(s):
73ed896
demo
Browse files
- Dockerfile +11 -10
- gradio_demo.py +10 -117
Dockerfile
CHANGED
@@ -12,17 +12,18 @@ RUN apt-get update && apt-get install -y \
|
|
12 |
libxext6 \
|
13 |
&& rm -rf /var/lib/apt/lists/*
|
14 |
|
15 |
-
#
|
16 |
-
COPY requirements.txt .
|
17 |
-
RUN pip3 install --no-cache-dir -r requirements.txt
|
18 |
-
|
19 |
-
# Clone and install MMAudio
|
20 |
RUN git clone https://github.com/hkchengrex/MMAudio.git && \
|
21 |
cd MMAudio && \
|
|
|
|
|
|
|
|
|
|
|
22 |
pip3 install -e .
|
23 |
|
24 |
-
#
|
25 |
-
|
26 |
|
27 |
# Create output directory
|
28 |
RUN mkdir -p output/gradio && chmod 777 output/gradio
|
@@ -32,8 +33,8 @@ ENV PYTHONUNBUFFERED=1
|
|
32 |
ENV GRADIO_SERVER_NAME=0.0.0.0
|
33 |
ENV GRADIO_SERVER_PORT=7860
|
34 |
|
35 |
-
# Expose
|
36 |
EXPOSE 7860
|
37 |
|
38 |
-
# Run the Gradio
|
39 |
-
CMD ["python3", "
|
|
|
12 |
libxext6 \
|
13 |
&& rm -rf /var/lib/apt/lists/*
|
14 |
|
15 |
+
# Clone MMAudio and install dependencies
|
|
|
|
|
|
|
|
|
16 |
RUN git clone https://github.com/hkchengrex/MMAudio.git && \
|
17 |
cd MMAudio && \
|
18 |
+
# Install PyTorch first as specified in README
|
19 |
+
pip3 install torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cu118 && \
|
20 |
+
# Install additional dependencies
|
21 |
+
pip3 install -r requirements.txt && \
|
22 |
+
# Install MMAudio
|
23 |
pip3 install -e .
|
24 |
|
25 |
+
# Set working directory to MMAudio
|
26 |
+
WORKDIR /code/MMAudio
|
27 |
|
28 |
# Create output directory
|
29 |
RUN mkdir -p output/gradio && chmod 777 output/gradio
|
|
|
33 |
ENV GRADIO_SERVER_NAME=0.0.0.0
|
34 |
ENV GRADIO_SERVER_PORT=7860
|
35 |
|
36 |
+
# Expose Gradio port
|
37 |
EXPOSE 7860
|
38 |
|
39 |
+
# Run the Gradio demo
|
40 |
+
CMD ["python3", "gradio_demo.py"]
|
gradio_demo.py
CHANGED
@@ -170,10 +170,7 @@ def text_to_audio(prompt: str, negative_prompt: str, seed: int, num_steps: int,
|
|
170 |
|
171 |
video_to_audio_tab = gr.Interface(
|
172 |
fn=video_to_audio,
|
173 |
-
description="""
|
174 |
-
Project page: <a href="https://hkchengrex.com/MMAudio/">https://hkchengrex.com/MMAudio/</a><br>
|
175 |
-
Code: <a href="https://github.com/hkchengrex/MMAudio">https://github.com/hkchengrex/MMAudio</a><br>
|
176 |
-
|
177 |
NOTE: It takes longer to process high-resolution videos (>384 px on the shorter side).
|
178 |
Doing so does not improve results.
|
179 |
""",
|
@@ -188,115 +185,13 @@ video_to_audio_tab = gr.Interface(
|
|
188 |
],
|
189 |
outputs='playable_video',
|
190 |
cache_examples=False,
|
191 |
-
title='
|
192 |
-
|
193 |
-
[
|
194 |
-
'https://huggingface.co/hkchengrex/MMAudio/resolve/main/examples/sora_beach.mp4',
|
195 |
-
'waves, seagulls',
|
196 |
-
'',
|
197 |
-
0,
|
198 |
-
25,
|
199 |
-
4.5,
|
200 |
-
10,
|
201 |
-
],
|
202 |
-
[
|
203 |
-
'https://huggingface.co/hkchengrex/MMAudio/resolve/main/examples/sora_serpent.mp4',
|
204 |
-
'',
|
205 |
-
'music',
|
206 |
-
0,
|
207 |
-
25,
|
208 |
-
4.5,
|
209 |
-
10,
|
210 |
-
],
|
211 |
-
[
|
212 |
-
'https://huggingface.co/hkchengrex/MMAudio/resolve/main/examples/sora_seahorse.mp4',
|
213 |
-
'bubbles',
|
214 |
-
'',
|
215 |
-
0,
|
216 |
-
25,
|
217 |
-
4.5,
|
218 |
-
10,
|
219 |
-
],
|
220 |
-
[
|
221 |
-
'https://huggingface.co/hkchengrex/MMAudio/resolve/main/examples/sora_india.mp4',
|
222 |
-
'Indian holy music',
|
223 |
-
'',
|
224 |
-
0,
|
225 |
-
25,
|
226 |
-
4.5,
|
227 |
-
10,
|
228 |
-
],
|
229 |
-
[
|
230 |
-
'https://huggingface.co/hkchengrex/MMAudio/resolve/main/examples/sora_galloping.mp4',
|
231 |
-
'galloping',
|
232 |
-
'',
|
233 |
-
0,
|
234 |
-
25,
|
235 |
-
4.5,
|
236 |
-
10,
|
237 |
-
],
|
238 |
-
[
|
239 |
-
'https://huggingface.co/hkchengrex/MMAudio/resolve/main/examples/sora_kraken.mp4',
|
240 |
-
'waves, storm',
|
241 |
-
'',
|
242 |
-
0,
|
243 |
-
25,
|
244 |
-
4.5,
|
245 |
-
10,
|
246 |
-
],
|
247 |
-
[
|
248 |
-
'https://huggingface.co/hkchengrex/MMAudio/resolve/main/examples/mochi_storm.mp4',
|
249 |
-
'storm',
|
250 |
-
'',
|
251 |
-
0,
|
252 |
-
25,
|
253 |
-
4.5,
|
254 |
-
10,
|
255 |
-
],
|
256 |
-
[
|
257 |
-
'https://huggingface.co/hkchengrex/MMAudio/resolve/main/examples/hunyuan_spring.mp4',
|
258 |
-
'',
|
259 |
-
'',
|
260 |
-
0,
|
261 |
-
25,
|
262 |
-
4.5,
|
263 |
-
10,
|
264 |
-
],
|
265 |
-
[
|
266 |
-
'https://huggingface.co/hkchengrex/MMAudio/resolve/main/examples/hunyuan_typing.mp4',
|
267 |
-
'typing',
|
268 |
-
'',
|
269 |
-
0,
|
270 |
-
25,
|
271 |
-
4.5,
|
272 |
-
10,
|
273 |
-
],
|
274 |
-
[
|
275 |
-
'https://huggingface.co/hkchengrex/MMAudio/resolve/main/examples/hunyuan_wake_up.mp4',
|
276 |
-
'',
|
277 |
-
'',
|
278 |
-
0,
|
279 |
-
25,
|
280 |
-
4.5,
|
281 |
-
10,
|
282 |
-
],
|
283 |
-
[
|
284 |
-
'https://huggingface.co/hkchengrex/MMAudio/resolve/main/examples/sora_nyc.mp4',
|
285 |
-
'',
|
286 |
-
'',
|
287 |
-
0,
|
288 |
-
25,
|
289 |
-
4.5,
|
290 |
-
10,
|
291 |
-
],
|
292 |
-
])
|
293 |
|
294 |
text_to_audio_tab = gr.Interface(
|
295 |
fn=text_to_audio,
|
296 |
-
description="""
|
297 |
-
|
298 |
-
Code: <a href="https://github.com/hkchengrex/MMAudio">https://github.com/hkchengrex/MMAudio</a><br>
|
299 |
-
""",
|
300 |
inputs=[
|
301 |
gr.Text(label='Prompt'),
|
302 |
gr.Text(label='Negative prompt'),
|
@@ -307,15 +202,13 @@ text_to_audio_tab = gr.Interface(
|
|
307 |
],
|
308 |
outputs='audio',
|
309 |
cache_examples=False,
|
310 |
-
title='
|
311 |
)
|
312 |
|
313 |
image_to_audio_tab = gr.Interface(
|
314 |
fn=image_to_audio,
|
315 |
description="""
|
316 |
-
|
317 |
-
Code: <a href="https://github.com/hkchengrex/MMAudio">https://github.com/hkchengrex/MMAudio</a><br>
|
318 |
-
|
319 |
NOTE: It takes longer to process high-resolution images (>384 px on the shorter side).
|
320 |
Doing so does not improve results.
|
321 |
""",
|
@@ -330,7 +223,7 @@ image_to_audio_tab = gr.Interface(
|
|
330 |
],
|
331 |
outputs='playable_video',
|
332 |
cache_examples=False,
|
333 |
-
title='
|
334 |
)
|
335 |
|
336 |
if __name__ == "__main__":
|
@@ -339,5 +232,5 @@ if __name__ == "__main__":
|
|
339 |
args = parser.parse_args()
|
340 |
|
341 |
gr.TabbedInterface([video_to_audio_tab, text_to_audio_tab, image_to_audio_tab],
|
342 |
-
['Video-to-Audio', 'Text-to-Audio', 'Image-to-Audio (experimental)']).launch(
|
343 |
-
server_port=args.port, allowed_paths=[output_dir])
|
|
|
170 |
|
171 |
video_to_audio_tab = gr.Interface(
|
172 |
fn=video_to_audio,
|
173 |
+
description=""" Video-to-Audio
|
|
|
|
|
|
|
174 |
NOTE: It takes longer to process high-resolution videos (>384 px on the shorter side).
|
175 |
Doing so does not improve results.
|
176 |
""",
|
|
|
185 |
],
|
186 |
outputs='playable_video',
|
187 |
cache_examples=False,
|
188 |
+
title='Sonisphere - Sonic Branding Tool',
|
189 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
190 |
|
191 |
text_to_audio_tab = gr.Interface(
|
192 |
fn=text_to_audio,
|
193 |
+
description=""" Text-to-Audio
|
194 |
+
""",
|
|
|
|
|
195 |
inputs=[
|
196 |
gr.Text(label='Prompt'),
|
197 |
gr.Text(label='Negative prompt'),
|
|
|
202 |
],
|
203 |
outputs='audio',
|
204 |
cache_examples=False,
|
205 |
+
title='Sonisphere - Sonic Branding Tool',
|
206 |
)
|
207 |
|
208 |
image_to_audio_tab = gr.Interface(
|
209 |
fn=image_to_audio,
|
210 |
description="""
|
211 |
+
Image-to-Audio
|
|
|
|
|
212 |
NOTE: It takes longer to process high-resolution images (>384 px on the shorter side).
|
213 |
Doing so does not improve results.
|
214 |
""",
|
|
|
223 |
],
|
224 |
outputs='playable_video',
|
225 |
cache_examples=False,
|
226 |
+
title='Image-to-Audio Synthesis (experimental)',
|
227 |
)
|
228 |
|
229 |
if __name__ == "__main__":
|
|
|
232 |
args = parser.parse_args()
|
233 |
|
234 |
gr.TabbedInterface([video_to_audio_tab, text_to_audio_tab, image_to_audio_tab],
|
235 |
+
['Video-to-Audio', 'Text-to-Audio', 'Image-to-Audio (experimental)']).launch(auth=("admin", "sonisphere"),share=True,
|
236 |
+
server_port=args.port, allowed_paths=[output_dir])
|