Phil Sobrepena committed on
Commit
f47eaa6
·
1 Parent(s): 73ed896
Files changed (2) hide show
  1. Dockerfile +11 -10
  2. gradio_demo.py +10 -117
Dockerfile CHANGED
@@ -12,17 +12,18 @@ RUN apt-get update && apt-get install -y \
12
  libxext6 \
13
  && rm -rf /var/lib/apt/lists/*
14
 
15
- # Install Python dependencies
16
- COPY requirements.txt .
17
- RUN pip3 install --no-cache-dir -r requirements.txt
18
-
19
- # Clone and install MMAudio
20
  RUN git clone https://github.com/hkchengrex/MMAudio.git && \
21
  cd MMAudio && \
 
 
 
 
 
22
  pip3 install -e .
23
 
24
- # Copy the application files
25
- COPY app.py .
26
 
27
  # Create output directory
28
  RUN mkdir -p output/gradio && chmod 777 output/gradio
@@ -32,8 +33,8 @@ ENV PYTHONUNBUFFERED=1
32
  ENV GRADIO_SERVER_NAME=0.0.0.0
33
  ENV GRADIO_SERVER_PORT=7860
34
 
35
- # Expose the port
36
  EXPOSE 7860
37
 
38
- # Run the Gradio app
39
- CMD ["python3", "app.py"]
 
12
  libxext6 \
13
  && rm -rf /var/lib/apt/lists/*
14
 
15
+ # Clone MMAudio and install dependencies
 
 
 
 
16
  RUN git clone https://github.com/hkchengrex/MMAudio.git && \
17
  cd MMAudio && \
18
+ # Install PyTorch first as specified in README
19
+ pip3 install torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cu118 && \
20
+ # Install additional dependencies
21
+ pip3 install -r requirements.txt && \
22
+ # Install MMAudio
23
  pip3 install -e .
24
 
25
+ # Set working directory to MMAudio
26
+ WORKDIR /code/MMAudio
27
 
28
  # Create output directory
29
  RUN mkdir -p output/gradio && chmod 777 output/gradio
 
33
  ENV GRADIO_SERVER_NAME=0.0.0.0
34
  ENV GRADIO_SERVER_PORT=7860
35
 
36
+ # Expose Gradio port
37
  EXPOSE 7860
38
 
39
+ # Run the Gradio demo
40
+ CMD ["python3", "gradio_demo.py"]
gradio_demo.py CHANGED
@@ -170,10 +170,7 @@ def text_to_audio(prompt: str, negative_prompt: str, seed: int, num_steps: int,
170
 
171
  video_to_audio_tab = gr.Interface(
172
  fn=video_to_audio,
173
- description="""
174
- Project page: <a href="https://hkchengrex.com/MMAudio/">https://hkchengrex.com/MMAudio/</a><br>
175
- Code: <a href="https://github.com/hkchengrex/MMAudio">https://github.com/hkchengrex/MMAudio</a><br>
176
-
177
  NOTE: It takes longer to process high-resolution videos (>384 px on the shorter side).
178
  Doing so does not improve results.
179
  """,
@@ -188,115 +185,13 @@ video_to_audio_tab = gr.Interface(
188
  ],
189
  outputs='playable_video',
190
  cache_examples=False,
191
- title='MMAudio — Video-to-Audio Synthesis',
192
- examples=[
193
- [
194
- 'https://huggingface.co/hkchengrex/MMAudio/resolve/main/examples/sora_beach.mp4',
195
- 'waves, seagulls',
196
- '',
197
- 0,
198
- 25,
199
- 4.5,
200
- 10,
201
- ],
202
- [
203
- 'https://huggingface.co/hkchengrex/MMAudio/resolve/main/examples/sora_serpent.mp4',
204
- '',
205
- 'music',
206
- 0,
207
- 25,
208
- 4.5,
209
- 10,
210
- ],
211
- [
212
- 'https://huggingface.co/hkchengrex/MMAudio/resolve/main/examples/sora_seahorse.mp4',
213
- 'bubbles',
214
- '',
215
- 0,
216
- 25,
217
- 4.5,
218
- 10,
219
- ],
220
- [
221
- 'https://huggingface.co/hkchengrex/MMAudio/resolve/main/examples/sora_india.mp4',
222
- 'Indian holy music',
223
- '',
224
- 0,
225
- 25,
226
- 4.5,
227
- 10,
228
- ],
229
- [
230
- 'https://huggingface.co/hkchengrex/MMAudio/resolve/main/examples/sora_galloping.mp4',
231
- 'galloping',
232
- '',
233
- 0,
234
- 25,
235
- 4.5,
236
- 10,
237
- ],
238
- [
239
- 'https://huggingface.co/hkchengrex/MMAudio/resolve/main/examples/sora_kraken.mp4',
240
- 'waves, storm',
241
- '',
242
- 0,
243
- 25,
244
- 4.5,
245
- 10,
246
- ],
247
- [
248
- 'https://huggingface.co/hkchengrex/MMAudio/resolve/main/examples/mochi_storm.mp4',
249
- 'storm',
250
- '',
251
- 0,
252
- 25,
253
- 4.5,
254
- 10,
255
- ],
256
- [
257
- 'https://huggingface.co/hkchengrex/MMAudio/resolve/main/examples/hunyuan_spring.mp4',
258
- '',
259
- '',
260
- 0,
261
- 25,
262
- 4.5,
263
- 10,
264
- ],
265
- [
266
- 'https://huggingface.co/hkchengrex/MMAudio/resolve/main/examples/hunyuan_typing.mp4',
267
- 'typing',
268
- '',
269
- 0,
270
- 25,
271
- 4.5,
272
- 10,
273
- ],
274
- [
275
- 'https://huggingface.co/hkchengrex/MMAudio/resolve/main/examples/hunyuan_wake_up.mp4',
276
- '',
277
- '',
278
- 0,
279
- 25,
280
- 4.5,
281
- 10,
282
- ],
283
- [
284
- 'https://huggingface.co/hkchengrex/MMAudio/resolve/main/examples/sora_nyc.mp4',
285
- '',
286
- '',
287
- 0,
288
- 25,
289
- 4.5,
290
- 10,
291
- ],
292
- ])
293
 
294
  text_to_audio_tab = gr.Interface(
295
  fn=text_to_audio,
296
- description="""
297
- Project page: <a href="https://hkchengrex.com/MMAudio/">https://hkchengrex.com/MMAudio/</a><br>
298
- Code: <a href="https://github.com/hkchengrex/MMAudio">https://github.com/hkchengrex/MMAudio</a><br>
299
- """,
300
  inputs=[
301
  gr.Text(label='Prompt'),
302
  gr.Text(label='Negative prompt'),
@@ -307,15 +202,13 @@ text_to_audio_tab = gr.Interface(
307
  ],
308
  outputs='audio',
309
  cache_examples=False,
310
- title='MMAudio — Text-to-Audio Synthesis',
311
  )
312
 
313
  image_to_audio_tab = gr.Interface(
314
  fn=image_to_audio,
315
  description="""
316
- Project page: <a href="https://hkchengrex.com/MMAudio/">https://hkchengrex.com/MMAudio/</a><br>
317
- Code: <a href="https://github.com/hkchengrex/MMAudio">https://github.com/hkchengrex/MMAudio</a><br>
318
-
319
  NOTE: It takes longer to process high-resolution images (>384 px on the shorter side).
320
  Doing so does not improve results.
321
  """,
@@ -330,7 +223,7 @@ image_to_audio_tab = gr.Interface(
330
  ],
331
  outputs='playable_video',
332
  cache_examples=False,
333
- title='MMAudio — Image-to-Audio Synthesis (experimental)',
334
  )
335
 
336
  if __name__ == "__main__":
@@ -339,5 +232,5 @@ if __name__ == "__main__":
339
  args = parser.parse_args()
340
 
341
  gr.TabbedInterface([video_to_audio_tab, text_to_audio_tab, image_to_audio_tab],
342
- ['Video-to-Audio', 'Text-to-Audio', 'Image-to-Audio (experimental)']).launch(
343
- server_port=args.port, allowed_paths=[output_dir])
 
170
 
171
  video_to_audio_tab = gr.Interface(
172
  fn=video_to_audio,
173
+ description=""" Video-to-Audio
 
 
 
174
  NOTE: It takes longer to process high-resolution videos (>384 px on the shorter side).
175
  Doing so does not improve results.
176
  """,
 
185
  ],
186
  outputs='playable_video',
187
  cache_examples=False,
188
+ title='Sonisphere - Sonic Branding Tool',
189
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
 
191
  text_to_audio_tab = gr.Interface(
192
  fn=text_to_audio,
193
+ description=""" Text-to-Audio
194
+ """,
 
 
195
  inputs=[
196
  gr.Text(label='Prompt'),
197
  gr.Text(label='Negative prompt'),
 
202
  ],
203
  outputs='audio',
204
  cache_examples=False,
205
+ title='Sonisphere - Sonic Branding Tool',
206
  )
207
 
208
  image_to_audio_tab = gr.Interface(
209
  fn=image_to_audio,
210
  description="""
211
+ Image-to-Audio
 
 
212
  NOTE: It takes longer to process high-resolution images (>384 px on the shorter side).
213
  Doing so does not improve results.
214
  """,
 
223
  ],
224
  outputs='playable_video',
225
  cache_examples=False,
226
+ title='Image-to-Audio Synthesis (experimental)',
227
  )
228
 
229
  if __name__ == "__main__":
 
232
  args = parser.parse_args()
233
 
234
  gr.TabbedInterface([video_to_audio_tab, text_to_audio_tab, image_to_audio_tab],
235
+ ['Video-to-Audio', 'Text-to-Audio', 'Image-to-Audio (experimental)']).launch(auth=("admin", "sonisphere"),share=True,
236
+ server_port=args.port, allowed_paths=[output_dir])