Spaces:

autophil
/

sonisphere

Running on T4

App Files Files Community

Phil Sobrepena committed on 25 days ago

Commit

f47eaa6

1 Parent(s): 73ed896

demo

Browse files

Files changed (2) hide show

Dockerfile +11 -10
gradio_demo.py +10 -117

Dockerfile CHANGED Viewed

@@ -12,17 +12,18 @@ RUN apt-get update && apt-get install -y \
     libxext6 \
     && rm -rf /var/lib/apt/lists/*
-# Install Python dependencies
-COPY requirements.txt .
-RUN pip3 install --no-cache-dir -r requirements.txt
-# Clone and install MMAudio
 RUN git clone https://github.com/hkchengrex/MMAudio.git && \
     cd MMAudio && \
     pip3 install -e .
-# Copy the application files
-COPY app.py .
 # Create output directory
 RUN mkdir -p output/gradio && chmod 777 output/gradio
@@ -32,8 +33,8 @@ ENV PYTHONUNBUFFERED=1
 ENV GRADIO_SERVER_NAME=0.0.0.0
 ENV GRADIO_SERVER_PORT=7860
-# Expose the port
 EXPOSE 7860
-# Run the Gradio app
-CMD ["python3", "app.py"]

     libxext6 \
     && rm -rf /var/lib/apt/lists/*
+# Clone MMAudio and install dependencies
 RUN git clone https://github.com/hkchengrex/MMAudio.git && \
     cd MMAudio && \
+    # Install PyTorch first as specified in README
+    pip3 install torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cu118 && \
+    # Install additional dependencies
+    pip3 install -r requirements.txt && \
+    # Install MMAudio
     pip3 install -e .
+# Set working directory to MMAudio
+WORKDIR /code/MMAudio
 # Create output directory
 RUN mkdir -p output/gradio && chmod 777 output/gradio
 ENV GRADIO_SERVER_NAME=0.0.0.0
 ENV GRADIO_SERVER_PORT=7860
+# Expose Gradio port
 EXPOSE 7860
+# Run the Gradio demo
+CMD ["python3", "gradio_demo.py"]

gradio_demo.py CHANGED Viewed

@@ -170,10 +170,7 @@ def text_to_audio(prompt: str, negative_prompt: str, seed: int, num_steps: int,
 video_to_audio_tab = gr.Interface(
     fn=video_to_audio,
-    description="""
-    Project page: <a href="https://hkchengrex.com/MMAudio/">https://hkchengrex.com/MMAudio/</a><br>
-    Code: <a href="https://github.com/hkchengrex/MMAudio">https://github.com/hkchengrex/MMAudio</a><br>
     NOTE: It takes longer to process high-resolution videos (>384 px on the shorter side).
     Doing so does not improve results.
     """,
@@ -188,115 +185,13 @@ video_to_audio_tab = gr.Interface(
     ],
     outputs='playable_video',
     cache_examples=False,
-    title='MMAudio — Video-to-Audio Synthesis',
-    examples=[
-        [
-            'https://huggingface.co/hkchengrex/MMAudio/resolve/main/examples/sora_beach.mp4',
-            'waves, seagulls',
-            '',
-            0,
-            25,
-            4.5,
-            10,
-        ],
-        [
-            'https://huggingface.co/hkchengrex/MMAudio/resolve/main/examples/sora_serpent.mp4',
-            '',
-            'music',
-            0,
-            25,
-            4.5,
-            10,
-        ],
-        [
-            'https://huggingface.co/hkchengrex/MMAudio/resolve/main/examples/sora_seahorse.mp4',
-            'bubbles',
-            '',
-            0,
-            25,
-            4.5,
-            10,
-        ],
-        [
-            'https://huggingface.co/hkchengrex/MMAudio/resolve/main/examples/sora_india.mp4',
-            'Indian holy music',
-            '',
-            0,
-            25,
-            4.5,
-            10,
-        ],
-        [
-            'https://huggingface.co/hkchengrex/MMAudio/resolve/main/examples/sora_galloping.mp4',
-            'galloping',
-            '',
-            0,
-            25,
-            4.5,
-            10,
-        ],
-        [
-            'https://huggingface.co/hkchengrex/MMAudio/resolve/main/examples/sora_kraken.mp4',
-            'waves, storm',
-            '',
-            0,
-            25,
-            4.5,
-            10,
-        ],
-        [
-            'https://huggingface.co/hkchengrex/MMAudio/resolve/main/examples/mochi_storm.mp4',
-            'storm',
-            '',
-            0,
-            25,
-            4.5,
-            10,
-        ],
-        [
-            'https://huggingface.co/hkchengrex/MMAudio/resolve/main/examples/hunyuan_spring.mp4',
-            '',
-            '',
-            0,
-            25,
-            4.5,
-            10,
-        ],
-        [
-            'https://huggingface.co/hkchengrex/MMAudio/resolve/main/examples/hunyuan_typing.mp4',
-            'typing',
-            '',
-            0,
-            25,
-            4.5,
-            10,
-        ],
-        [
-            'https://huggingface.co/hkchengrex/MMAudio/resolve/main/examples/hunyuan_wake_up.mp4',
-            '',
-            '',
-            0,
-            25,
-            4.5,
-            10,
-        ],
-        [
-            'https://huggingface.co/hkchengrex/MMAudio/resolve/main/examples/sora_nyc.mp4',
-            '',
-            '',
-            0,
-            25,
-            4.5,
-            10,
-        ],
-    ])
 text_to_audio_tab = gr.Interface(
     fn=text_to_audio,
-    description="""
-    Project page: <a href="https://hkchengrex.com/MMAudio/">https://hkchengrex.com/MMAudio/</a><br>
-    Code: <a href="https://github.com/hkchengrex/MMAudio">https://github.com/hkchengrex/MMAudio</a><br>
-    """,
     inputs=[
         gr.Text(label='Prompt'),
         gr.Text(label='Negative prompt'),
@@ -307,15 +202,13 @@ text_to_audio_tab = gr.Interface(
     ],
     outputs='audio',
     cache_examples=False,
-    title='MMAudio — Text-to-Audio Synthesis',
 )
 image_to_audio_tab = gr.Interface(
     fn=image_to_audio,
     description="""
-    Project page: <a href="https://hkchengrex.com/MMAudio/">https://hkchengrex.com/MMAudio/</a><br>
-    Code: <a href="https://github.com/hkchengrex/MMAudio">https://github.com/hkchengrex/MMAudio</a><br>
     NOTE: It takes longer to process high-resolution images (>384 px on the shorter side).
     Doing so does not improve results.
     """,
@@ -330,7 +223,7 @@ image_to_audio_tab = gr.Interface(
     ],
     outputs='playable_video',
     cache_examples=False,
-    title='MMAudio — Image-to-Audio Synthesis (experimental)',
 )
 if __name__ == "__main__":
@@ -339,5 +232,5 @@ if __name__ == "__main__":
     args = parser.parse_args()
     gr.TabbedInterface([video_to_audio_tab, text_to_audio_tab, image_to_audio_tab],
-                       ['Video-to-Audio', 'Text-to-Audio', 'Image-to-Audio (experimental)']).launch(
-                           server_port=args.port, allowed_paths=[output_dir])

 video_to_audio_tab = gr.Interface(
     fn=video_to_audio,
+    description=""" Video-to-Audio
     NOTE: It takes longer to process high-resolution videos (>384 px on the shorter side).
     Doing so does not improve results.
     """,
     ],
     outputs='playable_video',
     cache_examples=False,
+    title='Sonisphere - Sonic Branding Tool',
+    )
 text_to_audio_tab = gr.Interface(
     fn=text_to_audio,
+    description=""" Text-to-Audio
+ """,
     inputs=[
         gr.Text(label='Prompt'),
         gr.Text(label='Negative prompt'),
     ],
     outputs='audio',
     cache_examples=False,
+    title='Sonisphere - Sonic Branding Tool',
 )
 image_to_audio_tab = gr.Interface(
     fn=image_to_audio,
     description="""
+    Image-to-Audio
     NOTE: It takes longer to process high-resolution images (>384 px on the shorter side).
     Doing so does not improve results.
     """,
     ],
     outputs='playable_video',
     cache_examples=False,
+    title='Image-to-Audio Synthesis (experimental)',
 )
 if __name__ == "__main__":
     args = parser.parse_args()
     gr.TabbedInterface([video_to_audio_tab, text_to_audio_tab, image_to_audio_tab],
+                       ['Video-to-Audio', 'Text-to-Audio', 'Image-to-Audio (experimental)']).launch(auth=("admin", "sonisphere"),share=True,
+                           server_port=args.port, allowed_paths=[output_dir])