# Spaces: Running
import os | |
import tempfile | |
import gradio as gr | |
import torch | |
import torchaudio | |
from loguru import logger | |
from typing import Optional, Tuple | |
import random | |
import numpy as np | |
import requests | |
import json | |
# Simplified working version that does not load the large models.
def create_demo_audio(video_file, text_prompt: str, duration: float = 5.0) -> str:
    """Synthesize a placeholder tone and save it as a mono WAV file.

    The signal is a 440 Hz sine wave with amplitude 0.3. When ``text_prompt``
    is non-empty, a second sine with amplitude 0.1 at
    ``len(text_prompt) * 10`` Hz is mixed in, so prompts of different lengths
    produce audibly different clips.

    Args:
        video_file: Unused; kept so existing callers can keep passing it.
        text_prompt: Prompt text. Only its length influences the audio.
        duration: Clip length in seconds. Must yield at least one sample.

    Returns:
        Path to ``demo_audio.wav`` inside a freshly created temporary
        directory. This function never removes that directory.

    Raises:
        ValueError: If ``duration`` is too small to produce any samples.
    """
    import math

    sample_rate = 48000
    num_samples = int(duration * sample_rate)
    if num_samples <= 0:
        raise ValueError(f"duration must be positive, got {duration!r}")

    # Sample instants are n / sample_rate. An endpoint-inclusive linspace
    # would stretch the spacing to duration / (n - 1) and detune every tone.
    t = torch.arange(num_samples, dtype=torch.float32) / sample_rate

    base_frequency = 440  # A4
    audio = 0.3 * torch.sin(2 * math.pi * base_frequency * t)

    # Add an overtone whose pitch depends on the prompt length.
    if text_prompt:
        overtone_frequency = len(text_prompt) * 10
        audio = audio + 0.1 * torch.sin(2 * math.pi * overtone_frequency * t)

    # Each call gets its own directory so concurrent requests cannot clobber
    # one another's "demo_audio.wav".
    temp_dir = tempfile.mkdtemp()
    audio_path = os.path.join(temp_dir, "demo_audio.wav")
    # torchaudio expects (channels, samples); unsqueeze adds the mono channel.
    torchaudio.save(audio_path, audio.unsqueeze(0), sample_rate)
    return audio_path
def process_video_demo(video_file, text_prompt: str, guidance_scale: float, inference_steps: int, sample_nums: int) -> Tuple[list, str]:
    """Generate synthetic demo audio clips for an uploaded video.

    Args:
        video_file: Uploaded video, as a filesystem path or an object with a
            ``name`` attribute. ``None`` means nothing was uploaded.
        text_prompt: Audio description; ``None`` is treated as an empty string.
        guidance_scale: Only echoed back in the status message.
        inference_steps: Only echoed back in the status message.
        sample_nums: Requested number of clips; at most 3 are generated.

    Returns:
        A ``(paths, status)`` tuple: the list of generated audio file paths
        (empty on failure or missing input) and a human-readable status
        message.
    """
    if video_file is None:
        return [], "❌ Please upload a video file!"
    if text_prompt is None:
        text_prompt = ""

    try:
        logger.info(f"Processing video in demo mode: {video_file}")
        logger.info(f"Text prompt: {text_prompt}")

        # Generate simple demo audio, limited to 3 samples. The sample index
        # is appended to the prompt so each clip gets a different overtone.
        sample_count = min(sample_nums, 3)
        video_outputs = [
            create_demo_audio(video_file, f"{text_prompt}_sample_{index}")
            for index in range(1, sample_count + 1)
        ]

        # A plain path string has no ``name`` attribute, so check for
        # path-like input first and only then fall back to ``.name``.
        if isinstance(video_file, (str, os.PathLike)):
            video_name = os.path.basename(video_file)
        elif hasattr(video_file, "name"):
            video_name = os.path.basename(video_file.name)
        else:
            video_name = ""
        video_name = video_name or "Video file"

        success_msg = "\n".join([
            "✅ Demo Generation Complete!",
            f"📹 **Processed**: {video_name}",
            f'📝 **Prompt**: "{text_prompt}"',
            f"⚙️ **Settings**: CFG={guidance_scale}, Steps={inference_steps}, Samples={sample_nums}",
            f"🎵 **Generated**: {len(video_outputs)} demo audio sample(s)",
            "⚠️ **Note**: This is a working demo with synthetic audio.",
            "For real AI-generated Foley audio, run locally with the full model:",
            "https://github.com/Tencent-Hunyuan/HunyuanVideo-Foley",
        ])
        return video_outputs, success_msg
    except Exception as e:
        # UI boundary: report any failure in the status box instead of
        # letting it propagate to the caller.
        logger.error(f"Demo processing failed: {e}")
        return [], f"❌ Demo processing failed: {e}"
def create_working_interface():
    """Build the Gradio Blocks UI for the synthetic-audio demo.

    The layout has a header, a demo notice, an input column (video upload,
    audio description, CFG/steps/samples sliders, generate button), an output
    column (three audio players plus a status box), and a footer. Moving the
    samples slider toggles the visibility of players 2 and 3; clicking the
    button runs ``process_video_demo`` and fills the players and status box.

    Returns:
        The constructed ``gr.Blocks`` app, not yet launched.
    """
    css = """
    .gradio-container {
        font-family: 'Inter', sans-serif;
        background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
    }
    .main-header {
        text-align: center;
        padding: 2rem;
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        border-radius: 20px;
        margin-bottom: 2rem;
        color: white;
    }
    .demo-notice {
        background: #e8f4fd;
        border: 2px solid #1890ff;
        border-radius: 10px;
        padding: 1rem;
        margin: 1rem 0;
        color: #0050b3;
    }
    """

    with gr.Blocks(css=css, title="HunyuanVideo-Foley Demo") as app:
        # Header
        with gr.Column(elem_classes=["main-header"]):
            gr.HTML("""
            <h1>🎵 HunyuanVideo-Foley</h1>
            <p>Working Demo Version</p>
            """)

        # Demo notice
        gr.HTML("""
        <div class="demo-notice">
            <strong>🎯 Working Demo:</strong> This version generates synthetic audio to demonstrate the interface.
            Upload a video and try the controls to see how it works!<br>
            <strong>For real AI audio:</strong> Visit the <a href="https://github.com/Tencent-Hunyuan/HunyuanVideo-Foley" target="_blank">original repository</a>
        </div>
        """)

        with gr.Row():
            # Input section
            with gr.Column(scale=1):
                gr.Markdown("### 📹 Video Input")
                # NOTE(review): confirm the installed Gradio version accepts
                # ``info=`` on gr.Video.
                video_input = gr.Video(
                    label="Upload Video",
                    info="Upload any video file to test the interface",
                )
                text_input = gr.Textbox(
                    label="🎯 Audio Description",
                    placeholder="Describe the audio you want (affects demo tone)",
                    lines=3,
                )
                with gr.Row():
                    guidance_scale = gr.Slider(
                        minimum=1.0,
                        maximum=10.0,
                        value=4.0,
                        step=0.1,
                        label="🎚️ CFG Scale",
                    )
                    inference_steps = gr.Slider(
                        minimum=10,
                        maximum=100,
                        value=50,
                        step=5,
                        label="⚡ Steps",
                    )
                    sample_nums = gr.Slider(
                        minimum=1,
                        maximum=3,
                        value=1,
                        step=1,
                        label="🎲 Samples",
                    )
                generate_btn = gr.Button("🎵 Generate Demo Audio", variant="primary")

            # Output section
            with gr.Column(scale=1):
                gr.Markdown("### 🎵 Generated Audio")
                audio_output_1 = gr.Audio(label="Sample 1", visible=True)
                audio_output_2 = gr.Audio(label="Sample 2", visible=False)
                audio_output_3 = gr.Audio(label="Sample 3", visible=False)
                status_output = gr.Textbox(
                    label="Status",
                    interactive=False,
                    lines=6,
                )

        # Event handlers
        def update_visibility(sample_nums):
            """Show as many audio players as samples were requested."""
            return [
                gr.update(visible=True),  # Sample 1 is always visible
                gr.update(visible=sample_nums >= 2),
                gr.update(visible=sample_nums >= 3),
            ]

        def process_demo(video_file, text_prompt, guidance_scale, inference_steps, sample_nums):
            """Run the demo and spread its results over the three players."""
            audio_files, status_msg = process_video_demo(
                video_file, text_prompt, guidance_scale, inference_steps, int(sample_nums)
            )
            # Pad with None so unused players are cleared.
            slots = (list(audio_files[:3]) + [None, None, None])[:3]
            return slots[0], slots[1], slots[2], status_msg

        # Connect events
        sample_nums.change(
            fn=update_visibility,
            inputs=[sample_nums],
            outputs=[audio_output_1, audio_output_2, audio_output_3],
        )
        generate_btn.click(
            fn=process_demo,
            inputs=[video_input, text_input, guidance_scale, inference_steps, sample_nums],
            outputs=[audio_output_1, audio_output_2, audio_output_3, status_output],
        )

        # Footer
        # NOTE(review): the two footer icons were unrecoverable from the
        # garbled source text; the ones below are best guesses.
        gr.HTML("""
        <div style="text-align: center; padding: 2rem; color: #666;">
            <p>🚀 <strong>Demo Version:</strong> Generates synthetic audio for interface demonstration</p>
            <p>🔗 <strong>Full Version:</strong> <a href="https://github.com/Tencent-Hunyuan/HunyuanVideo-Foley" target="_blank">GitHub Repository</a></p>
        </div>
        """)

    return app
if __name__ == "__main__":
    # Drop the existing log handlers, then send INFO-and-above messages
    # through print without adding an extra newline.
    logger.remove()
    logger.add(lambda record: print(record, end=''), level="INFO")
    logger.info("Starting HunyuanVideo-Foley Working Demo...")

    # Build the UI first so the "ready" message is logged before serving.
    app = create_working_interface()
    logger.info("Demo app ready - will generate synthetic audio for testing")

    # Listen on all interfaces at port 7860, with no public share link.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "debug": False,
        "show_error": True,
    }
    app.launch(**launch_options)