Spaces:

wzy013
/

hunyuanvideo-foley

Running

File size: 3,543 Bytes

55d09cb

"""
测试音频生成和 Gradio 兼容性
"""
import gradio as gr
import numpy as np
import wave
import tempfile
import os
from loguru import logger

def create_test_audio(text_prompt: str = "test audio") -> str:
    """Synthesize a short test clip and save it as a 16-bit mono WAV file.

    The waveform is chosen from keywords found (case-insensitively) in
    ``text_prompt``:

    * ``"footsteps"`` -- a 2 Hz sine whose amplitude decays exponentially
      inside every 0.5 s window;
    * ``"rain"`` -- Gaussian white noise;
    * anything else -- a 440 Hz sine tone.

    A 0.1 s linear fade-in and fade-out is applied, the signal is clipped to
    [-1, 1] and written as 44.1 kHz, 3 s, mono, 16-bit PCM.

    Args:
        text_prompt: Text description used to pick the waveform. ``None`` or
            an empty string falls back to the default tone.

    Returns:
        Path of the written file, ``test_audio.wav`` inside a freshly created
        temporary directory. The directory is not removed by this function.
    """
    sample_rate = 44100
    duration = 3.0
    duration_samples = int(duration * sample_rate)

    # endpoint=False keeps the spacing between samples at exactly
    # 1 / sample_rate; including the endpoint would stretch the time axis by
    # one sample and slightly detune every generated frequency.
    t = np.linspace(
        0, duration, duration_samples, endpoint=False, dtype=np.float32
    )

    # Pick the waveform from the prompt; tolerate a missing prompt instead of
    # failing on ``None.lower()``.
    prompt = (text_prompt or "").lower()
    if "footsteps" in prompt:
        audio = 0.4 * np.sin(2 * np.pi * 2 * t) * np.exp(-3 * (t % 0.5))
    elif "rain" in prompt:
        audio = 0.3 * np.random.randn(duration_samples)
    else:
        audio = 0.3 * np.sin(2 * np.pi * 440 * t)

    # Linear fade-in / fade-out so the clip does not start or end abruptly.
    envelope = np.ones_like(audio)
    fade_samples = int(0.1 * sample_rate)
    envelope[:fade_samples] = np.linspace(0, 1, fade_samples)
    envelope[-fade_samples:] = np.linspace(1, 0, fade_samples)
    audio *= envelope

    # Each call gets its own temporary directory for the output file.
    temp_dir = tempfile.mkdtemp()
    audio_path = os.path.join(temp_dir, "test_audio.wav")

    # Clip to [-1, 1] (the noise branch can exceed it) and convert to 16-bit
    # PCM. WAV data must be little-endian, so request "<i2" explicitly rather
    # than relying on the host's native byte order.
    audio_normalized = np.clip(audio, -1.0, 1.0)
    audio_int16 = (audio_normalized * 32767).astype("<i2")

    # Write the samples with the standard-library wave module.
    with wave.open(audio_path, 'wb') as wav_file:
        wav_file.setnchannels(1)
        wav_file.setsampwidth(2)
        wav_file.setframerate(sample_rate)
        wav_file.writeframes(audio_int16.tobytes())

    logger.info(f"✅ 测试音频已保存: {audio_path}")
    return audio_path

def test_interface(text_input):
    """Gradio callback: generate a test clip and report the outcome.

    Args:
        text_input: Prompt text forwarded to ``create_test_audio``.

    Returns:
        A ``(audio_path, status)`` pair. ``audio_path`` is the path of the
        generated WAV file, or ``None`` when the file is missing or an
        exception was raised; ``status`` is a human-readable message.
    """
    try:
        wav_path = create_test_audio(text_input)

        # Guard: the generator returned a path but nothing exists on disk.
        if not os.path.exists(wav_path):
            return None, "❌ 音频文件未生成"

        size_in_bytes = os.path.getsize(wav_path)
        report_lines = (
            "✅ 成功生成音频！",
            f"文件路径: {wav_path}",
            f"文件大小: {size_in_bytes} bytes",
            f"文本: {text_input}",
        )
        return wav_path, "\n".join(report_lines)

    except Exception as err:
        # UI boundary: log the failure and surface it in the status box
        # instead of letting the exception propagate to Gradio.
        logger.error(f"错误: {err}")
        return None, f"❌ 生成失败: {err!s}"

# 创建 Gradio 界面
def create_test_app():
    """Build the Gradio Blocks UI for the audio compatibility test.

    The layout is a single row with two columns: a prompt textbox plus a
    generate button on the left, and an audio player plus a read-only status
    textbox on the right. Clicking the button calls ``test_interface`` with
    the prompt and routes its two return values to the player and the
    status box.

    Returns:
        The constructed ``gr.Blocks`` application (not yet launched).
    """
    with gr.Blocks(title="Audio Test") as demo:
        gr.HTML(value="<h1>🎵 音频兼容性测试</h1>")

        with gr.Row():
            # Left column: user input and trigger.
            with gr.Column():
                prompt_box = gr.Textbox(
                    label="文本输入",
                    value="footsteps on ground",
                    placeholder="输入文本描述...",
                )
                run_button = gr.Button(value="生成测试音频", variant="primary")

            # Right column: generated audio and status report.
            with gr.Column():
                audio_player = gr.Audio(label="生成的音频")
                status_box = gr.Textbox(
                    label="状态信息",
                    lines=5,
                    interactive=False,
                )

        # Wire the button to the callback.
        run_button.click(
            fn=test_interface,
            inputs=[prompt_box],
            outputs=[audio_player, status_box],
        )

    return demo

if __name__ == "__main__":
    # Logging setup: drop the existing loguru handlers and print INFO-and-above
    # messages as-is (they already end with a newline, hence end='').
    logger.remove()
    logger.add(lambda message: print(message, end=''), level="INFO")

    logger.info("启动音频测试应用...")

    # Listen on all interfaces at port 7861, without a public share link,
    # with Gradio's debug mode enabled.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7861,
        "share": False,
        "debug": True,
    }

    app = create_test_app()
    app.launch(**launch_options)