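# Web-page header captured along with this file; kept as comments so the module parses.
# Path: hunyuanvideo-foley / test_audio.py
# Avatar caption: wzy013's picture
# Change description: implement a highly compatible audio generation solution - fully resolve backend errors
# Revision: 55d09cb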
"""
测试音频生成和 Gradio 兼容性
"""
import gradio as gr
import numpy as np
import wave
import tempfile
import os
from loguru import logger
def create_test_audio(text_prompt: str = "test audio") -> str:
    """Synthesize a 3-second mono test clip and save it as a 16-bit WAV file.

    The waveform is picked from the prompt by case-insensitive substring match:

    * contains "footsteps": a 2 Hz sine multiplied by an exponential decay
      that restarts every 0.5 s;
    * contains "rain": Gaussian white noise from NumPy's global, unseeded
      generator, so every call produces different samples;
    * anything else: a 440 Hz sine tone.

    A 0.1 s linear fade-in and fade-out is applied, the signal is clipped to
    [-1, 1], scaled to int16 and written at 44.1 kHz.

    Args:
        text_prompt: Free-text description, used only to choose the waveform.
            ``None`` is treated like an empty prompt.

    Returns:
        Path of the written file, ``test_audio.wav`` inside a temporary
        directory created for this call. The directory is not removed here,
        so the caller is responsible for cleaning it up.
    """
    sample_rate = 44100
    duration = 3.0
    duration_samples = int(duration * sample_rate)

    # endpoint=False puts sample k at exactly k / sample_rate seconds; including
    # the endpoint would stretch the spacing to duration / (N - 1) and detune
    # every tone. float64 keeps the phase accurate over the whole clip.
    t = np.linspace(0, duration, duration_samples, endpoint=False)

    # Pick the waveform from the prompt; a missing prompt falls through to the tone.
    prompt = (text_prompt or "").lower()
    if "footsteps" in prompt:
        audio = 0.4 * np.sin(2 * np.pi * 2 * t) * np.exp(-3 * (t % 0.5))
    elif "rain" in prompt:
        audio = 0.3 * np.random.randn(duration_samples)
    else:
        audio = 0.3 * np.sin(2 * np.pi * 440 * t)

    # Linear fade-in / fade-out so the clip starts and ends at zero amplitude.
    envelope = np.ones_like(audio)
    fade_samples = int(0.1 * sample_rate)
    envelope[:fade_samples] = np.linspace(0, 1, fade_samples)
    envelope[-fade_samples:] = np.linspace(1, 0, fade_samples)
    audio *= envelope

    # Each call gets its own directory, so repeated calls never overwrite each other.
    temp_dir = tempfile.mkdtemp()
    audio_path = os.path.join(temp_dir, "test_audio.wav")

    # Clip to full scale, then convert to 16-bit PCM. WAV data is little-endian,
    # so the byte order is stated explicitly instead of relying on the host's.
    audio_normalized = np.clip(audio, -1.0, 1.0)
    audio_int16 = (audio_normalized * 32767).astype("<i2")

    with wave.open(audio_path, "wb") as wav_file:
        wav_file.setnchannels(1)  # mono
        wav_file.setsampwidth(2)  # 2 bytes per sample = 16-bit
        wav_file.setframerate(sample_rate)
        wav_file.writeframes(audio_int16.tobytes())

    logger.info(f"✅ 测试音频已保存: {audio_path}")
    return audio_path
def test_interface(text_input):
    """Gradio click handler: generate a test clip and describe the outcome.

    Args:
        text_input: Prompt text forwarded unchanged to ``create_test_audio``.

    Returns:
        A ``(audio_path, status)`` pair. ``audio_path`` is the generated
        file's path, or ``None`` when nothing usable was produced; ``status``
        is the human-readable message shown next to the player.
    """
    try:
        audio_path = create_test_audio(text_input)

        # Guard: the generator returned a path, but nothing is on disk.
        if not os.path.exists(audio_path):
            return None, "❌ 音频文件未生成"

        file_size = os.path.getsize(audio_path)
        status = f"✅ 成功生成音频!\n文件路径: {audio_path}\n文件大小: {file_size} bytes\n文本: {text_input}"
        return audio_path, status
    except Exception as e:
        # UI boundary: log the failure and report it in the status box
        # instead of letting the exception propagate to the caller.
        logger.error(f"错误: {e}")
        return None, f"❌ 生成失败: {str(e)}"
# Build the Gradio interface
def create_test_app():
    """Assemble the audio test page and return it without launching it.

    The page has a heading and one row with two columns: the first holds the
    prompt textbox and the generate button, the second holds the audio output
    and a read-only status textbox. Clicking the button calls
    ``test_interface`` with the prompt and routes its two return values to
    the audio and status components.

    Returns:
        The constructed ``gr.Blocks`` object.
    """
    with gr.Blocks(title="Audio Test") as demo:
        gr.HTML("<h1>🎵 音频兼容性测试</h1>")

        with gr.Row():
            # Input column: prompt and trigger.
            with gr.Column():
                prompt_box = gr.Textbox(
                    value="footsteps on ground",
                    label="文本输入",
                    placeholder="输入文本描述...",
                )
                run_button = gr.Button("生成测试音频", variant="primary")

            # Output column: generated audio and the status report.
            with gr.Column():
                audio_player = gr.Audio(label="生成的音频")
                status_box = gr.Textbox(
                    interactive=False,
                    lines=5,
                    label="状态信息",
                )

        # Registered while the Blocks context is still open.
        run_button.click(
            fn=test_interface,
            inputs=[prompt_box],
            outputs=[audio_player, status_box],
        )

    return demo
if __name__ == "__main__":
    def _print_sink(msg):
        """Write one formatted log message to stdout via print."""
        # end='' stops print from appending a newline of its own
        # (presumably the formatted message already ends with one).
        print(msg, end='')

    # Logging setup: clear the logger's existing sinks, then send INFO and
    # above through the print-based sink.
    logger.remove()
    logger.add(_print_sink, level="INFO")

    logger.info("启动音频测试应用...")

    # Listen on all interfaces at port 7861, with no public share link and
    # Gradio's debug mode enabled.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7861,
        "share": False,
        "debug": True,
    }
    app = create_test_app()
    app.launch(**launch_options)