Spaces:
Running
on
Zero
Running
on
Zero
Commit
·
c97cd22
1
Parent(s):
3121e9b
Localize Gradio interface to Korean
Browse files
- Translated all UI text strings from English to Korean
- Updated header title and description
- Localized input labels: video upload, audio description, CFG scale, steps, sample numbers
- Translated button text and status messages
- Updated quick start guide and examples section
- Localized all logging messages and error messages
- Maintained English text prompts as they are expected by the model
🤖 Generated with [Claude Code](https://claude.ai/code)
Co-Authored-By: Claude <[email protected]>
app.py
CHANGED
@@ -26,10 +26,10 @@ MODEL_PATH = os.environ.get("HIFI_FOLEY_MODEL_PATH", "./pretrained_models/")
|
|
26 |
CONFIG_PATH = "configs/hunyuanvideo-foley-xxl.yaml"
|
27 |
|
28 |
def download_model_from_hf(repo_id: str = "tencent/HunyuanVideo-Foley", local_dir: str = "./pretrained_models") -> str:
|
29 |
-
"""
|
30 |
try:
|
31 |
-
logger.info(f"
|
32 |
-
logger.info(f"
|
33 |
|
34 |
# 确保本地目录存在
|
35 |
os.makedirs(local_dir, exist_ok=True)
|
@@ -42,11 +42,11 @@ def download_model_from_hf(repo_id: str = "tencent/HunyuanVideo-Foley", local_di
|
|
42 |
local_files_only=False, # 允许从网络下载
|
43 |
)
|
44 |
|
45 |
-
logger.info(f"✅
|
46 |
-
return f"✅
|
47 |
|
48 |
except Exception as e:
|
49 |
-
error_msg = f"❌
|
50 |
logger.error(error_msg)
|
51 |
return error_msg
|
52 |
|
@@ -72,48 +72,48 @@ def setup_device(device_str: str = "auto", gpu_id: int = 0) -> torch.device:
|
|
72 |
return device
|
73 |
|
74 |
def auto_load_models() -> str:
|
75 |
-
"""
|
76 |
global model_dict, cfg, device
|
77 |
|
78 |
try:
|
79 |
-
#
|
80 |
if not os.path.exists(MODEL_PATH):
|
81 |
-
logger.info(f"
|
82 |
download_result = download_model_from_hf(local_dir=MODEL_PATH.rstrip('/'))
|
83 |
-
if "
|
84 |
return download_result
|
85 |
|
86 |
-
#
|
87 |
if not os.path.exists(CONFIG_PATH):
|
88 |
-
logger.info(f"
|
89 |
-
#
|
90 |
if CONFIG_PATH.startswith("configs/"):
|
91 |
config_dir = os.path.dirname(CONFIG_PATH)
|
92 |
if not os.path.exists(config_dir):
|
93 |
download_result = download_model_from_hf(local_dir="./")
|
94 |
-
if "
|
95 |
return download_result
|
96 |
|
97 |
-
#
|
98 |
if not os.path.exists(CONFIG_PATH):
|
99 |
-
return f"❌
|
100 |
|
101 |
# Use GPU by default
|
102 |
device = setup_device("auto", 0)
|
103 |
|
104 |
-
#
|
105 |
-
logger.info("
|
106 |
-
logger.info(f"
|
107 |
-
logger.info(f"
|
108 |
|
109 |
model_dict, cfg = load_model(MODEL_PATH, CONFIG_PATH, device)
|
110 |
|
111 |
-
logger.info("✅
|
112 |
-
return "✅
|
113 |
|
114 |
except Exception as e:
|
115 |
-
logger.error(f"
|
116 |
-
return f"❌
|
117 |
|
118 |
@spaces.GPU(duration=120)
|
119 |
@torch.inference_mode()
|
@@ -128,10 +128,10 @@ def infer_single_video(
|
|
128 |
global model_dict, cfg, device
|
129 |
|
130 |
if model_dict is None or cfg is None:
|
131 |
-
return [], "❌
|
132 |
|
133 |
if video_file is None:
|
134 |
-
return [], "❌
|
135 |
|
136 |
# Allow empty text prompt, use empty string if no prompt provided
|
137 |
if text_prompt is None:
|
@@ -153,7 +153,7 @@ def infer_single_video(
|
|
153 |
# Denoising process to generate multiple audio samples
|
154 |
# Note: The model now generates sample_nums audio samples per inference
|
155 |
# The denoise_process function returns audio with shape [batch_size, channels, samples]
|
156 |
-
logger.info(f"
|
157 |
audio, sample_rate = denoise_process(
|
158 |
visual_feats,
|
159 |
text_feats,
|
@@ -180,12 +180,12 @@ def infer_single_video(
|
|
180 |
merge_audio_video(audio_output, video_file, video_output)
|
181 |
video_outputs.append(video_output)
|
182 |
|
183 |
-
logger.info(f"
|
184 |
-
return video_outputs, f"✅
|
185 |
|
186 |
except Exception as e:
|
187 |
-
logger.error(f"
|
188 |
-
return [], f"❌
|
189 |
|
190 |
def update_video_outputs(video_list, status_msg):
|
191 |
"""Update video outputs based on the number of generated samples"""
|
@@ -538,31 +538,31 @@ def create_gradio_interface():
|
|
538 |
with gr.Column(elem_classes=["main-header"]):
|
539 |
gr.HTML("""
|
540 |
<h1>🎵 HunyuanVideo-Foley</h1>
|
541 |
-
<p
|
542 |
""")
|
543 |
|
544 |
# Usage Guide
|
545 |
with gr.Column(elem_classes=["status-card"]):
|
546 |
gr.Markdown("""
|
547 |
-
### 📋
|
548 |
-
**1.**
|
549 |
|
550 |
-
💡
|
551 |
""", elem_classes=["usage-guide"])
|
552 |
|
553 |
# Main inference interface - Input and Results side by side
|
554 |
with gr.Row(elem_classes=["main-interface"]):
|
555 |
# Input section
|
556 |
with gr.Column(scale=1, elem_classes=["input-section"]):
|
557 |
-
gr.Markdown("### 📹
|
558 |
|
559 |
video_input = gr.Video(
|
560 |
-
label="
|
561 |
height=300
|
562 |
)
|
563 |
|
564 |
text_input = gr.Textbox(
|
565 |
-
label="🎯
|
566 |
placeholder="A person walks on frozen ice",
|
567 |
lines=3,
|
568 |
)
|
@@ -573,7 +573,7 @@ def create_gradio_interface():
|
|
573 |
maximum=10.0,
|
574 |
value=4.5,
|
575 |
step=0.1,
|
576 |
-
label="🎚️ CFG
|
577 |
)
|
578 |
|
579 |
inference_steps = gr.Slider(
|
@@ -581,7 +581,7 @@ def create_gradio_interface():
|
|
581 |
maximum=100,
|
582 |
value=50,
|
583 |
step=5,
|
584 |
-
label="⚡
|
585 |
)
|
586 |
|
587 |
sample_nums = gr.Slider(
|
@@ -589,24 +589,24 @@ def create_gradio_interface():
|
|
589 |
maximum=6,
|
590 |
value=1,
|
591 |
step=1,
|
592 |
-
label="🎲
|
593 |
)
|
594 |
|
595 |
generate_btn = gr.Button(
|
596 |
-
"🎵
|
597 |
variant="primary",
|
598 |
elem_classes=["generate-btn"]
|
599 |
)
|
600 |
|
601 |
# Results section
|
602 |
with gr.Column(scale=1, elem_classes=["output-section"]):
|
603 |
-
gr.Markdown("### 🎥
|
604 |
|
605 |
# Multi-video gallery for displaying multiple generated samples
|
606 |
with gr.Column():
|
607 |
# Primary video (Sample 1)
|
608 |
video_output_1 = gr.Video(
|
609 |
-
label="
|
610 |
height=250,
|
611 |
visible=True
|
612 |
)
|
@@ -615,44 +615,44 @@ def create_gradio_interface():
|
|
615 |
with gr.Row(elem_classes=["additional-samples"]):
|
616 |
with gr.Column(scale=1):
|
617 |
video_output_2 = gr.Video(
|
618 |
-
label="
|
619 |
height=150,
|
620 |
visible=False
|
621 |
)
|
622 |
video_output_3 = gr.Video(
|
623 |
-
label="
|
624 |
height=150,
|
625 |
visible=False
|
626 |
)
|
627 |
with gr.Column(scale=1):
|
628 |
video_output_4 = gr.Video(
|
629 |
-
label="
|
630 |
height=150,
|
631 |
visible=False
|
632 |
)
|
633 |
video_output_5 = gr.Video(
|
634 |
-
label="
|
635 |
height=150,
|
636 |
visible=False
|
637 |
)
|
638 |
|
639 |
# Sample 6 - full width
|
640 |
video_output_6 = gr.Video(
|
641 |
-
label="
|
642 |
height=150,
|
643 |
visible=False
|
644 |
)
|
645 |
|
646 |
result_text = gr.Textbox(
|
647 |
-
label="
|
648 |
interactive=False,
|
649 |
lines=2
|
650 |
)
|
651 |
|
652 |
# Examples section at the bottom
|
653 |
with gr.Column(elem_classes=["examples-section"]):
|
654 |
-
gr.Markdown("### 🌟
|
655 |
-
gr.Markdown("
|
656 |
|
657 |
# Define your custom examples here - 8 examples total
|
658 |
examples_data = [
|
@@ -741,7 +741,7 @@ def create_gradio_interface():
|
|
741 |
|
742 |
# Load button
|
743 |
example_btn = gr.Button(
|
744 |
-
f"
|
745 |
variant="secondary",
|
746 |
size="sm"
|
747 |
)
|
@@ -821,7 +821,7 @@ def create_gradio_interface():
|
|
821 |
# Footer
|
822 |
gr.HTML("""
|
823 |
<div class="footer-text">
|
824 |
-
<p>🚀
|
825 |
</div>
|
826 |
""")
|
827 |
|
@@ -839,7 +839,7 @@ if __name__ == "__main__":
|
|
839 |
logger.add(lambda msg: print(msg, end=''), level="INFO")
|
840 |
|
841 |
# Auto-load model
|
842 |
-
logger.info("
|
843 |
model_load_result = auto_load_models()
|
844 |
logger.info(model_load_result)
|
845 |
|
@@ -848,7 +848,7 @@ if __name__ == "__main__":
|
|
848 |
|
849 |
# Log completion status
|
850 |
if "successfully" in model_load_result:
|
851 |
-
logger.info("
|
852 |
|
853 |
# Test
|
854 |
app.launch(
|
|
|
26 |
CONFIG_PATH = "configs/hunyuanvideo-foley-xxl.yaml"
|
27 |
|
28 |
def download_model_from_hf(repo_id: str = "tencent/HunyuanVideo-Foley", local_dir: str = "./pretrained_models") -> str:
|
29 |
+
"""HuggingFace에서 모델을 로컬 디렉토리로 자동 다운로드"""
|
30 |
try:
|
31 |
+
logger.info(f"HuggingFace에서 모델 다운로드 시작: {repo_id}")
|
32 |
+
logger.info(f"다운로드 대상 디렉토리: {local_dir}")
|
33 |
|
34 |
# 确保本地目录存在
|
35 |
os.makedirs(local_dir, exist_ok=True)
|
|
|
42 |
local_files_only=False, # 允许从网络下载
|
43 |
)
|
44 |
|
45 |
+
logger.info(f"✅ 모델 다운로드 성공! 저장 위치: {local_dir}")
|
46 |
+
return f"✅ {repo_id}에서 모델 다운로드 성공!"
|
47 |
|
48 |
except Exception as e:
|
49 |
+
error_msg = f"❌ 모델 다운로드 실패: {str(e)}"
|
50 |
logger.error(error_msg)
|
51 |
return error_msg
|
52 |
|
|
|
72 |
return device
|
73 |
|
74 |
def auto_load_models() -> str:
|
75 |
+
"""사전 설정된 모델을 자동으로 로드"""
|
76 |
global model_dict, cfg, device
|
77 |
|
78 |
try:
|
79 |
+
# 모델 경로가 존재하지 않으면 HuggingFace에서 다운로드 시도
|
80 |
if not os.path.exists(MODEL_PATH):
|
81 |
+
logger.info(f"모델 경로 {MODEL_PATH}가 존재하지 않아 HuggingFace에서 다운로드 시작...")
|
82 |
download_result = download_model_from_hf(local_dir=MODEL_PATH.rstrip('/'))
|
83 |
+
if "실패" in download_result:
|
84 |
return download_result
|
85 |
|
86 |
+
# 구성 파일이 존재하지 않으면 HuggingFace에서 다운로드 시도
|
87 |
if not os.path.exists(CONFIG_PATH):
|
88 |
+
logger.info(f"구성 파일 {CONFIG_PATH}가 존재하지 않아 HuggingFace에서 다운로드 시도...")
|
89 |
+
# configs/ 구성 경로인 경우 다운로드 시도
|
90 |
if CONFIG_PATH.startswith("configs/"):
|
91 |
config_dir = os.path.dirname(CONFIG_PATH)
|
92 |
if not os.path.exists(config_dir):
|
93 |
download_result = download_model_from_hf(local_dir="./")
|
94 |
+
if "실패" in download_result:
|
95 |
return download_result
|
96 |
|
97 |
+
# 구성 파일 존재 여부 최종 확인
|
98 |
if not os.path.exists(CONFIG_PATH):
|
99 |
+
return f"❌ 구성 파일을 찾을 수 없음: {CONFIG_PATH}"
|
100 |
|
101 |
# Use GPU by default
|
102 |
device = setup_device("auto", 0)
|
103 |
|
104 |
+
# 모델 로드
|
105 |
+
logger.info("모델 로딩 중...")
|
106 |
+
logger.info(f"모델 경로: {MODEL_PATH}")
|
107 |
+
logger.info(f"구성 경로: {CONFIG_PATH}")
|
108 |
|
109 |
model_dict, cfg = load_model(MODEL_PATH, CONFIG_PATH, device)
|
110 |
|
111 |
+
logger.info("✅ 모델 로딩 성공!")
|
112 |
+
return "✅ 모델 로딩 성공!"
|
113 |
|
114 |
except Exception as e:
|
115 |
+
logger.error(f"모델 로딩 실패: {str(e)}")
|
116 |
+
return f"❌ 모델 로딩 실패: {str(e)}"
|
117 |
|
118 |
@spaces.GPU(duration=120)
|
119 |
@torch.inference_mode()
|
|
|
128 |
global model_dict, cfg, device
|
129 |
|
130 |
if model_dict is None or cfg is None:
|
131 |
+
return [], "❌ 먼저 모델을 로드해주세요!"
|
132 |
|
133 |
if video_file is None:
|
134 |
+
return [], "❌ 비디오 파일을 업로드해주세요!"
|
135 |
|
136 |
# Allow empty text prompt, use empty string if no prompt provided
|
137 |
if text_prompt is None:
|
|
|
153 |
# Denoising process to generate multiple audio samples
|
154 |
# Note: The model now generates sample_nums audio samples per inference
|
155 |
# The denoise_process function returns audio with shape [batch_size, channels, samples]
|
156 |
+
logger.info(f"{sample_nums}개 오디오 샘플 생성 중...")
|
157 |
audio, sample_rate = denoise_process(
|
158 |
visual_feats,
|
159 |
text_feats,
|
|
|
180 |
merge_audio_video(audio_output, video_file, video_output)
|
181 |
video_outputs.append(video_output)
|
182 |
|
183 |
+
logger.info(f"추론 완료! {sample_nums}개 샘플 생성됨.")
|
184 |
+
return video_outputs, f"✅ {sample_nums}개 오디오 샘플이 성공적으로 생성되었습니다!"
|
185 |
|
186 |
except Exception as e:
|
187 |
+
logger.error(f"추론 실패: {str(e)}")
|
188 |
+
return [], f"❌ 추론 실패: {str(e)}"
|
189 |
|
190 |
def update_video_outputs(video_list, status_msg):
|
191 |
"""Update video outputs based on the number of generated samples"""
|
|
|
538 |
with gr.Column(elem_classes=["main-header"]):
|
539 |
gr.HTML("""
|
540 |
<h1>🎵 HunyuanVideo-Foley</h1>
|
541 |
+
<p>텍스트-비디오-오디오 합성: 비디오와 텍스트 설명에서 사실적인 오디오 생성</p>
|
542 |
""")
|
543 |
|
544 |
# Usage Guide
|
545 |
with gr.Column(elem_classes=["status-card"]):
|
546 |
gr.Markdown("""
|
547 |
+
### 📋 빠른 시작 가이드
|
548 |
+
**1.** 비디오 파일 업로드\t**2.** 선택적 텍스트 설명 추가\t**3.** 샘플 수 조정 (1-6)\t**4.** 오디오 생성 클릭
|
549 |
|
550 |
+
💡 빠른 시작을 위해 버튼을 클릭하여 준비된 예제를 로드할 수 있습니다.
|
551 |
""", elem_classes=["usage-guide"])
|
552 |
|
553 |
# Main inference interface - Input and Results side by side
|
554 |
with gr.Row(elem_classes=["main-interface"]):
|
555 |
# Input section
|
556 |
with gr.Column(scale=1, elem_classes=["input-section"]):
|
557 |
+
gr.Markdown("### 📹 비디오 입력")
|
558 |
|
559 |
video_input = gr.Video(
|
560 |
+
label="비디오 업로드",
|
561 |
height=300
|
562 |
)
|
563 |
|
564 |
text_input = gr.Textbox(
|
565 |
+
label="🎯 오디오 설명 (영어)",
|
566 |
placeholder="A person walks on frozen ice",
|
567 |
lines=3,
|
568 |
)
|
|
|
573 |
maximum=10.0,
|
574 |
value=4.5,
|
575 |
step=0.1,
|
576 |
+
label="🎚️ CFG 스케일",
|
577 |
)
|
578 |
|
579 |
inference_steps = gr.Slider(
|
|
|
581 |
maximum=100,
|
582 |
value=50,
|
583 |
step=5,
|
584 |
+
label="⚡ 단계",
|
585 |
)
|
586 |
|
587 |
sample_nums = gr.Slider(
|
|
|
589 |
maximum=6,
|
590 |
value=1,
|
591 |
step=1,
|
592 |
+
label="🎲 샘플 수",
|
593 |
)
|
594 |
|
595 |
generate_btn = gr.Button(
|
596 |
+
"🎵 오디오 생성",
|
597 |
variant="primary",
|
598 |
elem_classes=["generate-btn"]
|
599 |
)
|
600 |
|
601 |
# Results section
|
602 |
with gr.Column(scale=1, elem_classes=["output-section"]):
|
603 |
+
gr.Markdown("### 🎥 생성 결과")
|
604 |
|
605 |
# Multi-video gallery for displaying multiple generated samples
|
606 |
with gr.Column():
|
607 |
# Primary video (Sample 1)
|
608 |
video_output_1 = gr.Video(
|
609 |
+
label="샘플 1",
|
610 |
height=250,
|
611 |
visible=True
|
612 |
)
|
|
|
615 |
with gr.Row(elem_classes=["additional-samples"]):
|
616 |
with gr.Column(scale=1):
|
617 |
video_output_2 = gr.Video(
|
618 |
+
label="샘플 2",
|
619 |
height=150,
|
620 |
visible=False
|
621 |
)
|
622 |
video_output_3 = gr.Video(
|
623 |
+
label="샘플 3",
|
624 |
height=150,
|
625 |
visible=False
|
626 |
)
|
627 |
with gr.Column(scale=1):
|
628 |
video_output_4 = gr.Video(
|
629 |
+
label="샘플 4",
|
630 |
height=150,
|
631 |
visible=False
|
632 |
)
|
633 |
video_output_5 = gr.Video(
|
634 |
+
label="샘플 5",
|
635 |
height=150,
|
636 |
visible=False
|
637 |
)
|
638 |
|
639 |
# Sample 6 - full width
|
640 |
video_output_6 = gr.Video(
|
641 |
+
label="샘플 6",
|
642 |
height=150,
|
643 |
visible=False
|
644 |
)
|
645 |
|
646 |
result_text = gr.Textbox(
|
647 |
+
label="상태",
|
648 |
interactive=False,
|
649 |
lines=2
|
650 |
)
|
651 |
|
652 |
# Examples section at the bottom
|
653 |
with gr.Column(elem_classes=["examples-section"]):
|
654 |
+
gr.Markdown("### 🌟 예제")
|
655 |
+
gr.Markdown("위 인터페이스로 로드하려면 예제를 클릭하세요")
|
656 |
|
657 |
# Define your custom examples here - 8 examples total
|
658 |
examples_data = [
|
|
|
741 |
|
742 |
# Load button
|
743 |
example_btn = gr.Button(
|
744 |
+
f"예제 {idx+1} 로드",
|
745 |
variant="secondary",
|
746 |
size="sm"
|
747 |
)
|
|
|
821 |
# Footer
|
822 |
gr.HTML("""
|
823 |
<div class="footer-text">
|
824 |
+
<p>🚀 HunyuanVideo-Foley로 구동 | 비디오와 텍스트 설명에서 고품질 오디오 생성</p>
|
825 |
</div>
|
826 |
""")
|
827 |
|
|
|
839 |
logger.add(lambda msg: print(msg, end=''), level="INFO")
|
840 |
|
841 |
# Auto-load model
|
842 |
+
logger.info("애플리케이션 시작 및 모델 로딩...")
|
843 |
model_load_result = auto_load_models()
|
844 |
logger.info(model_load_result)
|
845 |
|
|
|
848 |
|
849 |
# Log completion status
|
850 |
if "successfully" in model_load_result:
|
851 |
+
logger.info("애플리케이션 준비 완료, 모델 로드 완료")
|
852 |
|
853 |
# Test
|
854 |
app.launch(
|