tchung1970 Claude committed on
Commit
c97cd22
·
1 Parent(s): 3121e9b

Localize Gradio interface to Korean

Browse files

- Translated all UI text strings from English to Korean
- Updated header title and description
- Localized input labels: video upload, audio description, CFG scale, steps, sample numbers
- Translated button text and status messages
- Updated quick start guide and examples section
- Localized all logging messages and error messages
- Maintained English text prompts as they are expected by the model

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <[email protected]>

Files changed (1) hide show
  1. app.py +56 -56
app.py CHANGED
@@ -26,10 +26,10 @@ MODEL_PATH = os.environ.get("HIFI_FOLEY_MODEL_PATH", "./pretrained_models/")
26
  CONFIG_PATH = "configs/hunyuanvideo-foley-xxl.yaml"
27
 
28
  def download_model_from_hf(repo_id: str = "tencent/HunyuanVideo-Foley", local_dir: str = "./pretrained_models") -> str:
29
- """HuggingFace自动下载模型到本地目录"""
30
  try:
31
- logger.info(f"开始从HuggingFace下载模型:{repo_id}")
32
- logger.info(f"下载目标目录:{local_dir}")
33
 
34
  # 确保本地目录存在
35
  os.makedirs(local_dir, exist_ok=True)
@@ -42,11 +42,11 @@ def download_model_from_hf(repo_id: str = "tencent/HunyuanVideo-Foley", local_di
42
  local_files_only=False, # 允许从网络下载
43
  )
44
 
45
- logger.info(f"✅ 模型下载成功!保存在:{local_dir}")
46
- return f"✅ 模型从 {repo_id} 下载成功!"
47
 
48
  except Exception as e:
49
- error_msg = f"❌ 模型下载失败:{str(e)}"
50
  logger.error(error_msg)
51
  return error_msg
52
 
@@ -72,48 +72,48 @@ def setup_device(device_str: str = "auto", gpu_id: int = 0) -> torch.device:
72
  return device
73
 
74
  def auto_load_models() -> str:
75
- """Automatically load preset models"""
76
  global model_dict, cfg, device
77
 
78
  try:
79
- # 如果模型路径不存在,尝试从HuggingFace下载
80
  if not os.path.exists(MODEL_PATH):
81
- logger.info(f"模型路径 {MODEL_PATH} 不存在,开始从HuggingFace下载...")
82
  download_result = download_model_from_hf(local_dir=MODEL_PATH.rstrip('/'))
83
- if "失败" in download_result:
84
  return download_result
85
 
86
- # 如果配置文件不存在,也尝试从HuggingFace下载
87
  if not os.path.exists(CONFIG_PATH):
88
- logger.info(f"配置文件 {CONFIG_PATH} 不存在,尝试从HuggingFace下载...")
89
- # 如果是从pretrained_models/配置路径,也尝试下载
90
  if CONFIG_PATH.startswith("configs/"):
91
  config_dir = os.path.dirname(CONFIG_PATH)
92
  if not os.path.exists(config_dir):
93
  download_result = download_model_from_hf(local_dir="./")
94
- if "失败" in download_result:
95
  return download_result
96
 
97
- # 最后检查配置文件是否存在
98
  if not os.path.exists(CONFIG_PATH):
99
- return f"❌ 配置文件未找到: {CONFIG_PATH}"
100
 
101
  # Use GPU by default
102
  device = setup_device("auto", 0)
103
 
104
- # Load model
105
- logger.info("正在加载模型...")
106
- logger.info(f"模型路径: {MODEL_PATH}")
107
- logger.info(f"配置路径: {CONFIG_PATH}")
108
 
109
  model_dict, cfg = load_model(MODEL_PATH, CONFIG_PATH, device)
110
 
111
- logger.info("✅ 模型加载成功!")
112
- return "✅ 模型加载成功!"
113
 
114
  except Exception as e:
115
- logger.error(f"模型加载失败: {str(e)}")
116
- return f"❌ 模型加载失败: {str(e)}"
117
 
118
  @spaces.GPU(duration=120)
119
  @torch.inference_mode()
@@ -128,10 +128,10 @@ def infer_single_video(
128
  global model_dict, cfg, device
129
 
130
  if model_dict is None or cfg is None:
131
- return [], "❌ Please load the model first!"
132
 
133
  if video_file is None:
134
- return [], "❌ Please upload a video file!"
135
 
136
  # Allow empty text prompt, use empty string if no prompt provided
137
  if text_prompt is None:
@@ -153,7 +153,7 @@ def infer_single_video(
153
  # Denoising process to generate multiple audio samples
154
  # Note: The model now generates sample_nums audio samples per inference
155
  # The denoise_process function returns audio with shape [batch_size, channels, samples]
156
- logger.info(f"Generating {sample_nums} audio samples...")
157
  audio, sample_rate = denoise_process(
158
  visual_feats,
159
  text_feats,
@@ -180,12 +180,12 @@ def infer_single_video(
180
  merge_audio_video(audio_output, video_file, video_output)
181
  video_outputs.append(video_output)
182
 
183
- logger.info(f"Inference completed! Generated {sample_nums} samples.")
184
- return video_outputs, f"✅ Generated {sample_nums} audio sample(s) successfully!"
185
 
186
  except Exception as e:
187
- logger.error(f"Inference failed: {str(e)}")
188
- return [], f"❌ Inference failed: {str(e)}"
189
 
190
  def update_video_outputs(video_list, status_msg):
191
  """Update video outputs based on the number of generated samples"""
@@ -538,31 +538,31 @@ def create_gradio_interface():
538
  with gr.Column(elem_classes=["main-header"]):
539
  gr.HTML("""
540
  <h1>🎵 HunyuanVideo-Foley</h1>
541
- <p>Text-Video-to-Audio Synthesis: Generate realistic audio from video and text descriptions</p>
542
  """)
543
 
544
  # Usage Guide
545
  with gr.Column(elem_classes=["status-card"]):
546
  gr.Markdown("""
547
- ### 📋 Quick Start Guide
548
- **1.** Upload your video file\t**2.** Add optional text description\t**3.** Adjust sample numbers (1-6)\t**4.** Click Generate Audio
549
 
550
- 💡 For quick start, you can load the prepared examples by clicking the button.
551
  """, elem_classes=["usage-guide"])
552
 
553
  # Main inference interface - Input and Results side by side
554
  with gr.Row(elem_classes=["main-interface"]):
555
  # Input section
556
  with gr.Column(scale=1, elem_classes=["input-section"]):
557
- gr.Markdown("### 📹 Video Input")
558
 
559
  video_input = gr.Video(
560
- label="Upload Video",
561
  height=300
562
  )
563
 
564
  text_input = gr.Textbox(
565
- label="🎯 Audio Description (English)",
566
  placeholder="A person walks on frozen ice",
567
  lines=3,
568
  )
@@ -573,7 +573,7 @@ def create_gradio_interface():
573
  maximum=10.0,
574
  value=4.5,
575
  step=0.1,
576
- label="🎚️ CFG Scale",
577
  )
578
 
579
  inference_steps = gr.Slider(
@@ -581,7 +581,7 @@ def create_gradio_interface():
581
  maximum=100,
582
  value=50,
583
  step=5,
584
- label="⚡ Steps",
585
  )
586
 
587
  sample_nums = gr.Slider(
@@ -589,24 +589,24 @@ def create_gradio_interface():
589
  maximum=6,
590
  value=1,
591
  step=1,
592
- label="🎲 Sample Nums",
593
  )
594
 
595
  generate_btn = gr.Button(
596
- "🎵 Generate Audio",
597
  variant="primary",
598
  elem_classes=["generate-btn"]
599
  )
600
 
601
  # Results section
602
  with gr.Column(scale=1, elem_classes=["output-section"]):
603
- gr.Markdown("### 🎥 Generated Results")
604
 
605
  # Multi-video gallery for displaying multiple generated samples
606
  with gr.Column():
607
  # Primary video (Sample 1)
608
  video_output_1 = gr.Video(
609
- label="Sample 1",
610
  height=250,
611
  visible=True
612
  )
@@ -615,44 +615,44 @@ def create_gradio_interface():
615
  with gr.Row(elem_classes=["additional-samples"]):
616
  with gr.Column(scale=1):
617
  video_output_2 = gr.Video(
618
- label="Sample 2",
619
  height=150,
620
  visible=False
621
  )
622
  video_output_3 = gr.Video(
623
- label="Sample 3",
624
  height=150,
625
  visible=False
626
  )
627
  with gr.Column(scale=1):
628
  video_output_4 = gr.Video(
629
- label="Sample 4",
630
  height=150,
631
  visible=False
632
  )
633
  video_output_5 = gr.Video(
634
- label="Sample 5",
635
  height=150,
636
  visible=False
637
  )
638
 
639
  # Sample 6 - full width
640
  video_output_6 = gr.Video(
641
- label="Sample 6",
642
  height=150,
643
  visible=False
644
  )
645
 
646
  result_text = gr.Textbox(
647
- label="Status",
648
  interactive=False,
649
  lines=2
650
  )
651
 
652
  # Examples section at the bottom
653
  with gr.Column(elem_classes=["examples-section"]):
654
- gr.Markdown("### 🌟 Examples")
655
- gr.Markdown("Click on any example to load it into the interface above")
656
 
657
  # Define your custom examples here - 8 examples total
658
  examples_data = [
@@ -741,7 +741,7 @@ def create_gradio_interface():
741
 
742
  # Load button
743
  example_btn = gr.Button(
744
- f"Load Example {idx+1}",
745
  variant="secondary",
746
  size="sm"
747
  )
@@ -821,7 +821,7 @@ def create_gradio_interface():
821
  # Footer
822
  gr.HTML("""
823
  <div class="footer-text">
824
- <p>🚀 Powered by HunyuanVideo-Foley | Generate high-quality audio from video and text descriptions</p>
825
  </div>
826
  """)
827
 
@@ -839,7 +839,7 @@ if __name__ == "__main__":
839
  logger.add(lambda msg: print(msg, end=''), level="INFO")
840
 
841
  # Auto-load model
842
- logger.info("Starting application and loading model...")
843
  model_load_result = auto_load_models()
844
  logger.info(model_load_result)
845
 
@@ -848,7 +848,7 @@ if __name__ == "__main__":
848
 
849
  # Log completion status
850
  if "successfully" in model_load_result:
851
- logger.info("Application ready, model loaded")
852
 
853
  # Test
854
  app.launch(
 
26
  CONFIG_PATH = "configs/hunyuanvideo-foley-xxl.yaml"
27
 
28
  def download_model_from_hf(repo_id: str = "tencent/HunyuanVideo-Foley", local_dir: str = "./pretrained_models") -> str:
29
+ """HuggingFace에서 모델을 로컬 디렉토리로 자동 다운로드"""
30
  try:
31
+ logger.info(f"HuggingFace에서 모델 다운로드 시작: {repo_id}")
32
+ logger.info(f"다운로드 대상 디렉토리: {local_dir}")
33
 
34
  # 确保本地目录存在
35
  os.makedirs(local_dir, exist_ok=True)
 
42
  local_files_only=False, # 允许从网络下载
43
  )
44
 
45
+ logger.info(f"✅ 모델 다운로드 성공! 저장 위치: {local_dir}")
46
+ return f"✅ {repo_id}에서 모델 다운로드 성공!"
47
 
48
  except Exception as e:
49
+ error_msg = f"❌ 모델 다운로드 실패: {str(e)}"
50
  logger.error(error_msg)
51
  return error_msg
52
 
 
72
  return device
73
 
74
  def auto_load_models() -> str:
75
+ """사전 설정된 모델을 자동으로 로드"""
76
  global model_dict, cfg, device
77
 
78
  try:
79
+ # 모델 경로가 존재하지 않으면 HuggingFace에서 다운로드 시도
80
  if not os.path.exists(MODEL_PATH):
81
+ logger.info(f"모델 경로 {MODEL_PATH}가 존재하지 않아 HuggingFace에서 다운로드 시작...")
82
  download_result = download_model_from_hf(local_dir=MODEL_PATH.rstrip('/'))
83
+ if "실패" in download_result:
84
  return download_result
85
 
86
+ # 구성 파일이 존재하지 않으면 HuggingFace에서 다운로드 시도
87
  if not os.path.exists(CONFIG_PATH):
88
+ logger.info(f"구성 파일 {CONFIG_PATH}가 존재하지 않아 HuggingFace에서 다운로드 시도...")
89
+ # pretrained_models/ 구성 경로인 경우 다운로드 시도
90
  if CONFIG_PATH.startswith("configs/"):
91
  config_dir = os.path.dirname(CONFIG_PATH)
92
  if not os.path.exists(config_dir):
93
  download_result = download_model_from_hf(local_dir="./")
94
+ if "실패" in download_result:
95
  return download_result
96
 
97
+ # 구성 파일 존재 여부 최종 확인
98
  if not os.path.exists(CONFIG_PATH):
99
+ return f"❌ 구성 파일을 찾을 수 없음: {CONFIG_PATH}"
100
 
101
  # Use GPU by default
102
  device = setup_device("auto", 0)
103
 
104
+ # 모델 로드
105
+ logger.info("모델 로딩 중...")
106
+ logger.info(f"모델 경로: {MODEL_PATH}")
107
+ logger.info(f"구성 경로: {CONFIG_PATH}")
108
 
109
  model_dict, cfg = load_model(MODEL_PATH, CONFIG_PATH, device)
110
 
111
+ logger.info("✅ 모델 로딩 성공!")
112
+ return "✅ 모델 로딩 성공!"
113
 
114
  except Exception as e:
115
+ logger.error(f"모델 로딩 실패: {str(e)}")
116
+ return f"❌ 모델 로딩 실패: {str(e)}"
117
 
118
  @spaces.GPU(duration=120)
119
  @torch.inference_mode()
 
128
  global model_dict, cfg, device
129
 
130
  if model_dict is None or cfg is None:
131
+ return [], "❌ 먼저 모델을 로드해주세요!"
132
 
133
  if video_file is None:
134
+ return [], "❌ 비디오 파일을 업로드해주세요!"
135
 
136
  # Allow empty text prompt, use empty string if no prompt provided
137
  if text_prompt is None:
 
153
  # Denoising process to generate multiple audio samples
154
  # Note: The model now generates sample_nums audio samples per inference
155
  # The denoise_process function returns audio with shape [batch_size, channels, samples]
156
+ logger.info(f"{sample_nums}개 오디오 샘플 생성 중...")
157
  audio, sample_rate = denoise_process(
158
  visual_feats,
159
  text_feats,
 
180
  merge_audio_video(audio_output, video_file, video_output)
181
  video_outputs.append(video_output)
182
 
183
+ logger.info(f"추론 완료! {sample_nums}개 샘플 생성됨.")
184
+ return video_outputs, f"✅ {sample_nums}개 오디오 샘플이 성공적으로 생성되었습니다!"
185
 
186
  except Exception as e:
187
+ logger.error(f"추론 실패: {str(e)}")
188
+ return [], f"❌ 추론 실패: {str(e)}"
189
 
190
  def update_video_outputs(video_list, status_msg):
191
  """Update video outputs based on the number of generated samples"""
 
538
  with gr.Column(elem_classes=["main-header"]):
539
  gr.HTML("""
540
  <h1>🎵 HunyuanVideo-Foley</h1>
541
+ <p>텍스트-비디오-오디오 합성: 비디오와 텍스트 설명에서 사실적인 오디오 생성</p>
542
  """)
543
 
544
  # Usage Guide
545
  with gr.Column(elem_classes=["status-card"]):
546
  gr.Markdown("""
547
+ ### 📋 빠른 시작 가이드
548
+ **1.** 비디오 파일 업로드\t**2.** 선택적 텍스트 설명 추가\t**3.** 샘플 수 조정 (1-6)\t**4.** 오디오 생성 클릭
549
 
550
+ 💡 빠른 시작을 위해 버튼을 클릭하여 준비된 예제를 로드할 수 있습니다.
551
  """, elem_classes=["usage-guide"])
552
 
553
  # Main inference interface - Input and Results side by side
554
  with gr.Row(elem_classes=["main-interface"]):
555
  # Input section
556
  with gr.Column(scale=1, elem_classes=["input-section"]):
557
+ gr.Markdown("### 📹 비디오 입력")
558
 
559
  video_input = gr.Video(
560
+ label="비디오 업로드",
561
  height=300
562
  )
563
 
564
  text_input = gr.Textbox(
565
+ label="🎯 오디오 설명 (영어)",
566
  placeholder="A person walks on frozen ice",
567
  lines=3,
568
  )
 
573
  maximum=10.0,
574
  value=4.5,
575
  step=0.1,
576
+ label="🎚️ CFG 스케일",
577
  )
578
 
579
  inference_steps = gr.Slider(
 
581
  maximum=100,
582
  value=50,
583
  step=5,
584
+ label="⚡ 단계",
585
  )
586
 
587
  sample_nums = gr.Slider(
 
589
  maximum=6,
590
  value=1,
591
  step=1,
592
+ label="🎲 샘플 수",
593
  )
594
 
595
  generate_btn = gr.Button(
596
+ "🎵 오디오 생성",
597
  variant="primary",
598
  elem_classes=["generate-btn"]
599
  )
600
 
601
  # Results section
602
  with gr.Column(scale=1, elem_classes=["output-section"]):
603
+ gr.Markdown("### 🎥 생성 결과")
604
 
605
  # Multi-video gallery for displaying multiple generated samples
606
  with gr.Column():
607
  # Primary video (Sample 1)
608
  video_output_1 = gr.Video(
609
+ label="샘플 1",
610
  height=250,
611
  visible=True
612
  )
 
615
  with gr.Row(elem_classes=["additional-samples"]):
616
  with gr.Column(scale=1):
617
  video_output_2 = gr.Video(
618
+ label="샘플 2",
619
  height=150,
620
  visible=False
621
  )
622
  video_output_3 = gr.Video(
623
+ label="샘플 3",
624
  height=150,
625
  visible=False
626
  )
627
  with gr.Column(scale=1):
628
  video_output_4 = gr.Video(
629
+ label="샘플 4",
630
  height=150,
631
  visible=False
632
  )
633
  video_output_5 = gr.Video(
634
+ label="샘플 5",
635
  height=150,
636
  visible=False
637
  )
638
 
639
  # Sample 6 - full width
640
  video_output_6 = gr.Video(
641
+ label="샘플 6",
642
  height=150,
643
  visible=False
644
  )
645
 
646
  result_text = gr.Textbox(
647
+ label="상태",
648
  interactive=False,
649
  lines=2
650
  )
651
 
652
  # Examples section at the bottom
653
  with gr.Column(elem_classes=["examples-section"]):
654
+ gr.Markdown("### 🌟 예제")
655
+ gr.Markdown("위 인터페이스로 로드하려면 예제를 클릭하세요")
656
 
657
  # Define your custom examples here - 8 examples total
658
  examples_data = [
 
741
 
742
  # Load button
743
  example_btn = gr.Button(
744
+ f"예제 {idx+1} 로드",
745
  variant="secondary",
746
  size="sm"
747
  )
 
821
  # Footer
822
  gr.HTML("""
823
  <div class="footer-text">
824
+ <p>🚀 HunyuanVideo-Foley로 구동 | 비디오와 텍스트 설명에서 고품질 오디오 생성</p>
825
  </div>
826
  """)
827
 
 
839
  logger.add(lambda msg: print(msg, end=''), level="INFO")
840
 
841
  # Auto-load model
842
+ logger.info("애플리케이션 시작 및 모델 로딩...")
843
  model_load_result = auto_load_models()
844
  logger.info(model_load_result)
845
 
 
848
 
849
  # Log completion status
850
  if "successfully" in model_load_result:
851
+ logger.info("애플리케이션 준비 완료, 모델 로드 완료")
852
 
853
  # Test
854
  app.launch(