Spaces:

nguyensu27
/

CHUYEN_MP3

Sleeping

App Files Files Community

mrsu0994 committed on Sep 1

Commit

14100c7

1 Parent(s): 0d26c6c

upload f5-tts source

Browse files

Files changed (3) hide show

app copy.py +405 -0
app.py +139 -29
requirements copy.txt +47 -0

app copy.py ADDED Viewed

	@@ -0,0 +1,405 @@

+import os
+import sys
+import subprocess
+from huggingface_hub import hf_hub_download
+from pydub import AudioSegment
+import gradio as gr
+import time
+# Thêm thư mục src vào sys.path
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), 'src')))
+def run_f5_tts(ref_audio_path, ref_text, gen_text, model="F5TTS_Base", speed=1.2, vocoder_name="vocos"):
+    """Run the bundled ``infer_cli.py`` in a subprocess and return the result as MP3.
+
+    The vocabulary and checkpoint are fetched with ``hf_hub_download`` from the
+    ``nguyensu27/TTS`` repo, then the CLI script under ``src/f5_tts/infer`` is
+    executed with the current interpreter. Afterwards the newest ``.wav`` in
+    ``<app dir>/tests`` that this run created or modified is converted to
+    ``tests/output.mp3``.
+
+    Args:
+        ref_audio_path: Path passed to the CLI as ``--ref_audio``.
+        ref_text: Text passed as ``--ref_text``.
+        gen_text: Text passed as ``--gen_text``.
+        model: Value passed as ``--model``.
+        speed: Value passed as ``--speed`` (converted with ``str``).
+        vocoder_name: Value passed as ``--vocoder_name``.
+
+    Returns:
+        ``(mp3_path, message)`` on success, otherwise ``(None, message)``.
+        Download, subprocess and conversion errors are reported through the
+        message instead of being raised.
+    """
+    current_dir = os.path.dirname(os.path.abspath(__file__))
+    infer_cli_path = os.path.join(current_dir, "src", "f5_tts", "infer", "infer_cli.py")
+    tests_dir = os.path.join(current_dir, "tests")
+    print(f"Infer CLI path: {infer_cli_path}")
+    print(f"Does infer_cli.py exist? {os.path.exists(infer_cli_path)}")
+    if not os.path.exists(infer_cli_path):
+        return None, "File infer_cli.py không tồn tại!"
+    try:
+        vocab_file = hf_hub_download(repo_id="nguyensu27/TTS", filename="vocab.txt")
+        ckpt_file = hf_hub_download(repo_id="nguyensu27/TTS", filename="model_last.pt")
+    except Exception as e:
+        return None, f"Lỗi khi tải model/vocab: {str(e)}"
+    # Force UTF-8 stdio in the child so non-ASCII text survives the pipe.
+    os.environ['PYTHONIOENCODING'] = 'utf-8'
+    env = os.environ.copy()
+    # Let the child import packages from the bundled src/ directory.
+    env['PYTHONPATH'] = os.path.abspath(os.path.join(current_dir, 'src'))
+    command = [
+        sys.executable,
+        infer_cli_path,
+        "--model", model,
+        "--ref_audio", ref_audio_path,
+        "--ref_text", ref_text,
+        "--gen_text", gen_text,
+        "--speed", str(speed),
+        "--vocoder_name", vocoder_name,
+        "--vocab_file", vocab_file,
+        "--ckpt_file", ckpt_file
+    ]
+    print(f"Running command: {' '.join(command)}")
+
+    def _wav_mtimes():
+        # Map every .wav in tests_dir to its mtime; empty when the directory is missing.
+        if not os.path.isdir(tests_dir):
+            return {}
+        return {
+            name: os.path.getmtime(os.path.join(tests_dir, name))
+            for name in os.listdir(tests_dir)
+            if name.endswith('.wav')
+        }
+
+    try:
+        # Snapshot before the run so a leftover .wav from an earlier request
+        # is never mistaken for this run's output.
+        wavs_before = _wav_mtimes()
+        result = subprocess.run(
+            command,
+            check=True,
+            capture_output=True,
+            text=True,
+            # The child writes UTF-8 (PYTHONIOENCODING above); decode it the same
+            # way instead of relying on the parent's locale encoding.
+            encoding='utf-8',
+            errors='replace',
+            env=env
+        )
+        print("Subprocess stdout:", result.stdout)
+        wavs_after = _wav_mtimes()
+        # Keep only files that are new or whose mtime changed during the run.
+        fresh_wavs = [
+            name for name, mtime in wavs_after.items()
+            if wavs_before.get(name) != mtime
+        ]
+        if fresh_wavs:
+            latest_wav = max(fresh_wavs, key=wavs_after.get)
+            output_wav = os.path.join(tests_dir, latest_wav)
+            audio = AudioSegment.from_wav(output_wav)
+            # NOTE(review): fixed output name is shared by all requests;
+            # concurrent calls would overwrite each other's result.
+            output_mp3 = os.path.join(tests_dir, "output.mp3")
+            audio.export(output_mp3, format="mp3")
+            return output_mp3, "Suy luận thành công!"
+        return None, "Không tìm thấy file âm thanh trong thư mục tests"
+    except subprocess.CalledProcessError as e:
+        return None, f"Lỗi khi chạy infer_cli.py: {e.stderr}"
+    except Exception as e:
+        return None, str(e)
+def generate_speech(ref_audio, ref_text, gen_text, speed, model):
+    """Gradio callback: normalise the reference audio and synthesise speech.
+
+    The uploaded file is loaded with ``AudioSegment``, down-mixed to mono and
+    written to a temporary WAV file, which is handed to ``run_f5_tts`` and
+    always removed afterwards.
+
+    Args:
+        ref_audio: File path of the uploaded reference audio, or ``None`` when
+            nothing was uploaded.
+        ref_text: Text of the reference audio.
+        gen_text: Text to synthesise.
+        speed: Speed value; converted with ``float`` before use.
+        model: Model name forwarded to ``run_f5_tts``.
+
+    Returns:
+        ``(mp3_path, message)`` when an output file exists, otherwise
+        ``(None, message)``.
+    """
+    import tempfile
+
+    if ref_audio is None:
+        return None, "Vui lòng tải lên file audio tham chiếu!"
+    # A unique temp file avoids collisions between requests arriving in the
+    # same second, which a timestamp-based name in the working directory allowed.
+    fd, ref_audio_path = tempfile.mkstemp(prefix="temp_ref_", suffix=".wav")
+    os.close(fd)
+    try:
+        # ref_audio is a file path; load it with AudioSegment.
+        audio_segment = AudioSegment.from_file(ref_audio)
+        audio_segment = audio_segment.set_channels(1)  # convert to mono
+        audio_segment.export(ref_audio_path, format="wav")
+        output_mp3, message = run_f5_tts(ref_audio_path, ref_text, gen_text, model, float(speed))
+    finally:
+        # Remove the temp file even when loading, exporting or inference raises.
+        if os.path.exists(ref_audio_path):
+            os.remove(ref_audio_path)
+    if output_mp3 and os.path.exists(output_mp3):
+        return output_mp3, message
+    return None, message
+# Input widgets, listed in the positional order of generate_speech's parameters.
+input_components = [
+    gr.Audio(type="filepath", label="Tải lên file audio tham chiếu (.wav hoặc .mp3)"),
+    gr.Textbox(label="Text tham chiếu", placeholder="Nhập text của audio tham chiếu"),
+    gr.Textbox(label="Text cần sinh", placeholder="Nhập text bạn muốn sinh"),
+    gr.Slider(minimum=0.5, maximum=2.0, value=1.0, label="Tốc độ"),
+    gr.Dropdown(choices=["F5TTS_Base"], value="F5TTS_Base", label="Mô hình"),
+]
+# Output widgets for the (audio path, status message) pair returned by generate_speech.
+output_components = [
+    gr.Audio(type="filepath", label="Kết quả audio (.mp3)"),
+    gr.Textbox(label="Trạng thái"),
+]
+interface = gr.Interface(
+    fn=generate_speech,
+    inputs=input_components,
+    outputs=output_components,
+    title="F5-TTS Suy luận",
+    description="Tải lên audio tham chiếu, nhập text, và sinh audio mới với F5-TTS.",
+)
+if __name__ == "__main__":
+    # Serve on all network interfaces, port 7860.
+    interface.launch(server_name="0.0.0.0", server_port=7860)
+# import os
+# import sys
+# import subprocess
+# from huggingface_hub import hf_hub_download
+# from pydub import AudioSegment
+# import gradio as gr
+# import time
+# # Thêm thư mục src vào sys.path
+# sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), 'src')))
+# def run_f5_tts(ref_audio_path, ref_text, gen_text, model="F5TTS_Base", speed=1.2, vocoder_name="vocos"):
+#     current_dir = os.path.dirname(os.path.abspath(__file__))
+#     infer_cli_path = os.path.join(current_dir, "src", "f5_tts", "infer", "infer_cli.py")
+#     tests_dir = os.path.join(current_dir, "tests")
+#     # Debug: In đường dẫn để kiểm tra
+#     print(f"Infer CLI path: {infer_cli_path}")
+#     print(f"Tests dir: {tests_dir}")
+#     # Tải file từ Hugging Face Hub
+#     try:
+#         vocab_file = hf_hub_download(repo_id="nguyensu27/TTS", filename="vocab.txt")
+#         ckpt_file = hf_hub_download(repo_id="nguyensu27/TTS", filename="model_last.pt")
+#     except Exception as e:
+#         return None, f"Lỗi khi tải model/vocab từ Hugging Face: {str(e)}"
+#     os.environ['PYTHONIOENCODING'] = 'utf-8'
+#     env = os.environ.copy()
+#     env['PYTHONPATH'] = os.path.abspath(os.path.join(current_dir, 'src'))
+#     command = [
+#         sys.executable,
+#         infer_cli_path,
+#         "--model", model,
+#         "--ref_audio", ref_audio_path,
+#         "--ref_text", ref_text,
+#         "--gen_text", gen_text,
+#         "--speed", str(speed),
+#         "--vocoder_name", vocoder_name,
+#         "--vocab_file", vocab_file,
+#         "--ckpt_file", ckpt_file
+#     ]
+#     print(f"Running command: {' '.join(command)}")
+#     try:
+#         result = subprocess.run(
+#             command,
+#             check=True,
+#             capture_output=True,
+#             text=True,
+#             env=env
+#         )
+#         print("Subprocess stdout:", result.stdout)
+#         if os.path.exists(tests_dir):
+#             wav_files = [f for f in os.listdir(tests_dir) if f.endswith('.wav')]
+#             if wav_files:
+#                 latest_wav = max(wav_files, key=lambda x: os.path.getmtime(os.path.join(tests_dir, x)))
+#                 output_wav = os.path.join(tests_dir, latest_wav)
+#                 audio = AudioSegment.from_wav(output_wav)
+#                 output_mp3 = os.path.join(tests_dir, "output.mp3")
+#                 audio.export(output_mp3, format="mp3")
+#                 return output_mp3, "Suy luận thành công!"
+#         return None, "Không tìm thấy file âm thanh trong thư mục tests"
+#     except subprocess.CalledProcessError as e:
+#         return None, f"Lỗi khi chạy infer_cli.py: {e.stderr}"
+#     except Exception as e:
+#         return None, str(e)
+# def generate_speech(ref_audio, ref_text, gen_text, speed, model):
+#     if ref_audio is None:
+#         return None, "Vui lòng tải lên file audio tham chiếu!"
+#     ref_audio_path = f"temp_ref_{int(time.time())}.wav"
+#     ref_audio.convert_audio_channels(1)  # Chuyển sang mono
+#     ref_audio.export(ref_audio_path, format="wav")
+#     output_mp3, message = run_f5_tts(ref_audio_path, ref_text, gen_text, model, float(speed))
+#     os.remove(ref_audio_path)
+#     if output_mp3 and os.path.exists(output_mp3):
+#         return output_mp3, message
+#     return None, message
+# interface = gr.Interface(
+#     fn=generate_speech,
+#     inputs=[
+#         gr.Audio(type="filepath", label="Tải lên file audio tham chiếu (.wav hoặc .mp3)"),
+#         gr.Textbox(label="Text tham chiếu", placeholder="Nhập text của audio tham chiếu"),
+#         gr.Textbox(label="Text cần sinh", placeholder="Nhập text bạn muốn sinh"),
+#         gr.Slider(minimum=0.5, maximum=2.0, value=1.0, label="Tốc độ"),
+#         gr.Dropdown(choices=["F5TTS_Base"], value="F5TTS_Base", label="Mô hình")
+#     ],
+#     outputs=[
+#         gr.Audio(type="filepath", label="Kết quả audio (.mp3)"),
+#         gr.Textbox(label="Trạng thái")
+#     ],
+#     title="F5-TTS Suy luận",
+#     description="Tải lên audio tham chiếu, nhập text, và sinh audio mới với F5-TTS."
+# )
+# if __name__ == "__main__":
+#     interface.launch(server_name="0.0.0.0", server_port=7860)
+# from flask import Flask, request, send_file
+# import subprocess
+# import os
+# import sys
+# from huggingface_hub import hf_hub_download
+# from pydub import AudioSegment
+# sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), 'src')))
+# app = Flask(__name__)
+# def run_f5_tts(ref_audio_path, ref_text, gen_text, model="F5TTS_Base", speed=1.2, vocoder_name="vocos"):
+#     current_dir = os.path.dirname(os.path.abspath(__file__))
+#     infer_cli_path = os.path.join(current_dir, "src", "f5_tts", "infer", "infer_cli.py")
+#     tests_dir = os.path.join(current_dir, "tests")
+#     vocab_file = hf_hub_download(repo_id="nguyensu27/TTS", filename="vocab.txt")
+#     ckpt_file = hf_hub_download(repo_id="nguyensu27/TTS", filename="model_last.pt")
+#     os.environ['PYTHONIOENCODING'] = 'utf-8'
+#     env = os.environ.copy()
+#     env['PYTHONPATH'] = os.path.abspath(os.path.join(current_dir, 'src'))
+#     command = [
+#         sys.executable,
+#         infer_cli_path,
+#         "--model", model,
+#         "--ref_audio", ref_audio_path,
+#         "--ref_text", ref_text,
+#         "--gen_text", gen_text,
+#         "--speed", str(speed),
+#         "--vocoder_name", vocoder_name,
+#         "--vocab_file", vocab_file,
+#         "--ckpt_file", ckpt_file
+#     ]
+#     try:
+#         result = subprocess.run(
+#             command,
+#             check=True,
+#             capture_output=True,
+#             text=True,
+#             encoding='utf-8',
+#             env=env
+#         )
+#         if os.path.exists(tests_dir):
+#             wav_files = [f for f in os.listdir(tests_dir) if f.endswith('.wav')]
+#             if wav_files:
+#                 latest_wav = max(wav_files, key=lambda x: os.path.getmtime(os.path.join(tests_dir, x)))
+#                 output_wav = os.path.join(tests_dir, latest_wav)
+#                 audio = AudioSegment.from_wav(output_wav)
+#                 output_mp3 = os.path.join(tests_dir, "output.mp3")
+#                 audio.export(output_mp3, format="mp3")
+#                 return True, output_mp3
+#         return False, "Không tìm thấy file âm thanh trong thư mục tests"
+#     except subprocess.CalledProcessError as e:
+#         return False, f"Lỗi khi chạy infer_cli.py: {e.stderr}"
+#     except Exception as e:
+#         return False, str(e)
+# @app.route('/')
+# def home():
+#     return "F5-TTS API is running. Use POST /api/generate to generate audio."
+# @app.route('/api/generate', methods=['POST'])
+# def generate_speech():
+#     if 'ref_audio' not in request.files:
+#         return {"error": "Missing ref_audio"}, 400
+#     ref_audio = request.files['ref_audio']
+#     ref_text = request.form.get('ref_text', '')
+#     gen_text = request.form.get('gen_text', '')
+#     model = request.form.get('model', 'F5TTS_Base')
+#     speed = float(request.form.get('speed', 1.2))
+#     import time
+#     ref_audio_path = f"temp_ref_{int(time.time())}.wav"
+#     ref_audio.save(ref_audio_path)
+#     success, result = run_f5_tts(ref_audio_path, ref_text, gen_text, model, speed)
+#     os.remove(ref_audio_path)
+#     if success:
+#         return send_file(result, mimetype='audio/mpeg')
+#     else:
+#         return {"error": result}, 500
+# if __name__ == "__main__":
+#     port = int(os.environ.get("PORT", 7860))
+#     app.run(host="0.0.0.0", port=port, debug=False)
+# from flask import Flask, request, send_file
+# import subprocess
+# import os
+# import sys
+# from huggingface_hub import hf_hub_download
+# sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), 'src')))
+# app = Flask(__name__)
+# # =========================
+# # Hàm chạy F5-TTS
+# # =========================
+# def run_f5_tts(ref_audio_path, ref_text, gen_text, model="F5TTS_Base", speed=1.2, vocoder_name="vocos"):
+#     current_dir = os.path.dirname(os.path.abspath(__file__))
+#     infer_cli_path = os.path.join(current_dir, "src", "f5_tts", "infer", "infer_cli.py")
+#     tests_dir = os.path.join(current_dir, "tests")
+#     # Dùng huggingface_hub để tải file model và vocab từ repo 'nguyensu27/TTS'
+#     vocab_file = hf_hub_download(repo_id="nguyensu27/TTS", filename="vocab.txt")
+#     ckpt_file = hf_hub_download(repo_id="nguyensu27/TTS", filename="model_last.pt")
+#     os.environ['PYTHONIOENCODING'] = 'utf-8'
+#     command = [
+#         sys.executable,
+#         infer_cli_path,
+#         "--model", model,
+#         "--ref_audio", ref_audio_path,
+#         "--ref_text", ref_text,
+#         "--gen_text", gen_text,
+#         "--speed", str(speed),
+#         "--vocoder_name", vocoder_name,
+#         "--vocab_file", vocab_file,
+#         "--ckpt_file", ckpt_file
+#     ]
+#     try:
+#         result = subprocess.run(
+#             command,
+#             check=True,
+#             capture_output=True,
+#             text=True,
+#             encoding='utf-8'
+#         )
+#         if os.path.exists(tests_dir):
+#             wav_files = [f for f in os.listdir(tests_dir) if f.endswith('.wav')]
+#             if wav_files:
+#                 latest_wav = max(
+#                     wav_files, key=lambda x: os.path.getmtime(os.path.join(tests_dir, x))
+#                 )
+#                 output_file = os.path.join(tests_dir, latest_wav)
+#                 return True, output_file
+#         return False, "Không tìm thấy file âm thanh trong thư mục tests"
+#     except subprocess.CalledProcessError as e:
+#         return False, e.stderr
+#     except Exception as e:
+#         return False, str(e)
+# # =========================
+# # Routes
+# # =========================
+# @app.route('/')
+# def home():
+#     return "F5-TTS API is running. Use POST /api/generate to generate audio."
+# @app.route('/api/generate', methods=['POST'])
+# def generate_speech():
+#     if 'ref_audio' not in request.files:
+#         return {"error": "Missing ref_audio"}, 400
+#     ref_audio = request.files['ref_audio']
+#     ref_text = request.form.get('ref_text', '')
+#     gen_text = request.form.get('gen_text', '')
+#     model = request.form.get('model', 'F5TTS_Base')
+#     speed = float(request.form.get('speed', 1.2))
+#     ref_audio_path = 'temp_ref.wav'
+#     ref_audio.save(ref_audio_path)
+#     success, result = run_f5_tts(ref_audio_path, ref_text, gen_text, model, speed)
+#     os.remove(ref_audio_path)
+#     if success:
+#         return send_file(result, mimetype='audio/wav')
+#     else:
+#         return {"error": result}, 500
+# # =========================
+# # Main
+# # =========================
+# if __name__ == "__main__":
+#     port = int(os.environ.get("PORT", 7860))
+#     app.run(host="0.0.0.0", port=port, debug=False)

app.py CHANGED Viewed

@@ -14,8 +14,6 @@ def run_f5_tts(ref_audio_path, ref_text, gen_text, model="F5TTS_Base", speed=1.2
     infer_cli_path = os.path.join(current_dir, "src", "f5_tts", "infer", "infer_cli.py")
     tests_dir = os.path.join(current_dir, "tests")
-    print(f"Infer CLI path: {infer_cli_path}")
-    print(f"Does infer_cli.py exist? {os.path.exists(infer_cli_path)}")
     if not os.path.exists(infer_cli_path):
         return None, "File infer_cli.py không tồn tại!"
@@ -42,7 +40,6 @@ def run_f5_tts(ref_audio_path, ref_text, gen_text, model="F5TTS_Base", speed=1.2
         "--ckpt_file", ckpt_file
     ]
-    print(f"Running command: {' '.join(command)}")
     try:
         result = subprocess.run(
             command,
@@ -51,7 +48,7 @@ def run_f5_tts(ref_audio_path, ref_text, gen_text, model="F5TTS_Base", speed=1.2
             text=True,
             env=env
         )
-        print("Subprocess stdout:", result.stdout)
         if os.path.exists(tests_dir):
             wav_files = [f for f in os.listdir(tests_dir) if f.endswith('.wav')]
             if wav_files:
@@ -60,8 +57,8 @@ def run_f5_tts(ref_audio_path, ref_text, gen_text, model="F5TTS_Base", speed=1.2
                 audio = AudioSegment.from_wav(output_wav)
                 output_mp3 = os.path.join(tests_dir, "output.mp3")
                 audio.export(output_mp3, format="mp3")
-                return output_mp3, "Suy luận thành công!"
-        return None, "Không tìm thấy file âm thanh trong thư mục tests"
     except subprocess.CalledProcessError as e:
         return None, f"Lỗi khi chạy infer_cli.py: {e.stderr}"
     except Exception as e:
@@ -69,7 +66,8 @@ def run_f5_tts(ref_audio_path, ref_text, gen_text, model="F5TTS_Base", speed=1.2
 def generate_speech(ref_audio, ref_text, gen_text, speed, model):
     if ref_audio is None:
-        return None, "Vui lòng tải lên file audio tham chiếu!"
     # ref_audio là đường dẫn file, tải bằng AudioSegment
     audio_segment = AudioSegment.from_file(ref_audio)
     audio_segment = audio_segment.set_channels(1)  # Chuyển sang mono
@@ -77,34 +75,146 @@ def generate_speech(ref_audio, ref_text, gen_text, speed, model):
     audio_segment.export(ref_audio_path, format="wav")
     output_mp3, message = run_f5_tts(ref_audio_path, ref_text, gen_text, model, float(speed))
-    os.remove(ref_audio_path)
-    if output_mp3 and os.path.exists(output_mp3):
-        return output_mp3, message
-    return None, message
-interface = gr.Interface(
-    fn=generate_speech,
-    inputs=[
-        gr.Audio(type="filepath", label="Tải lên file audio tham chiếu (.wav hoặc .mp3)"),
-        gr.Textbox(label="Text tham chiếu", placeholder="Nhập text của audio tham chiếu"),
-        gr.Textbox(label="Text cần sinh", placeholder="Nhập text bạn muốn sinh"),
-        gr.Slider(minimum=0.5, maximum=2.0, value=1.0, label="Tốc độ"),
-        gr.Dropdown(choices=["F5TTS_Base"], value="F5TTS_Base", label="Mô hình")
-    ],
-    outputs=[
-        gr.Audio(type="filepath", label="Kết quả audio (.mp3)"),
-        gr.Textbox(label="Trạng thái")
-    ],
-    title="F5-TTS Suy luận",
-    description="Tải lên audio tham chiếu, nhập text, và sinh audio mới với F5-TTS."
-)
 if __name__ == "__main__":
     interface.launch(server_name="0.0.0.0", server_port=7860)
 # import os
 # import sys
 # import subprocess

     infer_cli_path = os.path.join(current_dir, "src", "f5_tts", "infer", "infer_cli.py")
     tests_dir = os.path.join(current_dir, "tests")
     if not os.path.exists(infer_cli_path):
         return None, "File infer_cli.py không tồn tại!"
         "--ckpt_file", ckpt_file
     ]
     try:
         result = subprocess.run(
             command,
             text=True,
             env=env
         )
+        # Kiểm tra thư mục tests có file wav xuất ra không
         if os.path.exists(tests_dir):
             wav_files = [f for f in os.listdir(tests_dir) if f.endswith('.wav')]
             if wav_files:
                 audio = AudioSegment.from_wav(output_wav)
                 output_mp3 = os.path.join(tests_dir, "output.mp3")
                 audio.export(output_mp3, format="mp3")
+                return output_mp3, "✅ Suy luận thành công!"
+        return None, "❌ Không tìm thấy file âm thanh trong thư mục tests"
     except subprocess.CalledProcessError as e:
         return None, f"Lỗi khi chạy infer_cli.py: {e.stderr}"
     except Exception as e:
 def generate_speech(ref_audio, ref_text, gen_text, speed, model):
     if ref_audio is None:
+        return None, "⚠️ Vui lòng tải lên file audio tham chiếu!"
     # ref_audio là đường dẫn file, tải bằng AudioSegment
     audio_segment = AudioSegment.from_file(ref_audio)
     audio_segment = audio_segment.set_channels(1)  # Chuyển sang mono
     audio_segment.export(ref_audio_path, format="wav")
     output_mp3, message = run_f5_tts(ref_audio_path, ref_text, gen_text, model, float(speed))
+    # Xóa file tạm
+    if os.path.exists(ref_audio_path):
+        os.remove(ref_audio_path)
+    return output_mp3, message
+# ====================== Gradio UI ======================
+with gr.Blocks() as interface:
+    gr.Markdown("## 🎙️ F5-TTS Suy luận")
+    gr.Markdown("Tải lên audio tham chiếu, nhập text, và sinh audio mới với F5-TTS.")
+    with gr.Row():
+        with gr.Column():
+            ref_audio = gr.Audio(type="filepath", label="📂 Tải lên file audio tham chiếu (.wav hoặc .mp3)")
+            ref_text = gr.Textbox(label="📝 Text tham chiếu")
+            gen_text = gr.Textbox(label="📝 Text cần sinh")
+            speed = gr.Slider(minimum=0.5, maximum=2.0, value=1.0, label="⚡ Tốc độ")
+            model = gr.Dropdown(choices=["F5TTS_Base"], value="F5TTS_Base", label="🤖 Mô hình")
+            btn = gr.Button("🚀 Sinh giọng nói")
+        with gr.Column():
+            output_audio = gr.Audio(type="filepath", label="🔊 Kết quả audio (.mp3)")
+            output_status = gr.Textbox(label="📌 Trạng thái")
+    btn.click(generate_speech, [ref_audio, ref_text, gen_text, speed, model], [output_audio, output_status])
 if __name__ == "__main__":
     interface.launch(server_name="0.0.0.0", server_port=7860)
+# import os
+# import sys
+# import subprocess
+# from huggingface_hub import hf_hub_download
+# from pydub import AudioSegment
+# import gradio as gr
+# import time
+# # Thêm thư mục src vào sys.path
+# sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), 'src')))
+# def run_f5_tts(ref_audio_path, ref_text, gen_text, model="F5TTS_Base", speed=1.2, vocoder_name="vocos"):
+#     current_dir = os.path.dirname(os.path.abspath(__file__))
+#     infer_cli_path = os.path.join(current_dir, "src", "f5_tts", "infer", "infer_cli.py")
+#     tests_dir = os.path.join(current_dir, "tests")
+#     print(f"Infer CLI path: {infer_cli_path}")
+#     print(f"Does infer_cli.py exist? {os.path.exists(infer_cli_path)}")
+#     if not os.path.exists(infer_cli_path):
+#         return None, "File infer_cli.py không tồn tại!"
+#     try:
+#         vocab_file = hf_hub_download(repo_id="nguyensu27/TTS", filename="vocab.txt")
+#         ckpt_file = hf_hub_download(repo_id="nguyensu27/TTS", filename="model_last.pt")
+#     except Exception as e:
+#         return None, f"Lỗi khi tải model/vocab: {str(e)}"
+#     os.environ['PYTHONIOENCODING'] = 'utf-8'
+#     env = os.environ.copy()
+#     env['PYTHONPATH'] = os.path.abspath(os.path.join(current_dir, 'src'))
+#     command = [
+#         sys.executable,
+#         infer_cli_path,
+#         "--model", model,
+#         "--ref_audio", ref_audio_path,
+#         "--ref_text", ref_text,
+#         "--gen_text", gen_text,
+#         "--speed", str(speed),
+#         "--vocoder_name", vocoder_name,
+#         "--vocab_file", vocab_file,
+#         "--ckpt_file", ckpt_file
+#     ]
+#     print(f"Running command: {' '.join(command)}")
+#     try:
+#         result = subprocess.run(
+#             command,
+#             check=True,
+#             capture_output=True,
+#             text=True,
+#             env=env
+#         )
+#         print("Subprocess stdout:", result.stdout)
+#         if os.path.exists(tests_dir):
+#             wav_files = [f for f in os.listdir(tests_dir) if f.endswith('.wav')]
+#             if wav_files:
+#                 latest_wav = max(wav_files, key=lambda x: os.path.getmtime(os.path.join(tests_dir, x)))
+#                 output_wav = os.path.join(tests_dir, latest_wav)
+#                 audio = AudioSegment.from_wav(output_wav)
+#                 output_mp3 = os.path.join(tests_dir, "output.mp3")
+#                 audio.export(output_mp3, format="mp3")
+#                 return output_mp3, "Suy luận thành công!"
+#         return None, "Không tìm thấy file âm thanh trong thư mục tests"
+#     except subprocess.CalledProcessError as e:
+#         return None, f"Lỗi khi chạy infer_cli.py: {e.stderr}"
+#     except Exception as e:
+#         return None, str(e)
+# def generate_speech(ref_audio, ref_text, gen_text, speed, model):
+#     if ref_audio is None:
+#         return None, "Vui lòng tải lên file audio tham chiếu!"
+#     # ref_audio là đường dẫn file, tải bằng AudioSegment
+#     audio_segment = AudioSegment.from_file(ref_audio)
+#     audio_segment = audio_segment.set_channels(1)  # Chuyển sang mono
+#     ref_audio_path = f"temp_ref_{int(time.time())}.wav"
+#     audio_segment.export(ref_audio_path, format="wav")
+#     output_mp3, message = run_f5_tts(ref_audio_path, ref_text, gen_text, model, float(speed))
+#     os.remove(ref_audio_path)
+#     if output_mp3 and os.path.exists(output_mp3):
+#         return output_mp3, message
+#     return None, message
+# interface = gr.Interface(
+#     fn=generate_speech,
+#     inputs=[
+#         gr.Audio(type="filepath", label="Tải lên file audio tham chiếu (.wav hoặc .mp3)"),
+#         gr.Textbox(label="Text tham chiếu", placeholder="Nhập text của audio tham chiếu"),
+#         gr.Textbox(label="Text cần sinh", placeholder="Nhập text bạn muốn sinh"),
+#         gr.Slider(minimum=0.5, maximum=2.0, value=1.0, label="Tốc độ"),
+#         gr.Dropdown(choices=["F5TTS_Base"], value="F5TTS_Base", label="Mô hình")
+#     ],
+#     outputs=[
+#         gr.Audio(type="filepath", label="Kết quả audio (.mp3)"),
+#         gr.Textbox(label="Trạng thái")
+#     ],
+#     title="F5-TTS Suy luận",
+#     description="Tải lên audio tham chiếu, nhập text, và sinh audio mới với F5-TTS."
+# )
+# if __name__ == "__main__":
+#     interface.launch(server_name="0.0.0.0", server_port=7860)
 # import os
 # import sys
 # import subprocess

requirements copy.txt ADDED Viewed

	@@ -0,0 +1,47 @@

+accelerate
+aiohttp
+anyio
+attrs
+audioread
+bitsandbytes
+datasets
+einops
+ema-pytorch
+encodec
+fastapi
+ffmpy
+Flask
+gradio
+huggingface-hub
+hydra-core
+joblib
+librosa
+loguru
+matplotlib
+numpy
+omegaconf
+pandas
+pillow
+psutil
+pydantic
+pydub
+pypinyin
+requests
+scikit-learn
+scipy
+soundfile
+soxr
+starlette
+sympy
+torch>=2.2
+torchaudio>=2.2
+tqdm
+transformers>=4.40
+uvicorn
+vocos
+x-transformers
+huggingface_hub
+tomli
+cached-path
+gradio
+torchdiffeq