Spaces:
Sleeping
Sleeping
| import os | |
| import sys | |
| import subprocess | |
| from huggingface_hub import hf_hub_download | |
| from pydub import AudioSegment | |
| import gradio as gr | |
| import time | |
# Make the bundled ``src`` directory (next to this file) importable.
_SRC_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "src"))
sys.path.append(_SRC_DIR)
def _newest_wav_since(directory, not_before):
    """Return the most recently modified ``.wav`` in *directory*, or ``None``.

    Only files whose modification time is at or after *not_before* (a POSIX
    timestamp) are considered, so output left over from earlier runs is
    ignored. Returns ``None`` when the directory is missing or holds no
    matching file.
    """
    if not os.path.isdir(directory):
        return None

    newest_path = None
    newest_mtime = not_before
    for name in os.listdir(directory):
        if not name.endswith(".wav"):
            continue
        candidate = os.path.join(directory, name)
        try:
            mtime = os.path.getmtime(candidate)
        except OSError:
            # The file vanished between listdir() and stat(); skip it.
            continue
        if mtime >= newest_mtime:
            newest_path, newest_mtime = candidate, mtime
    return newest_path


def run_f5_tts(ref_audio_path, ref_text, gen_text, model="F5TTS_Base", speed=1.2, vocoder_name="vocos"):
    """Run the F5-TTS inference CLI in a subprocess and convert its output to MP3.

    The CLI script is expected at ``src/f5_tts/infer/infer_cli.py`` next to
    this file. The vocabulary and checkpoint are fetched with
    ``hf_hub_download`` from the ``nguyensu27/TTS`` repository. After the CLI
    finishes, the newest ``.wav`` written to the ``tests`` directory during
    this run is exported as ``tests/output.mp3``.

    Args:
        ref_audio_path: Path of the reference audio file handed to the CLI.
        ref_text: Transcript of the reference audio.
        gen_text: Text to synthesize.
        model: Value passed to the CLI's ``--model`` option.
        speed: Value passed (as a string) to the CLI's ``--speed`` option.
        vocoder_name: Value passed to the CLI's ``--vocoder_name`` option.

    Returns:
        A ``(output_mp3_path, message)`` tuple. On any failure the path is
        ``None`` and the message describes the problem; no exception
        is propagated from the download, subprocess or conversion steps.
    """
    current_dir = os.path.dirname(os.path.abspath(__file__))
    src_dir = os.path.join(current_dir, "src")
    infer_cli_path = os.path.join(src_dir, "f5_tts", "infer", "infer_cli.py")
    tests_dir = os.path.join(current_dir, "tests")

    print(f"Infer CLI path: {infer_cli_path}")
    print(f"Does infer_cli.py exist? {os.path.exists(infer_cli_path)}")
    if not os.path.exists(infer_cli_path):
        return None, "File infer_cli.py không tồn tại!"

    try:
        vocab_file = hf_hub_download(repo_id="nguyensu27/TTS", filename="vocab.txt")
        ckpt_file = hf_hub_download(repo_id="nguyensu27/TTS", filename="model_last.pt")
    except Exception as e:
        return None, f"Lỗi khi tải model/vocab: {str(e)}"

    # Configure the child's environment on a copy so this process's own
    # environment is left untouched.
    env = os.environ.copy()
    env["PYTHONIOENCODING"] = "utf-8"
    # Put ``src`` first on PYTHONPATH but keep whatever was already there.
    inherited_pythonpath = env.get("PYTHONPATH")
    if inherited_pythonpath:
        env["PYTHONPATH"] = os.pathsep.join([src_dir, inherited_pythonpath])
    else:
        env["PYTHONPATH"] = src_dir

    command = [
        sys.executable,
        infer_cli_path,
        "--model", model,
        "--ref_audio", ref_audio_path,
        "--ref_text", ref_text,
        "--gen_text", gen_text,
        "--speed", str(speed),
        "--vocoder_name", vocoder_name,
        "--vocab_file", vocab_file,
        "--ckpt_file", ckpt_file,
    ]
    print(f"Running command: {' '.join(command)}")

    # Allow a little slack for coarse filesystem timestamp resolution when
    # deciding whether a .wav was produced by this run.
    mtime_slack_seconds = 2.0
    started_at = time.time() - mtime_slack_seconds

    try:
        result = subprocess.run(
            command,
            check=True,
            capture_output=True,
            text=True,
            # The child is told to emit UTF-8 above; decode accordingly and
            # never fail the request because of an undecodable byte.
            encoding="utf-8",
            errors="replace",
            env=env,
        )
        print("Subprocess stdout:", result.stdout)

        output_wav = _newest_wav_since(tests_dir, started_at)
        if output_wav is None:
            return None, "Không tìm thấy file âm thanh trong thư mục tests"

        audio = AudioSegment.from_wav(output_wav)
        output_mp3 = os.path.join(tests_dir, "output.mp3")
        audio.export(output_mp3, format="mp3")
        return output_mp3, "Suy luận thành công!"
    except subprocess.CalledProcessError as e:
        return None, f"Lỗi khi chạy infer_cli.py: {e.stderr}"
    except Exception as e:
        return None, str(e)
def generate_speech(ref_audio, ref_text, gen_text, speed, model):
    """Gradio callback: synthesize *gen_text* in the voice of *ref_audio*.

    The uploaded reference audio is converted to a mono WAV in a uniquely
    named temporary file, handed to ``run_f5_tts``, and the temporary file is
    always removed afterwards.

    Args:
        ref_audio: Path of the uploaded reference audio, or ``None`` when
            nothing was uploaded.
        ref_text: Transcript of the reference audio.
        gen_text: Text to synthesize.
        speed: Speed value from the UI; converted with ``float``.
        model: Model name forwarded to ``run_f5_tts``.

    Returns:
        A ``(mp3_path_or_None, status_message)`` tuple for the two output
        components.
    """
    import tempfile

    if ref_audio is None:
        return None, "Vui lòng tải lên file audio tham chiếu!"
    if not gen_text or not gen_text.strip():
        # Nothing to synthesize; report it instead of launching inference.
        return None, "Vui lòng nhập text cần sinh!"

    # mkstemp yields a unique name, so requests arriving within the same
    # second cannot overwrite each other's reference file.
    fd, ref_audio_path = tempfile.mkstemp(prefix="temp_ref_", suffix=".wav")
    os.close(fd)
    try:
        try:
            # ref_audio is a file path; load it and downmix to mono.
            audio_segment = AudioSegment.from_file(ref_audio).set_channels(1)
            audio_segment.export(ref_audio_path, format="wav")
        except Exception as e:
            # Surface decode/export problems in the status box.
            return None, f"Lỗi khi xử lý audio tham chiếu: {e}"

        output_mp3, message = run_f5_tts(ref_audio_path, ref_text, gen_text, model, float(speed))
    finally:
        # Clean up the temporary reference file on every path.
        try:
            os.remove(ref_audio_path)
        except OSError:
            pass

    if output_mp3 and os.path.exists(output_mp3):
        return output_mp3, message
    return None, message
# Input widgets, in the positional order expected by generate_speech:
# reference audio, reference text, text to generate, speed, model.
_input_components = [
    gr.Audio(type="filepath", label="Tải lên file audio tham chiếu (.wav hoặc .mp3)"),
    gr.Textbox(label="Text tham chiếu", placeholder="Nhập text của audio tham chiếu"),
    gr.Textbox(label="Text cần sinh", placeholder="Nhập text bạn muốn sinh"),
    gr.Slider(minimum=0.5, maximum=2.0, value=1.0, label="Tốc độ"),
    gr.Dropdown(choices=["F5TTS_Base"], value="F5TTS_Base", label="Mô hình"),
]

# Output widgets: the generated audio file and a status message.
_output_components = [
    gr.Audio(type="filepath", label="Kết quả audio (.mp3)"),
    gr.Textbox(label="Trạng thái"),
]

# Gradio UI that wires the widgets above to generate_speech.
interface = gr.Interface(
    fn=generate_speech,
    inputs=_input_components,
    outputs=_output_components,
    title="F5-TTS Suy luận",
    description="Tải lên audio tham chiếu, nhập text, và sinh audio mới với F5-TTS.",
)

if __name__ == "__main__":
    # Listen on all interfaces on port 7860.
    interface.launch(server_name="0.0.0.0", server_port=7860)
| # import os | |
| # import sys | |
| # import subprocess | |
| # from huggingface_hub import hf_hub_download | |
| # from pydub import AudioSegment | |
| # import gradio as gr | |
| # import time | |
| # # Add the src directory to sys.path | |
| # sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), 'src'))) | |
| # def run_f5_tts(ref_audio_path, ref_text, gen_text, model="F5TTS_Base", speed=1.2, vocoder_name="vocos"): | |
| # current_dir = os.path.dirname(os.path.abspath(__file__)) | |
| # infer_cli_path = os.path.join(current_dir, "src", "f5_tts", "infer", "infer_cli.py") | |
| # tests_dir = os.path.join(current_dir, "tests") | |
| # # Debug: print the paths for inspection | |
| # print(f"Infer CLI path: {infer_cli_path}") | |
| # print(f"Tests dir: {tests_dir}") | |
| # # Download the files from the Hugging Face Hub | |
| # try: | |
| # vocab_file = hf_hub_download(repo_id="nguyensu27/TTS", filename="vocab.txt") | |
| # ckpt_file = hf_hub_download(repo_id="nguyensu27/TTS", filename="model_last.pt") | |
| # except Exception as e: | |
| # return None, f"Lỗi khi tải model/vocab từ Hugging Face: {str(e)}" | |
| # os.environ['PYTHONIOENCODING'] = 'utf-8' | |
| # env = os.environ.copy() | |
| # env['PYTHONPATH'] = os.path.abspath(os.path.join(current_dir, 'src')) | |
| # command = [ | |
| # sys.executable, | |
| # infer_cli_path, | |
| # "--model", model, | |
| # "--ref_audio", ref_audio_path, | |
| # "--ref_text", ref_text, | |
| # "--gen_text", gen_text, | |
| # "--speed", str(speed), | |
| # "--vocoder_name", vocoder_name, | |
| # "--vocab_file", vocab_file, | |
| # "--ckpt_file", ckpt_file | |
| # ] | |
| # print(f"Running command: {' '.join(command)}") | |
| # try: | |
| # result = subprocess.run( | |
| # command, | |
| # check=True, | |
| # capture_output=True, | |
| # text=True, | |
| # env=env | |
| # ) | |
| # print("Subprocess stdout:", result.stdout) | |
| # if os.path.exists(tests_dir): | |
| # wav_files = [f for f in os.listdir(tests_dir) if f.endswith('.wav')] | |
| # if wav_files: | |
| # latest_wav = max(wav_files, key=lambda x: os.path.getmtime(os.path.join(tests_dir, x))) | |
| # output_wav = os.path.join(tests_dir, latest_wav) | |
| # audio = AudioSegment.from_wav(output_wav) | |
| # output_mp3 = os.path.join(tests_dir, "output.mp3") | |
| # audio.export(output_mp3, format="mp3") | |
| # return output_mp3, "Suy luận thành công!" | |
| # return None, "Không tìm thấy file âm thanh trong thư mục tests" | |
| # except subprocess.CalledProcessError as e: | |
| # return None, f"Lỗi khi chạy infer_cli.py: {e.stderr}" | |
| # except Exception as e: | |
| # return None, str(e) | |
| # def generate_speech(ref_audio, ref_text, gen_text, speed, model): | |
| # if ref_audio is None: | |
| # return None, "Vui lòng tải lên file audio tham chiếu!" | |
| # ref_audio_path = f"temp_ref_{int(time.time())}.wav" | |
| # ref_audio.convert_audio_channels(1) # Chuyển sang mono | |
| # ref_audio.export(ref_audio_path, format="wav") | |
| # output_mp3, message = run_f5_tts(ref_audio_path, ref_text, gen_text, model, float(speed)) | |
| # os.remove(ref_audio_path) | |
| # if output_mp3 and os.path.exists(output_mp3): | |
| # return output_mp3, message | |
| # return None, message | |
| # interface = gr.Interface( | |
| # fn=generate_speech, | |
| # inputs=[ | |
| # gr.Audio(type="filepath", label="Tải lên file audio tham chiếu (.wav hoặc .mp3)"), | |
| # gr.Textbox(label="Text tham chiếu", placeholder="Nhập text của audio tham chiếu"), | |
| # gr.Textbox(label="Text cần sinh", placeholder="Nhập text bạn muốn sinh"), | |
| # gr.Slider(minimum=0.5, maximum=2.0, value=1.0, label="Tốc độ"), | |
| # gr.Dropdown(choices=["F5TTS_Base"], value="F5TTS_Base", label="Mô hình") | |
| # ], | |
| # outputs=[ | |
| # gr.Audio(type="filepath", label="Kết quả audio (.mp3)"), | |
| # gr.Textbox(label="Trạng thái") | |
| # ], | |
| # title="F5-TTS Suy luận", | |
| # description="Tải lên audio tham chiếu, nhập text, và sinh audio mới với F5-TTS." | |
| # ) | |
| # if __name__ == "__main__": | |
| # interface.launch(server_name="0.0.0.0", server_port=7860) | |
| # from flask import Flask, request, send_file | |
| # import subprocess | |
| # import os | |
| # import sys | |
| # from huggingface_hub import hf_hub_download | |
| # from pydub import AudioSegment | |
| # sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), 'src'))) | |
| # app = Flask(__name__) | |
| # def run_f5_tts(ref_audio_path, ref_text, gen_text, model="F5TTS_Base", speed=1.2, vocoder_name="vocos"): | |
| # current_dir = os.path.dirname(os.path.abspath(__file__)) | |
| # infer_cli_path = os.path.join(current_dir, "src", "f5_tts", "infer", "infer_cli.py") | |
| # tests_dir = os.path.join(current_dir, "tests") | |
| # vocab_file = hf_hub_download(repo_id="nguyensu27/TTS", filename="vocab.txt") | |
| # ckpt_file = hf_hub_download(repo_id="nguyensu27/TTS", filename="model_last.pt") | |
| # os.environ['PYTHONIOENCODING'] = 'utf-8' | |
| # env = os.environ.copy() | |
| # env['PYTHONPATH'] = os.path.abspath(os.path.join(current_dir, 'src')) | |
| # command = [ | |
| # sys.executable, | |
| # infer_cli_path, | |
| # "--model", model, | |
| # "--ref_audio", ref_audio_path, | |
| # "--ref_text", ref_text, | |
| # "--gen_text", gen_text, | |
| # "--speed", str(speed), | |
| # "--vocoder_name", vocoder_name, | |
| # "--vocab_file", vocab_file, | |
| # "--ckpt_file", ckpt_file | |
| # ] | |
| # try: | |
| # result = subprocess.run( | |
| # command, | |
| # check=True, | |
| # capture_output=True, | |
| # text=True, | |
| # encoding='utf-8', | |
| # env=env | |
| # ) | |
| # if os.path.exists(tests_dir): | |
| # wav_files = [f for f in os.listdir(tests_dir) if f.endswith('.wav')] | |
| # if wav_files: | |
| # latest_wav = max(wav_files, key=lambda x: os.path.getmtime(os.path.join(tests_dir, x))) | |
| # output_wav = os.path.join(tests_dir, latest_wav) | |
| # audio = AudioSegment.from_wav(output_wav) | |
| # output_mp3 = os.path.join(tests_dir, "output.mp3") | |
| # audio.export(output_mp3, format="mp3") | |
| # return True, output_mp3 | |
| # return False, "Không tìm thấy file âm thanh trong thư mục tests" | |
| # except subprocess.CalledProcessError as e: | |
| # return False, f"Lỗi khi chạy infer_cli.py: {e.stderr}" | |
| # except Exception as e: | |
| # return False, str(e) | |
| # @app.route('/') | |
| # def home(): | |
| # return "F5-TTS API is running. Use POST /api/generate to generate audio." | |
| # @app.route('/api/generate', methods=['POST']) | |
| # def generate_speech(): | |
| # if 'ref_audio' not in request.files: | |
| # return {"error": "Missing ref_audio"}, 400 | |
| # ref_audio = request.files['ref_audio'] | |
| # ref_text = request.form.get('ref_text', '') | |
| # gen_text = request.form.get('gen_text', '') | |
| # model = request.form.get('model', 'F5TTS_Base') | |
| # speed = float(request.form.get('speed', 1.2)) | |
| # import time | |
| # ref_audio_path = f"temp_ref_{int(time.time())}.wav" | |
| # ref_audio.save(ref_audio_path) | |
| # success, result = run_f5_tts(ref_audio_path, ref_text, gen_text, model, speed) | |
| # os.remove(ref_audio_path) | |
| # if success: | |
| # return send_file(result, mimetype='audio/mpeg') | |
| # else: | |
| # return {"error": result}, 500 | |
| # if __name__ == "__main__": | |
| # port = int(os.environ.get("PORT", 7860)) | |
| # app.run(host="0.0.0.0", port=port, debug=False) | |
| # from flask import Flask, request, send_file | |
| # import subprocess | |
| # import os | |
| # import sys | |
| # from huggingface_hub import hf_hub_download | |
| # sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), 'src'))) | |
| # app = Flask(__name__) | |
| # # ========================= | |
| # # Function that runs F5-TTS | |
| # # ========================= | |
| # def run_f5_tts(ref_audio_path, ref_text, gen_text, model="F5TTS_Base", speed=1.2, vocoder_name="vocos"): | |
| # current_dir = os.path.dirname(os.path.abspath(__file__)) | |
| # infer_cli_path = os.path.join(current_dir, "src", "f5_tts", "infer", "infer_cli.py") | |
| # tests_dir = os.path.join(current_dir, "tests") | |
| # # Use huggingface_hub to download the model and vocab files from the 'nguyensu27/TTS' repo | |
| # vocab_file = hf_hub_download(repo_id="nguyensu27/TTS", filename="vocab.txt") | |
| # ckpt_file = hf_hub_download(repo_id="nguyensu27/TTS", filename="model_last.pt") | |
| # os.environ['PYTHONIOENCODING'] = 'utf-8' | |
| # command = [ | |
| # sys.executable, | |
| # infer_cli_path, | |
| # "--model", model, | |
| # "--ref_audio", ref_audio_path, | |
| # "--ref_text", ref_text, | |
| # "--gen_text", gen_text, | |
| # "--speed", str(speed), | |
| # "--vocoder_name", vocoder_name, | |
| # "--vocab_file", vocab_file, | |
| # "--ckpt_file", ckpt_file | |
| # ] | |
| # try: | |
| # result = subprocess.run( | |
| # command, | |
| # check=True, | |
| # capture_output=True, | |
| # text=True, | |
| # encoding='utf-8' | |
| # ) | |
| # if os.path.exists(tests_dir): | |
| # wav_files = [f for f in os.listdir(tests_dir) if f.endswith('.wav')] | |
| # if wav_files: | |
| # latest_wav = max( | |
| # wav_files, key=lambda x: os.path.getmtime(os.path.join(tests_dir, x)) | |
| # ) | |
| # output_file = os.path.join(tests_dir, latest_wav) | |
| # return True, output_file | |
| # return False, "Không tìm thấy file âm thanh trong thư mục tests" | |
| # except subprocess.CalledProcessError as e: | |
| # return False, e.stderr | |
| # except Exception as e: | |
| # return False, str(e) | |
| # # ========================= | |
| # # Routes | |
| # # ========================= | |
| # @app.route('/') | |
| # def home(): | |
| # return "F5-TTS API is running. Use POST /api/generate to generate audio." | |
| # @app.route('/api/generate', methods=['POST']) | |
| # def generate_speech(): | |
| # if 'ref_audio' not in request.files: | |
| # return {"error": "Missing ref_audio"}, 400 | |
| # ref_audio = request.files['ref_audio'] | |
| # ref_text = request.form.get('ref_text', '') | |
| # gen_text = request.form.get('gen_text', '') | |
| # model = request.form.get('model', 'F5TTS_Base') | |
| # speed = float(request.form.get('speed', 1.2)) | |
| # ref_audio_path = 'temp_ref.wav' | |
| # ref_audio.save(ref_audio_path) | |
| # success, result = run_f5_tts(ref_audio_path, ref_text, gen_text, model, speed) | |
| # os.remove(ref_audio_path) | |
| # if success: | |
| # return send_file(result, mimetype='audio/wav') | |
| # else: | |
| # return {"error": result}, 500 | |
| # # ========================= | |
| # # Main | |
| # # ========================= | |
| # if __name__ == "__main__": | |
| # port = int(os.environ.get("PORT", 7860)) | |
| # app.run(host="0.0.0.0", port=port, debug=False) | |