File size: 4,186 Bytes
c35a6d8 130885f 8c99301 c35a6d8 8c99301 c35a6d8 8c99301 c35a6d8 8c99301 130885f c35a6d8 8c99301 130885f c35a6d8 8c99301 804826d 8c99301 804826d 8c99301 804826d 8c99301 804826d 8c99301 c35a6d8 130885f 8c99301 130885f 8c99301 130885f 8c99301 c35a6d8 8c99301 130885f c35a6d8 8c99301 130885f c35a6d8 8c99301 130885f c35a6d8 8c99301 c35a6d8 8c99301 c35a6d8 8c99301 c35a6d8 8c99301 c35a6d8 8c99301 c35a6d8 8c99301 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 |
import gradio as gr
from TTS.api import TTS
import torch
import os
from pydub import AudioSegment
# Custom CSS handed to gr.Blocks(css=...) further down in this file.
#   #warning  - light red background; used as an elem_id to flag a problem state
#   .feedback - large, bold, centred dark-red text for the textarea inside a
#               component that carries this class
#   #alert    - white background; the elem_id used for the non-warning state
css = """
#warning {background-color: #FFCCCB !important}
.feedback label textarea {
height: auto !important;
font-size: 22px !important;
font-weight: 800 !important;
text-align: center !important;
color: #801313 !important;
padding: 0px !important
}
#alert {background-color: #fff !important}
"""
# Pick the compute device once at import time: CUDA when torch reports it is
# available, otherwise CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Load the YourTTS multilingual model once at module import; the resulting
# `tts` object is the module-level instance used by text_to_speech().
# NOTE(review): the device is requested twice (the gpu= flag and .to(device));
# one of the two is presumably redundant depending on the installed TTS
# version - confirm before removing either.
tts = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts", progress_bar=False, gpu=(device=="cuda"))
tts.to(device)
# Convert mp3 to wav
def convert_mp3_to_wav(mp3_path: str) -> str:
    """Decode an MP3 file and write a WAV copy next to it.

    The output path is the input path with only its final extension swapped
    for ``.wav``. Directory names that happen to contain ``.mp3`` are left
    alone, and an input without a lowercase ``.mp3`` suffix (``.MP3``, or no
    extension at all) still gets a distinct ``.wav`` output path instead of
    overwriting the input file.

    Args:
        mp3_path: Path to the MP3 file to convert.

    Returns:
        Path of the WAV file that was written.

    Any exception raised while decoding or exporting propagates to the caller.
    """
    # splitext only touches the last extension, unlike str.replace which
    # rewrites every ".mp3" occurrence anywhere in the path.
    stem, _ = os.path.splitext(mp3_path)
    wav_path = stem + ".wav"
    audio = AudioSegment.from_mp3(mp3_path)
    audio.export(wav_path, format="wav")
    return wav_path
# Voice cloning function
def text_to_speech(text: str, speaker_wav: str, speaker_wav_file: str, language: str = "en"):
    """Synthesize ``text`` in the voice of the supplied speaker recording.

    Args:
        text: Text to speak. Surrounding whitespace is stripped and newlines
            are replaced with spaces. ``None`` is treated as empty.
        speaker_wav: Path to a reference recording (microphone input).
        speaker_wav_file: Path to an uploaded reference recording; takes
            precedence over ``speaker_wav`` when both are given.
        language: Language code forwarded to ``tts.tts_to_file``. Defaults to
            ``"en"``, the value that was previously hard-coded.

    Returns:
        A ``(audio_path, message)`` tuple. On success ``audio_path`` is the
        path of the generated WAV file and ``message`` is ``""``; on failure
        ``audio_path`` is ``None`` and ``message`` describes the problem.
    """
    import tempfile  # local import so the block does not depend on file-level imports

    # Guard against None so a missing value is reported instead of crashing.
    text = (text or "").strip().replace("\n", " ")
    speaker_audio = speaker_wav_file or speaker_wav

    if not text:
        return None, "⚠️ Error: Text input is empty."
    if not speaker_audio or not os.path.exists(speaker_audio):
        return None, "⚠️ Error: No valid speaker audio provided."

    if speaker_audio.endswith(".mp3"):
        try:
            speaker_audio = convert_mp3_to_wav(speaker_audio)
        except Exception as e:
            return None, f"⚠️ Error converting MP3 to WAV: {str(e)}"

    # A unique file per call: a shared "output.wav" would be overwritten by
    # concurrent requests, and a stale file left by an earlier run could make
    # a failed synthesis look successful.
    fd, output_path = tempfile.mkstemp(prefix="tts_", suffix=".wav")
    os.close(fd)

    try:
        tts.tts_to_file(text=text, speaker_wav=speaker_audio, language=language, file_path=output_path)
        generated = os.path.exists(output_path) and os.path.getsize(output_path) > 0
    except Exception as e:
        failure = f"⚠️ Error during synthesis: {str(e)}"
    else:
        if generated:
            return output_path, ""
        failure = "⚠️ Error: Audio was not generated."

    # Failure path: remove the unused placeholder file. Best effort only, the
    # failure message below is what matters to the caller.
    try:
        os.remove(output_path)
    except OSError:
        pass
    return None, failure
# Toggle mic/file input visibility
def toggle(choice: str):
    """Switch between the microphone and file-upload inputs.

    Args:
        choice: Selected source; ``"mic"`` selects the microphone, any other
            value selects the file upload.

    Returns:
        Two updates, microphone input first and file input second. Exactly one
        is made visible, and both have their value reset to ``None``.
    """
    use_mic = choice == "mic"
    mic_update = gr.update(visible=use_mic, value=None)
    file_update = gr.update(visible=not use_mic, value=None)
    return mic_update, file_update
# Change alert style based on input
def change_color(text_input: str):
    """Build the styling update for the text box based on its content.

    Args:
        text_input: Current content of the text box.

    Returns:
        An update that sets ``elem_id`` to ``"warning"`` and requests
        autofocus when the text is empty, or sets ``elem_id`` to ``"alert"``
        without autofocus otherwise.
    """
    is_empty = len(text_input) == 0
    if is_empty:
        css_id = "warning"
    else:
        css_id = "alert"
    return gr.update(elem_id=css_id, autofocus=is_empty)
# Reset fields
def clear_color(text_input: str, radio: str, error_box: str):
    """Return the updates that put the form back into its initial state.

    The three arguments are accepted to match the event binding but are not
    read.

    Returns:
        Three updates in order: text box ``elem_id`` set to ``"alert"``, radio
        value set to ``"mic"``, and the status box hidden.
    """
    text_update = gr.update(elem_id="alert")
    radio_update = gr.update(value="mic")
    error_update = gr.update(visible=False)
    return text_update, radio_update, error_update
# Show error or success
def show_error(text: str):
    """Decide which of the status box and the audio player is visible.

    Args:
        text: Text to check; only whether it equals ``""`` matters.

    Returns:
        Two updates. The first (for the status box) is visible only when
        ``text`` is empty and carries the ``"warning"`` id and ``"feedback"``
        class. The second (for the audio player) is visible only when ``text``
        is not empty.
    """
    is_blank = text == ""
    status_update = gr.update(visible=is_blank, elem_id="warning", elem_classes="feedback")
    audio_update = gr.update(visible=not is_blank)
    return status_update, audio_update
# Status sync run after synthesis
def _sync_status(message: str):
    """Show the status box only when synthesis produced a message.

    Args:
        message: Status text returned by text_to_speech; ``""`` means success.

    Returns:
        Two updates: the status box (visible and styled as a warning when
        there is a message) and the audio player (visible when there is none).
    """
    failed = bool(message)
    return (
        gr.update(visible=failed, elem_id="warning", elem_classes="feedback"),
        gr.update(visible=not failed),
    )


# Gradio UI
with gr.Blocks(css=css) as demo:
    with gr.Row():
        # Left column: inputs and action buttons.
        with gr.Column():
            text_input = gr.Textbox(label="Enter text to clone", value="", max_lines=4, lines=4)
            radio = gr.Radio(["mic", "file"], value="mic", label="Upload speaker audio")
            audio_input_mic = gr.Audio(label="Use Microphone", sources="microphone", type="filepath", visible=True)
            audio_input_file = gr.Audio(label="Upload File (.wav/.mp3)", type="filepath", visible=False)
            with gr.Row():
                with gr.Column():
                    # The microphone recording is cleared too, so a stale
                    # recording cannot be reused after "Clear".
                    btn_clear = gr.ClearButton([text_input, radio, audio_input_mic, audio_input_file])
                with gr.Column():
                    btn = gr.Button("Generate Voice", variant="primary")
        # Right column: generated audio and status message.
        with gr.Column():
            audio_output = gr.Audio(label="Generated Voice", visible=True, autoplay=True, show_share_button=False)
            error_box = gr.Textbox(label="Status", value="Input box cannot be blank!!", visible=False, container=True)

    # Event bindings
    btn_clear.add(audio_output)
    synthesis = btn.click(
        text_to_speech,
        inputs=[text_input, audio_input_mic, audio_input_file],
        outputs=[audio_output, error_box],
    )
    # Visibility follows the message text_to_speech wrote into error_box.
    # Tying it to "is the text box empty" kept every other failure (missing
    # speaker audio, synthesis errors, whitespace-only text) hidden.
    synthesis.then(_sync_status, error_box, [error_box, audio_output])
    radio.change(toggle, radio, [audio_input_mic, audio_input_file])
    btn_clear.click(clear_color, [text_input, radio, error_box], [text_input, radio, error_box])
    btn.click(change_color, text_input, text_input)

# Launch the app
demo.launch()
|