Hematej committed on
Commit
8c99301
·
verified ·
1 Parent(s): 300bc4b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -63
app.py CHANGED
@@ -2,111 +2,111 @@ import gradio as gr
2
  from TTS.api import TTS
3
  import torch
4
  import os
 
5
 
 
6
  css = """
7
  #warning {background-color: #FFCCCB !important}
8
- .feedback label textarea {height: auto !important;
9
- font-size: 22px !important;
10
- font-weight: 800 !important;
11
- text-align: center !important;
12
- color: #801313 !important;
13
- padding: 0px !important}
 
 
14
  #alert {background-color: #fff !important}
15
  """
16
 
17
- # Check CPU/GPU availability before loading models
18
  device = "cuda" if torch.cuda.is_available() else "cpu"
19
 
20
- # Explicitly define `gpu` settings
21
  tts = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts", progress_bar=False, gpu=(device=="cuda"))
22
- zh_tts = TTS(model_name="tts_models/zh-CN/baker/tacotron2-DDC-GST", progress_bar=False, gpu=(device=="cuda"))
23
- de_tts = TTS(model_name="tts_models/de/thorsten/vits", gpu=(device=="cuda"))
24
- es_tts = TTS(model_name="tts_models/es/mai/tacotron2-DDC", progress_bar=False, gpu=(device=="cuda"))
25
-
26
- # ✅ Ensure correct weight loading
27
  tts.to(device)
28
- zh_tts.to(device)
29
- de_tts.to(device)
30
- es_tts.to(device)
31
-
32
- def text_to_speech(text: str, speaker_wav: str, speaker_wav_file: str) -> str:
33
- # ✅ Sanitize input text to avoid empty processing
34
- text = text.strip().replace("\n", " ").replace(" ", " ")
35
-
36
- if not text:
37
- return "Error: No text provided."
38
 
39
- return change_aud(text, speaker_wav, speaker_wav_file)
 
 
 
 
 
40
 
41
- def change_aud(text: str, speaker_wav: str, speaker_wav_file: str) -> str:
42
- # Ensure speaker file is correctly assigned
43
- if speaker_wav_file and not speaker_wav:
44
- speaker_wav = speaker_wav_file
45
 
46
- # Validate audio input
47
- if not speaker_wav or not os.path.exists(speaker_wav):
48
- return "Error: No valid speaker audio provided."
 
49
 
50
- if speaker_wav.endswith(".mp3"):
51
- return "Error: MP3 format not supported. Convert to WAV."
 
 
 
52
 
53
- file_path = "output.wav"
54
 
55
  try:
56
- tts.tts_to_file(text, speaker_wav=speaker_wav, language="en", file_path=file_path)
57
-
58
- # Debugging print statement to confirm output generation
59
- if os.path.exists(file_path) and os.path.getsize(file_path) > 0:
60
- print(f"Generated file path: {file_path}, Size: {os.path.getsize(file_path)} bytes")
61
- return file_path
62
  else:
63
- return "Error: Output file was not properly generated."
64
  except Exception as e:
65
- return f"Error generating cloned voice: {str(e)}"
66
-
67
- def show_error(text: str):
68
- # ✅ Ensure function returns expected outputs for Gradio UI updates
69
- return (
70
- gr.update(visible=(text == ""), elem_id="warning", elem_classes="feedback"),
71
- gr.update(visible=(text != ""))
72
- )
73
-
74
- title = "Voice-Cloning-Demo"
75
 
 
76
  def toggle(choice: str):
77
  return (
78
  gr.update(visible=(choice == "mic"), value=None),
79
  gr.update(visible=(choice != "mic"), value=None)
80
  )
81
 
 
82
  def change_color(text_input: str):
83
  return gr.update(elem_id="warning" if len(text_input) == 0 else "alert", autofocus=(len(text_input) == 0))
84
 
 
85
  def clear_color(text_input: str, radio: str, error_box: str):
86
  return gr.update(elem_id="alert"), gr.update(value="mic"), gr.update(visible=False)
87
 
88
- with gr.Blocks(css="footer {visibility: hidden}") as demo:
 
 
 
 
 
 
 
 
89
  with gr.Row():
90
  with gr.Column():
91
- text_input = gr.Textbox(label="Input the text", value="", max_lines=4, lines=4)
92
- radio = gr.Radio(["mic", "file"], value="mic", label="How would you like to upload your audio?")
93
- audio_input_mic = gr.Audio(label="Voice to clone", sources="microphone", type="filepath", visible=True)
94
- audio_input_file = gr.Audio(label="Voice to clone", type="filepath", visible=False)
95
 
96
  with gr.Row():
97
  with gr.Column():
98
  btn_clear = gr.ClearButton([text_input, radio, audio_input_file])
99
  with gr.Column():
100
- btn = gr.Button("Generate", variant="primary")
 
101
  with gr.Column():
102
- audio_output = gr.Audio(label="Output", visible=True, autoplay=True, show_share_button=False)
103
- error_box = gr.Textbox(label="WARNING", value="Input box cannot be blank!!", visible=False, container=True)
104
-
 
105
  btn_clear.add(audio_output)
106
- btn.click(text_to_speech, inputs=[text_input, audio_input_mic, audio_input_file], outputs=audio_output)
107
- btn.click(show_error, text_input, [error_box, audio_output]) # ✅ Fixed output values
108
  radio.change(toggle, radio, [audio_input_mic, audio_input_file])
109
  btn_clear.click(clear_color, [text_input, radio, error_box], [text_input, radio, error_box])
110
  btn.click(change_color, text_input, text_input)
111
 
112
- demo.launch()
 
 
2
  from TTS.api import TTS
3
  import torch
4
  import os
5
+ from pydub import AudioSegment
6
 
7
# Stylesheet passed to gr.Blocks below. "#warning" and "#alert" set the
# background of whichever component currently carries that elem_id, and
# ".feedback" styles the textarea of a component given that class.
css = """
#warning {background-color: #FFCCCB !important}
.feedback label textarea {
    height: auto !important;
    font-size: 22px !important;
    font-weight: 800 !important;
    text-align: center !important;
    color: #801313 !important;
    padding: 0px !important
}
#alert {background-color: #fff !important}
"""

# Run on CUDA when torch reports a usable GPU, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the multilingual "your_tts" model once at import time so every request
# reuses the same instance, then move it onto the selected device.
tts = TTS(
    model_name="tts_models/multilingual/multi-dataset/your_tts",
    progress_bar=False,
    gpu=(device == "cuda"),
)
tts.to(device)
 
 
 
 
 
 
 
 
 
 
27
 
28
def convert_mp3_to_wav(mp3_path: str) -> str:
    """Decode an MP3 file and write it back out as a WAV file beside it.

    Args:
        mp3_path: Path of the MP3 file to convert.

    Returns:
        Path of the WAV file that was written: ``mp3_path`` with its final
        extension replaced by ``.wav``.
    """
    # Swap only the trailing extension. ``str.replace`` would rewrite every
    # ".mp3" in the path (for example a parent directory called "clips.mp3"),
    # and would leave the path unchanged when the suffix is not exactly
    # lower-case ".mp3", so the export would overwrite the input file.
    stem, _extension = os.path.splitext(mp3_path)
    wav_path = stem + ".wav"

    audio = AudioSegment.from_mp3(mp3_path)
    audio.export(wav_path, format="wav")
    return wav_path
34
 
35
def text_to_speech(text: str, speaker_wav: str, speaker_wav_file: str):
    """Synthesize ``text`` in the voice of the supplied speaker recording.

    Args:
        text: Text to speak. Surrounding whitespace is stripped and newlines
            are replaced by spaces; ``None`` is treated as empty.
        speaker_wav: Path of the microphone recording, or a falsy value.
        speaker_wav_file: Path of the uploaded recording, or a falsy value.
            Takes precedence over ``speaker_wav`` when both are given.

    Returns:
        A ``(audio_path, message)`` tuple. On success ``audio_path`` is the
        generated WAV file and ``message`` is ``""``; on any failure
        ``audio_path`` is ``None`` and ``message`` describes the problem.
    """
    # A cleared textbox may deliver None; treat it like an empty string
    # instead of raising AttributeError on .strip().
    text = (text or "").strip().replace("\n", " ")
    speaker_audio = speaker_wav_file or speaker_wav

    if not text:
        return None, "⚠️ Error: Text input is empty."
    if not speaker_audio or not os.path.exists(speaker_audio):
        return None, "⚠️ Error: No valid speaker audio provided."

    if speaker_audio.endswith(".mp3"):
        try:
            speaker_audio = convert_mp3_to_wav(speaker_audio)
        except Exception as e:
            return None, f"⚠️ Error converting MP3 to WAV: {str(e)}"

    output_path = "output.wav"

    try:
        # Drop any file left over from an earlier request; otherwise the
        # existence/size check below would accept stale audio as the result
        # of this call.
        if os.path.exists(output_path):
            os.remove(output_path)
        tts.tts_to_file(
            text=text,
            speaker_wav=speaker_audio,
            language="en",
            file_path=output_path,
        )
    except Exception as e:
        return None, f"⚠️ Error during synthesis: {str(e)}"

    if os.path.exists(output_path) and os.path.getsize(output_path) > 0:
        return output_path, ""
    return None, "⚠️ Error: Audio was not generated."
 
 
 
 
 
 
 
 
 
61
 
62
def toggle(choice: str):
    """Show the audio input that matches ``choice`` and reset both inputs.

    Args:
        choice: Selected source; ``"mic"`` selects the microphone input and
            any other value selects the file input.

    Returns:
        Two component updates, for the microphone input and the file input
        in that order. Each sets the visibility and clears the value.
    """
    use_mic = choice == "mic"
    mic_update = gr.update(visible=use_mic, value=None)
    file_update = gr.update(visible=not use_mic, value=None)
    return mic_update, file_update
68
 
69
def change_color(text_input: str):
    """Build the update that restyles the text box according to its content.

    Args:
        text_input: Current content of the text box.

    Returns:
        A component update whose ``elem_id`` is ``"warning"`` and whose
        ``autofocus`` is true when the text is empty; otherwise ``elem_id``
        is ``"alert"`` and ``autofocus`` is false.
    """
    is_blank = len(text_input) == 0
    if is_blank:
        style_id = "warning"
    else:
        style_id = "alert"
    return gr.update(elem_id=style_id, autofocus=is_blank)
72
 
73
def clear_color(text_input: str, radio: str, error_box: str):
    """Build the updates that put the form back into its initial look.

    The three arguments are accepted because the click handler passes them,
    but their values are not used.

    Returns:
        Three component updates: ``elem_id="alert"`` for the text box,
        ``value="mic"`` for the source selector, and ``visible=False`` for
        the status box.
    """
    text_update = gr.update(elem_id="alert")
    radio_update = gr.update(value="mic")
    status_update = gr.update(visible=False)
    return text_update, radio_update, status_update
76
 
77
def show_error(text: str):
    """Decide which of the status box and the audio player is visible.

    Args:
        text: Current content of the text box.

    Returns:
        Two component updates. The first is visible only when ``text`` is the
        empty string and also sets ``elem_id="warning"`` and
        ``elem_classes="feedback"``; the second is visible only when ``text``
        is not empty.
    """
    is_blank = text == ""
    status_update = gr.update(
        visible=is_blank,
        elem_id="warning",
        elem_classes="feedback",
    )
    player_update = gr.update(visible=not is_blank)
    return status_update, player_update
83
+
84
# Gradio UI
# NOTE(review): the nesting below is reconstructed; the rendering this was
# taken from carries no indentation. Confirm the layout against the real file.
with gr.Blocks(css=css) as demo:
    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(label="Enter text to clone", value="", max_lines=4, lines=4)
            radio = gr.Radio(["mic", "file"], value="mic", label="Upload speaker audio")
            audio_input_mic = gr.Audio(label="Use Microphone", sources="microphone", type="filepath", visible=True)
            audio_input_file = gr.Audio(label="Upload File (.wav/.mp3)", type="filepath", visible=False)

            with gr.Row():
                with gr.Column():
                    btn_clear = gr.ClearButton([text_input, radio, audio_input_file])
                with gr.Column():
                    btn = gr.Button("Generate Voice", variant="primary")

        with gr.Column():
            audio_output = gr.Audio(label="Generated Voice", visible=True, autoplay=True, show_share_button=False)
            error_box = gr.Textbox(label="Status", value="Input box cannot be blank!!", visible=False, container=True)

    def _reveal_status(message):
        """Make the status box visible exactly when it holds a message."""
        return gr.update(visible=bool(message))

    # Event bindings
    btn_clear.add(audio_output)

    # show_error and text_to_speech both write error_box and audio_output.
    # Registered as separate click listeners they run with no guaranteed
    # order, and show_error hides error_box for any non-empty text, so the
    # messages returned by text_to_speech were never displayed. Chain the
    # steps so they run in sequence, then reveal the status box if
    # text_to_speech left a message in it.
    btn.click(show_error, text_input, [error_box, audio_output]).then(
        text_to_speech,
        inputs=[text_input, audio_input_mic, audio_input_file],
        outputs=[audio_output, error_box],
    ).then(_reveal_status, error_box, error_box)

    radio.change(toggle, radio, [audio_input_mic, audio_input_file])
    btn_clear.click(clear_color, [text_input, radio, error_box], [text_input, radio, error_box])
    # Writes only text_input, so it can stay an independent listener.
    btn.click(change_color, text_input, text_input)

# Launch the app
demo.launch()