Naksh786 committed on
Commit
03088e5
·
verified ·
1 Parent(s): e6d1a2d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -12
app.py CHANGED
@@ -1,19 +1,24 @@
1
  import gradio as gr
2
- from transformers import AutoModelForTextToSpeech, AutoTokenizer
3
  import torch
 
 
4
 
5
- # Load the model and tokenizer
6
- model = AutoModelForTextToSpeech.from_pretrained("parler-tts/parler_tts")
7
- tokenizer = AutoTokenizer.from_pretrained("parler-tts/parler_tts")
 
 
 
8
 
9
- # Define the TTS function
10
  def text_to_speech(text):
11
- inputs = tokenizer(text, return_tensors="pt")
12
- speech = model.generate(**inputs)
13
-
14
- # Convert to an audio format, e.g., 16kHz waveform
15
- audio = speech[0].cpu().detach().numpy()
16
- return 16000, audio # Sample rate, waveform
 
17
 
18
  # Gradio interface
19
  interface = gr.Interface(
@@ -21,7 +26,7 @@ interface = gr.Interface(
21
  inputs="text",
22
  outputs="audio",
23
  title="Text to Speech",
24
- description="Convert text to speech using the parler-tts/parler_tts model"
25
  )
26
 
27
  interface.launch()
 
1
  import gradio as gr
2
+ from transformers import SpeechT5ForTextToSpeech, SpeechT5Processor
3
  import torch
4
+ import torchaudio
5
+ import tempfile
6
 
# Load the SpeechT5 text-to-speech model and its processor once at import
# time so every request reuses the same weights.
processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")

# SpeechT5 conditions generation on a speaker embedding (x-vector) of shape
# (1, 512). The previous code passed a Hub ".../blob/..." web-page URL to a
# ".pt" file into torchaudio.load(), which decodes audio files rather than
# serialized tensors, so the embedding could never be loaded.
# The all-zeros vector below is a neutral placeholder that lets the app run.
# TODO: replace with a real x-vector (e.g. torch.load() on a local file) to
# get a natural-sounding voice.
speaker_embedding = torch.zeros((1, 512))
13
 
 
def text_to_speech(text):
    """Synthesize ``text`` with SpeechT5 and return the path to a WAV file.

    Args:
        text: The sentence(s) to speak.

    Returns:
        Filesystem path of a temporary 16 kHz ``.wav`` file holding the
        generated speech. The file is created with ``delete=False`` so it
        outlives this call; the caller owns its cleanup.
    """
    # Imported here so the function does not rely on the file-level import
    # list being extended; `transformers` is already a dependency of the file.
    from transformers import SpeechT5HifiGan

    # Without a vocoder, generate_speech() returns a mel spectrogram rather
    # than audio samples. Load the HiFi-GAN vocoder once and cache it on the
    # function object so later requests reuse it.
    vocoder = getattr(text_to_speech, "_vocoder", None)
    if vocoder is None:
        vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
        text_to_speech._vocoder = vocoder

    # Pass the text by keyword, matching the documented processor usage.
    inputs = processor(text=text, return_tensors="pt")
    speech = model.generate_speech(
        inputs["input_ids"], speaker_embedding, vocoder=vocoder
    )

    # torchaudio.save() expects a (channels, samples) tensor; add the channel
    # axis when the vocoder returns a 1-D waveform.
    waveform = speech.detach().cpu()
    if waveform.dim() == 1:
        waveform = waveform.unsqueeze(0)

    # Reserve a path, then close the handle before writing so the save does
    # not contend with the still-open temporary file.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
        wav_path = f.name
    torchaudio.save(wav_path, waveform, 16000)
    return wav_path
22
 
23
  # Gradio interface
24
  interface = gr.Interface(
 
26
  inputs="text",
27
  outputs="audio",
28
  title="Text to Speech",
29
+ description="Convert text to speech using the microsoft/speecht5_tts model"
30
  )
31
 
32
  interface.launch()