dshamika committed on
Commit
221562b
·
verified ·
1 Parent(s): aabf02d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -24
app.py CHANGED
@@ -1,31 +1,25 @@
1
  import torch
 
2
  import gradio as gr
3
- import soundfile as sf
4
- from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
5
- from datasets import load_dataset
6
 
7
- # Load Sinhala SpeechT5 model from Hugging Face
8
- processor = SpeechT5Processor.from_pretrained("Ransaka/speecht5_tts_sinhala")
9
- model = SpeechT5ForTextToSpeech.from_pretrained("Ransaka/speecht5_tts_sinhala")
10
- vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
11
 
12
- # Load speaker embedding
13
- embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
14
- speaker_embedding = torch.tensor(embeddings_dataset[0]["xvector"]).unsqueeze(0)
 
 
 
15
 
16
- def tts(text):
17
- inputs = processor(text=text, return_tensors="pt")
18
- speech = model.generate_speech(inputs["input_ids"], speaker_embedding, vocoder=vocoder)
19
- sf.write("output.wav", speech.numpy(), samplerate=16000)
20
- return "output.wav"
21
-
22
- demo = gr.Interface(
23
- fn=tts,
24
- inputs=gr.Textbox(label="සිංහල පෙළය ඇතුළත් කරන්න", placeholder="උදා: අපිට උදව්වක් ඕනෙ."),
25
- outputs=gr.Audio(type="filepath", label="ඇසෙන්නා වූ ශබ්දය"),
26
- title="සිංහල Text-to-Speech",
27
- description="Ransaka/speecht5_tts_sinhala මොඩලය භාවිතයෙන් සිංහල වාක්‍ය ශබ්ද බවට පරිවර්තනය කරන්න"
28
  )
29
 
30
- if __name__ == "__main__":
31
- demo.launch()
 
1
  import torch
2
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
3
  import gradio as gr
 
 
 
4
 
5
+ # Load model & tokenizer
6
+ model_name = "machinelearningzuu/sinhala-text-to-speech"
7
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
8
+ model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
9
 
10
+ def tts_generate(text):
11
+ inputs = tokenizer(text, return_tensors="pt")
12
+ with torch.no_grad():
13
+ outputs = model.generate(**inputs)
14
+ audio = outputs[0].cpu().numpy()
15
+ return audio
16
 
17
+ iface = gr.Interface(
18
+ fn=tts_generate,
19
+ inputs=gr.Textbox(label="සිංහල පේළියක් ඇතුළත් කරන්න"),
20
+ outputs=gr.Audio(label="නිමැවුම් හඬ", type="numpy"),
21
+ title="Sinhala Text to Speech",
22
+ description="සිංහල වාක්‍යයක් ඇතුළත් කරන්න. AI හරහා හඬක් ලෙස output ලැබේ."
 
 
 
 
 
 
23
  )
24
 
25
+ iface.launch()