ccibeekeoc42 commited on
Commit
df3c1d7
·
verified ·
1 Parent(s): 1111f2b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -20
app.py CHANGED
@@ -83,6 +83,25 @@ generate_llm_response("Explain Deep Learning in Igbo")
83
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
84
  pipe = pipeline("automatic-speech-recognition", model="okezieowen/whisper-small-multilingual-naija-11-03-2024", device=device)
85
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  # Take audio and return transcribed text
87
  def transcribe(audio):
88
  outputs = pipe(audio, max_new_tokens=256, generate_kwargs={"task": "transcribe"})
@@ -206,26 +225,6 @@ llm_response_cleaned = normalize_text(cleanup_text(replace_numbers_with_words(ll
206
  print(f"LLM Response: {llm_response}")
207
  print(f"LLM Response Cleaned: {llm_response_cleaned}")
208
 
209
- # Loading the TTS and Vocoder
210
- from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
211
- from datasets import load_dataset
212
-
213
- processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
214
-
215
- model_default = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
216
- model = SpeechT5ForTextToSpeech.from_pretrained("ccibeekeoc42/speecht5_finetuned_naija_ig_yo_2025-01-20_O2")
217
- vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
218
-
219
- # sending the model to device
220
- model_default.to(device)
221
- model.to(device)
222
- vocoder.to(device)
223
-
224
- # Loading speaker embeddings
225
- embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
226
- speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
227
-
228
-
229
  # returning speech from text (and bringing to CPU)
230
  def synthesise(text):
231
  inputs = processor(text=text, return_tensors="pt")
 
83
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
84
  pipe = pipeline("automatic-speech-recognition", model="okezieowen/whisper-small-multilingual-naija-11-03-2024", device=device)
85
 
86
+ # Loading the TTS and Vocoder
87
+ from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
88
+ from datasets import load_dataset
89
+
90
+ processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
91
+
92
+ model_default = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
93
+ model = SpeechT5ForTextToSpeech.from_pretrained("ccibeekeoc42/speecht5_finetuned_naija_ig_yo_2025-01-20_O2")
94
+ vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
95
+
96
+ # sending the model to device
97
+ model_default.to(device)
98
+ model.to(device)
99
+ vocoder.to(device)
100
+
101
+ # Loading speaker embeddings
102
+ embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
103
+ speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
104
+
105
  # Take audio and return transcribed text
106
  def transcribe(audio):
107
  outputs = pipe(audio, max_new_tokens=256, generate_kwargs={"task": "transcribe"})
 
225
  print(f"LLM Response: {llm_response}")
226
  print(f"LLM Response Cleaned: {llm_response_cleaned}")
227
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
228
  # returning speech from text (and bringing to CPU)
229
  def synthesise(text):
230
  inputs = processor(text=text, return_tensors="pt")