Update app.py
app.py
CHANGED
@@ -83,6 +83,25 @@ generate_llm_response("Explain Deep Learning in Igbo")
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
 pipe = pipeline("automatic-speech-recognition", model="okezieowen/whisper-small-multilingual-naija-11-03-2024", device=device)
 
+# Loading the TTS and Vocoder
+from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
+from datasets import load_dataset
+
+processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
+
+model_default = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
+model = SpeechT5ForTextToSpeech.from_pretrained("ccibeekeoc42/speecht5_finetuned_naija_ig_yo_2025-01-20_O2")
+vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
+
+# sending the model to device
+model_default.to(device)
+model.to(device)
+vocoder.to(device)
+
+# Loading speaker embeddings
+embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
+speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
+
 # Take audio and return translated text
 def transcribe(audio):
     outputs = pipe(audio, max_new_tokens=256, generate_kwargs={"task": "transcribe"})
@@ -206,26 +225,6 @@ llm_response_cleaned = normalize_text(cleanup_text(replace_numbers_with_words(ll
 print(f"LLM Response: {llm_response}")
 print(f"LLM Response Cleaned: {llm_response_cleaned}")
 
-# Loading the TTS and Vocoder
-from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
-from datasets import load_dataset
-
-processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
-
-model_default = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
-model = SpeechT5ForTextToSpeech.from_pretrained("ccibeekeoc42/speecht5_finetuned_naija_ig_yo_2025-01-20_O2")
-vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
-
-# sending the model to device
-model_default.to(device)
-model.to(device)
-vocoder.to(device)
-
-# Loading speaker embeddings
-embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
-speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
-
-
 # returning speech from text (and bringing to CPU)
 def synthesise(text):
     inputs = processor(text=text, return_tensors="pt")
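Likewise, the second hunk truncates synthesise() after the processor call. A minimal sketch of the usual SpeechT5 recipe wired to the objects the moved block loads (the fine-tuned model, speaker_embeddings, and vocoder); again an assumption about how the Space completes the function, not its actual code:

def synthesise(text):
    inputs = processor(text=text, return_tensors="pt")
    # generate_speech takes tokenized text, an x-vector speaker embedding, and a vocoder,
    # and returns a waveform tensor on the model's device.
    speech = model.generate_speech(
        inputs["input_ids"].to(device),
        speaker_embeddings.to(device),
        vocoder=vocoder,
    )
    # "bringing to CPU", per the comment in the diff, so downstream code (e.g. a Gradio audio output) can consume it.
    return speech.cpu()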