Spaces:
Runtime error
Runtime error
Update app.py
Browse files — updated order of execution
app.py
CHANGED
@@ -1,9 +1,24 @@
|
|
1 |
import os
|
2 |
-
import openai
|
3 |
import torch
|
4 |
from transformers import pipeline
|
5 |
|
6 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
from huggingface_hub import HfFolder
|
8 |
from openai import OpenAI
|
9 |
|
@@ -12,41 +27,12 @@ if api_key is None:
|
|
12 |
raise ValueError("API_KEY is not set in the environment variables.")
|
13 |
print("API key successfully loaded.")
|
14 |
|
15 |
-
|
16 |
# Initialize OpenAI client for Hugging Face Inference Endpoint
|
17 |
client = OpenAI(
|
18 |
base_url="https://f2iozzwigntrzkve.us-east-1.aws.endpoints.huggingface.cloud/v1/",
|
19 |
api_key=api_key
|
20 |
)
|
21 |
|
22 |
-
|
23 |
-
# def generate_llm_response(text, model_id="ccibeekeoc42/Llama3.1-8b-base-SFT-2024-11-09"):
|
24 |
-
# """Generates LLM response for given text with streaming support"""
|
25 |
-
# full_response = []
|
26 |
-
|
27 |
-
# # Create streaming response
|
28 |
-
# chat_completion = client.chat.completions.create(
|
29 |
-
# model="tgi",
|
30 |
-
# messages=[
|
31 |
-
# {"role": "system", "content": "You are a BRIEF AND DIRECT assistant. A part of a speech pipeline so keep your responces short, fluent, and straight to the point. Avoid markdown in responses"},
|
32 |
-
# {"role": "user", "content": text}
|
33 |
-
# ],
|
34 |
-
# top_p=None,
|
35 |
-
# temperature=None,
|
36 |
-
# max_tokens=75,
|
37 |
-
# stream=True,
|
38 |
-
# seed=None,
|
39 |
-
# stop=None,
|
40 |
-
# frequency_penalty=None,
|
41 |
-
# presence_penalty=None
|
42 |
-
# )
|
43 |
-
# # Collect streamed response chunks
|
44 |
-
# for chunk in chat_completion:
|
45 |
-
# if chunk.choices[0].delta.content:
|
46 |
-
# full_response.append(chunk.choices[0].delta.content)
|
47 |
-
|
48 |
-
# return "".join(full_response)
|
49 |
-
|
50 |
def generate_llm_response(text, model_id="ccibeekeoc42/Llama3.1-8b-base-SFT-2024-11-09"):
|
51 |
full_response = []
|
52 |
try:
|
@@ -79,36 +65,17 @@ def generate_llm_response(text, model_id="ccibeekeoc42/Llama3.1-8b-base-SFT-2024
|
|
79 |
generate_llm_response("Explain Deep Learning in Igbo")
|
80 |
|
81 |
|
82 |
-
# Loading the ST Model (Whisper)
|
83 |
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
84 |
pipe = pipeline("automatic-speech-recognition", model="okezieowen/whisper-small-multilingual-naija-11-03-2024", device=device)
|
85 |
|
86 |
-
# Loading the TTS and Vocoder
|
87 |
-
from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
|
88 |
-
from datasets import load_dataset
|
89 |
-
|
90 |
-
processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
|
91 |
-
|
92 |
-
model_default = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
|
93 |
-
model = SpeechT5ForTextToSpeech.from_pretrained("ccibeekeoc42/speecht5_finetuned_naija_ig_yo_2025-01-20_O2")
|
94 |
-
vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
|
95 |
-
|
96 |
-
# sending the model to device
|
97 |
-
model_default.to(device)
|
98 |
-
model.to(device)
|
99 |
-
vocoder.to(device)
|
100 |
-
|
101 |
-
# Loading speaker embeddings
|
102 |
-
embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
|
103 |
-
speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
|
104 |
-
|
105 |
# Take audio and return the transcribed text
|
106 |
def transcribe(audio):
|
107 |
outputs = pipe(audio, max_new_tokens=256, generate_kwargs={"task": "transcribe"})
|
108 |
return outputs["text"]
|
109 |
|
110 |
|
111 |
-
# Helper Functions to Cleanup LLM Texts
|
112 |
# Replacement rules
|
113 |
import re
|
114 |
# Language-specific replacements
|
|
|
1 |
import os
|
|
|
2 |
import torch
|
3 |
from transformers import pipeline
|
4 |
|
5 |
+
# Loading the TTS and Vocoder ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
6 |
+
from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
|
7 |
+
from datasets import load_dataset
|
8 |
+
|
9 |
+
processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
|
10 |
+
model_default = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
|
11 |
+
vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
|
12 |
+
|
13 |
+
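# Send the models to the device. NOTE(review): `device` is only assigned later, in the Whisper section, so this runs before it exists (NameError) — move the `device = ...` assignment above this point.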
|
14 |
+
model_default.to(device)
|
15 |
+
vocoder.to(device)
|
16 |
+
|
17 |
+
# Loading speaker embeddings
|
18 |
+
embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
|
19 |
+
speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
|
20 |
+
|
21 |
+
# The LLM Model ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
22 |
from huggingface_hub import HfFolder
|
23 |
from openai import OpenAI
|
24 |
|
|
|
27 |
raise ValueError("API_KEY is not set in the environment variables.")
|
28 |
print("API key successfully loaded.")
|
29 |
|
|
|
30 |
# Initialize OpenAI client for Hugging Face Inference Endpoint
|
31 |
client = OpenAI(
|
32 |
base_url="https://f2iozzwigntrzkve.us-east-1.aws.endpoints.huggingface.cloud/v1/",
|
33 |
api_key=api_key
|
34 |
)
|
35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
def generate_llm_response(text, model_id="ccibeekeoc42/Llama3.1-8b-base-SFT-2024-11-09"):
|
37 |
full_response = []
|
38 |
try:
|
|
|
65 |
generate_llm_response("Explain Deep Learning in Igbo")
|
66 |
|
67 |
|
68 |
+
# Loading the ST Model (Whisper) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
69 |
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
70 |
pipe = pipeline("automatic-speech-recognition", model="okezieowen/whisper-small-multilingual-naija-11-03-2024", device=device)
|
71 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
72 |
# Take audio and return the transcribed text
|
73 |
def transcribe(audio):
|
74 |
outputs = pipe(audio, max_new_tokens=256, generate_kwargs={"task": "transcribe"})
|
75 |
return outputs["text"]
|
76 |
|
77 |
|
78 |
+
# Helper Functions to Cleanup LLM Texts ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
79 |
# Replacement rules
|
80 |
import re
|
81 |
# Language-specific replacements
|