Update handler.py
Browse files- handler.py +21 -23
handler.py
CHANGED
@@ -1,37 +1,35 @@
|
|
1 |
-
from typing import Dict
|
2 |
-
from transformers import pipeline
|
3 |
import torch
|
|
|
|
|
4 |
import soundfile as sf
|
5 |
-
import
|
6 |
|
7 |
class EndpointHandler:
|
8 |
-
def __init__(self
|
9 |
-
self.
|
10 |
self.embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
|
11 |
|
12 |
-
def __call__(self, data
|
13 |
text = data.get("inputs", "")
|
|
|
14 |
speaker_embedding = torch.tensor(self.embeddings_dataset[7306]["xvector"]).unsqueeze(0)
|
|
|
|
|
15 |
|
16 |
-
#
|
17 |
-
|
18 |
|
19 |
-
#
|
20 |
-
|
21 |
-
sf.write(
|
22 |
-
audio_buffer.seek(0)
|
23 |
-
audio_wav = audio_buffer.read()
|
24 |
-
|
25 |
-
# Prepare the response headers.
|
26 |
-
headers = {
|
27 |
-
"Content-Type": "audio/wav"
|
28 |
-
}
|
29 |
|
30 |
-
#
|
31 |
-
|
32 |
"statusCode": 200,
|
33 |
-
"body":
|
34 |
-
|
|
|
|
|
35 |
}
|
36 |
|
37 |
-
|
|
|
|
|
|
|
1 |
import torch
|
2 |
+
from transformers import pipeline
|
3 |
+
from datasets import load_dataset
|
4 |
import soundfile as sf
|
5 |
+
from huggingface_hub.inference_api import InferenceApi
|
6 |
|
7 |
class EndpointHandler:
    """Text-to-speech endpoint backed by the hosted microsoft/speecht5_tts model.

    Speaker identity is fixed: x-vector #7306 from the CMU ARCTIC
    speaker-embedding dataset is used for every request.
    """

    def __init__(self):
        # Remote inference client for the SpeechT5 TTS model.
        self.api = InferenceApi(repo_id="microsoft/speecht5_tts", task="text-to-speech")
        # Speaker x-vectors; entry 7306 is used as the fixed voice below.
        self.embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")

    def __call__(self, data):
        """Synthesize speech for ``data["inputs"]`` and return a JSON-style payload."""
        prompt = data.get("inputs", "")

        # Fixed speaker voice: x-vector 7306, shaped (1, dim) via unsqueeze.
        voice = torch.tensor(self.embeddings_dataset[7306]["xvector"]).unsqueeze(0)
        # Plain Python lists serialize over the HTTP API; tensors do not.
        voice_payload = voice.tolist()

        # Delegate synthesis to the hosted model, waiting out cold starts.
        result = self.api(
            inputs=prompt,
            parameters={"forward_params": {"speaker_embeddings": voice_payload}},
            options={"wait_for_model": True},
        )

        # Persist the waveform locally as well as returning it.
        # NOTE(review): assumes the API response is a dict with "audio" and
        # "sampling_rate" keys — confirm against the deployed model's actual
        # output, and that the runtime filesystem is writable.
        sf.write("speech.wav", result["audio"], samplerate=result["sampling_rate"])

        return {
            "statusCode": 200,
            "body": {
                "audio": result["audio"],  # consider base64-encoding for transport
                "sampling_rate": result["sampling_rate"],
            },
        }
|
34 |
|
35 |
+
# Module-level singleton expected by the serving runtime.
# NOTE(review): constructing this at import time performs network I/O
# (InferenceApi setup and the load_dataset download in __init__) — confirm
# the deployment environment tolerates slow/failing imports.
handler = EndpointHandler()
|