File size: 1,164 Bytes
8869945 d65f95a aaf0168 7b5f670 aaf0168 7b5f670 8869945 aaf0168 8869945 aaf0168 8869945 a4c5e15 8869945 aaf0168 d65f95a 8869945 74cf751 d65f95a aaf0168 d65f95a 8869945 d65f95a aaf0168 d65f95a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 |
import io
from typing import Dict, List, Any

import soundfile as sf
from datasets import load_dataset
from huggingface_hub import InferenceClient
class EndpointHandler:
    """Text-to-speech request handler backed by a remotely hosted model.

    On construction the handler creates an ``InferenceClient`` bound to
    ``MODEL_ID`` and loads one speaker x-vector from ``EMBEDDINGS_DATASET``.
    Each call sends the request text (plus that x-vector) to the model,
    decodes the returned audio, writes it to ``OUTPUT_PATH`` and returns the
    samples in a JSON-serialisable dict.
    """

    # Previously hard-coded inline; exposed as class attributes so a subclass
    # can pick another model, dataset, voice, or output location.
    MODEL_ID = "microsoft/speecht5_tts"
    EMBEDDINGS_DATASET = "Matthijs/cmu-arctic-xvectors"
    SPEAKER_INDEX = 7306
    OUTPUT_PATH = "speech.wav"

    def __init__(self, path=""):
        """Create the inference client and cache the speaker embedding.

        Args:
            path: Accepted for interface compatibility; not used here.
        """
        # InferenceClient takes the target model through ``model=``; it has no
        # ``repo_id``/``task`` keywords.
        self.client = InferenceClient(model=self.MODEL_ID)
        self.embeddings_dataset = load_dataset(
            self.EMBEDDINGS_DATASET, split="validation"
        )
        # Index the row first so only one record is read, instead of pulling
        # the entire ``xvector`` column on every request.
        xvector = self.embeddings_dataset[self.SPEAKER_INDEX]["xvector"]
        if hasattr(xvector, "tolist"):
            # Covers datasets configured with a numpy/torch output format.
            xvector = xvector.tolist()
        # Wrap in an outer list to add the leading batch dimension that the
        # original ``.unsqueeze(0)`` was aiming for; plain lists have no such
        # method. Computed once because it never changes between requests.
        self.speaker_embedding = [list(xvector)]

    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Synthesise speech for ``data["inputs"]``.

        Args:
            data: Request payload; the text is read from the ``"inputs"`` key.

        Returns:
            ``{"statusCode": 200, "body": {"audio": ..., "sampling_rate": ...}}``
            on success, where ``audio`` is the decoded samples as nested Python
            lists/floats. ``{"statusCode": 400, "body": {"error": ...}}`` when
            ``"inputs"`` is missing, not a string, or blank.

        Raises:
            Whatever the inference client or ``soundfile`` raise on failure;
            these are deliberately not swallowed.
        """
        text = data.get("inputs", "")
        if not isinstance(text, str) or not text.strip():
            # Reject early rather than spending a remote call on empty input.
            return {
                "statusCode": 400,
                "body": {"error": "'inputs' must be a non-empty string"},
            }

        # The client object itself is not callable; ``text_to_speech`` is the
        # task method and it returns the encoded audio as raw bytes.
        # NOTE(review): ``extra_body`` requires a recent huggingface_hub, and
        # whether the serving backend honours ``forward_params`` /
        # ``speaker_embeddings`` cannot be verified from this file - confirm.
        audio_bytes = self.client.text_to_speech(
            text,
            extra_body={
                "forward_params": {"speaker_embeddings": self.speaker_embedding}
            },
        )

        # Decode the bytes to obtain both the samples and the sampling rate.
        # NOTE(review): assumes the returned container is one libsndfile can
        # read (e.g. WAV/FLAC) - confirm against the deployed backend.
        samples, sampling_rate = sf.read(io.BytesIO(audio_bytes))

        # Write the response audio to a file
        sf.write(self.OUTPUT_PATH, samples, sampling_rate)

        # Return the expected response format
        return {
            "statusCode": 200,
            "body": {
                # ``tolist()`` makes the array JSON-serialisable.
                "audio": samples.tolist(),
                "sampling_rate": int(sampling_rate),
            },
        }
# Module-level handler instance, constructed once at import time.
# (A stray trailing "|" made the original statement a syntax error.)
handler = EndpointHandler()