from typing import Any, Dict

import soundfile as sf
import torch
from datasets import load_dataset
from huggingface_hub.inference_api import InferenceApi


class EndpointHandler:
    def __init__(self, path=""):
        # Client for the hosted Inference API serving the SpeechT5 text-to-speech checkpoint.
        self.api = InferenceApi(repo_id="microsoft/speecht5_tts", task="text-to-speech")
        # CMU ARCTIC x-vectors: speaker embeddings that give SpeechT5 a specific voice.
        self.embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
        # Pre-select one x-vector (index 7306, the voice used in the SpeechT5 examples). The
        # remote API call below does not consume it; it is kept for a variant of this handler
        # that runs SpeechT5 locally and passes it as `speaker_embeddings`.
        self.speaker_embedding = torch.tensor(self.embeddings_dataset[7306]["xvector"]).unsqueeze(0)

    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
        text = data.get("inputs", "")

        # Forward the text to the hosted model. InferenceApi already sends
        # wait_for_model=True by default, so no extra options are needed here.
        response = self.api(inputs=text)

        # Write the waveform to disk for inspection. This assumes the endpoint returns a JSON
        # payload with "audio" (waveform samples) and "sampling_rate" keys; adjust the parsing
        # if your deployment returns raw audio bytes instead.
        sf.write("speech.wav", response["audio"], samplerate=response["sampling_rate"])

        return {
            "statusCode": 200,
            "body": {
                "audio": response["audio"],
                "sampling_rate": response["sampling_rate"],
            },
        }


handler = EndpointHandler()
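

# Minimal local smoke test (an assumption, not part of the Inference Endpoints contract):
# running this module directly exercises the handler with the same payload shape the
# serving toolkit sends, i.e. {"inputs": "<text to synthesize>"}. The sentence is illustrative.
if __name__ == "__main__":
    result = handler({"inputs": "Hello, this is a test of the SpeechT5 handler."})
    print(result["statusCode"], result["body"]["sampling_rate"])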