Dupaja
/

speecht5_tts

Inference Endpoints

Model card Files Files and versions Community

Dupaja committed on Jan 1, 2024

Commit

aaf0168

·

1 Parent(s): df81a21

Create handler.py

Files changed (1) hide show

handler.py +37 -0

handler.py ADDED Viewed

	@@ -0,0 +1,37 @@

+from transformers import pipeline
+import torch
+import soundfile as sf
+import base64
+import io
class EndpointHandler:
    """Text-to-speech request handler built on the SpeechT5 pipeline.

    The handler synthesises speech for the text found under the ``"inputs"``
    key of the request and returns it as a base64-encoded WAV file wrapped in
    a response dict with ``statusCode``, ``body`` and ``headers`` entries.
    """

    # Row of the x-vector dataset used as the fixed speaker voice.
    SPEAKER_INDEX = 7306

    def __init__(self, path=""):
        """Load the synthesiser pipeline and the speaker embedding.

        Args:
            path: Model directory handed over by the serving runtime when it
                instantiates the handler. It is accepted so that call does
                not fail; the model itself is still loaded by its Hub name.
        """
        # Imported locally because the file's import block never brings
        # ``load_dataset`` into scope, which made the original raise NameError.
        from datasets import load_dataset

        self.synthesiser = pipeline("text-to-speech", model="microsoft/speecht5_tts")
        self.embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
        # The speaker never changes, so build the embedding tensor once here
        # instead of re-reading the dataset on every request.
        self.speaker_embedding = torch.tensor(
            self.embeddings_dataset[self.SPEAKER_INDEX]["xvector"]
        ).unsqueeze(0)

    def __call__(self, data):
        """Synthesise speech for one request.

        Args:
            data: Mapping with the text to speak under the ``"inputs"`` key.

        Returns:
            A dict with ``statusCode``, ``body`` and ``headers``. On success
            the body holds the base64-encoded WAV data (``"audio"``) and its
            ``"sampling_rate"``. When ``"inputs"`` is missing, empty,
            whitespace-only or not a string, a ``statusCode`` 400 response
            with an ``"error"`` message is returned instead.
        """
        text = data.get("inputs", "")
        # Reject unusable input up front rather than feeding it to the model.
        if not isinstance(text, str) or not text.strip():
            return {
                "statusCode": 400,
                "body": {
                    "error": 'Request must contain a non-empty string under "inputs".',
                },
                "headers": {
                    "Content-Type": "application/json",
                },
            }

        # Generate speech using the synthesiser
        speech = self.synthesiser(
            text,
            forward_params={"speaker_embeddings": self.speaker_embedding},
        )
        sampling_rate = speech["sampling_rate"]

        # Encode the audio array as an in-memory WAV file, then as base64 text
        wav_buffer = io.BytesIO()
        sf.write(wav_buffer, speech["audio"], samplerate=sampling_rate, format="WAV")
        audio_base64 = base64.b64encode(wav_buffer.getvalue()).decode("utf-8")

        # Create response
        return {
            "statusCode": 200,
            "body": {
                "audio": audio_base64,
                "sampling_rate": sampling_rate,
            },
            "headers": {
                "Content-Type": "audio/wav",
            },
        }