Update handler.py
Browse files- handler.py +7 -7
handler.py
CHANGED
@@ -3,7 +3,7 @@ import numpy as np
|
|
3 |
import torch
|
4 |
from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
|
5 |
from datasets import load_dataset
|
6 |
-
|
7 |
from typing import Dict, List, Any
|
8 |
|
9 |
|
@@ -34,22 +34,22 @@ class EndpointHandler:
|
|
34 |
def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
    """Turn the text found under ``data["inputs"]`` into synthesized speech.

    Args:
        data: Request payload. The text is read from the ``"inputs"`` key;
            an empty string is used when that key is absent.

    Returns:
        A dict with ``"statusCode"`` set to 200 and a ``"body"`` dict holding
        ``"audio"`` (the result of calling ``.numpy()`` on the generated
        speech) and ``"sampling_rate"`` (always 16000).
    """
    text = data.get("inputs", "")

    # Tokenize the text; "pt" asks the processor for PyTorch tensors.
    encoded = self.processor(text=text, return_tensors="pt")

    waveform = self.model.generate_speech(
        encoded["input_ids"],
        self.speaker_embeddings,
        vocoder=self.vocoder,
    )

    # Writing the waveform to "current_sample.wav" is disabled here; the
    # audio is handed back in the response instead.
    # NOTE(review): the array is returned as-is — consider encoding it to a
    # suitable format before it leaves the handler.
    body = {
        "audio": waveform.numpy(),
        "sampling_rate": 16000,
    }
    return {"statusCode": 200, "body": body}
|
55 |
|
|
|
3 |
import torch
|
4 |
from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
|
5 |
from datasets import load_dataset
|
6 |
+
import time
|
7 |
from typing import Dict, List, Any
|
8 |
|
9 |
|
|
|
34 |
def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
    """Synthesize speech for ``data["inputs"]`` and report how long it took.

    Args:
        data: Request payload. The text is read from the ``"inputs"`` key;
            an empty string is used when that key is absent.

    Returns:
        A dict with ``"statusCode"`` set to 200 and a ``"body"`` dict holding:

        * ``"audio"``: the generated speech converted to a NumPy array,
        * ``"sampling_rate"``: always 16000,
        * ``"time_to_generate"``: elapsed seconds for tokenization plus
          generation, formatted with ``str()``.
    """
    given_text = data.get("inputs", "")

    # perf_counter() is monotonic, so the measured interval cannot be skewed
    # by system clock adjustments the way time.time() can.
    start_time = time.perf_counter()

    # Tokenize the text; "pt" asks the processor for PyTorch tensors.
    inputs = self.processor(text=given_text, return_tensors="pt")

    speech = self.model.generate_speech(
        inputs["input_ids"],
        self.speaker_embeddings,
        vocoder=self.vocoder,
    )

    run_time = time.perf_counter() - start_time

    # Return the expected response format
    return {
        "statusCode": 200,
        "body": {
            # detach()/cpu() keep the conversion from raising when the tensor
            # is attached to autograd or lives on an accelerator; for a plain
            # CPU tensor the result is the same as speech.numpy().
            # NOTE(review): consider encoding this to a suitable format.
            "audio": speech.detach().cpu().numpy(),
            "sampling_rate": 16000,
            "time_to_generate": str(run_time),
        },
    }
|
55 |
|