Dupaja committed
Commit 8869945 · 1 Parent(s): a4c5e15

Update handler.py

Files changed (1)
  1. handler.py +8 -19
handler.py CHANGED
@@ -1,39 +1,28 @@
-import torch
-from transformers import pipeline
+from huggingface_hub import InferenceClient
 from datasets import load_dataset
 import soundfile as sf
-from huggingface_hub.inference_api import InferenceApi
 from typing import Dict, List, Any
 
-
 class EndpointHandler:
     def __init__(self, path=""):
-        self.api = InferenceApi(repo_id="microsoft/speecht5_tts", task="text-to-speech")
+        self.client = InferenceClient(repo_id="microsoft/speecht5_tts", task="text-to-speech")
         self.embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
 
-    def __call__(self, data):
-
+    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
         text = data.get("inputs", "")
-        # Extract speaker_embedding using the index from your dataset, or replace with your own logic.
-        speaker_embedding = torch.tensor(self.embeddings_dataset[7306]["xvector"]).unsqueeze(0)
-        # Convert embedding to list to avoid serialization issues
-        speaker_embedding_list = speaker_embedding.tolist()
-
+        speaker_embedding = self.embeddings_dataset['xvector'][7306].unsqueeze(0).tolist()
 
-        # Use the API to run the model
-        #parameters={"forward_params": {"speaker_embeddings": speaker_embedding_list}}
-        response = self.api(inputs=text, options={"wait_for_model": True})
+        response = self.client(payload={"inputs": text, "forward_params": {"speaker_embeddings": speaker_embedding}}, options={"wait_for_model": True})
 
         # Write the response audio to a file
-        # Note: This might not be possible in all environments, ensure this is suitable for your deployment
-        sf.write("speech.wav", response["audio"], samplerate=response["sampling_rate"])
+        sf.write("speech.wav", response.audio, response.sampling_rate)
 
         # Return the expected response format
         return {
             "statusCode": 200,
             "body": {
-                "audio": response["audio"], # Consider encoding this to a suitable format
-                "sampling_rate": response["sampling_rate"]
+                "audio": response.audio, # Consider encoding this to a suitable format
+                "sampling_rate": response.sampling_rate
             }
         }
 
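As committed, `__call__` is likely to fail at runtime: the `InferenceClient` constructor takes `model=`, not `repo_id=`/`task=`, and the client object is not callable the way the old `InferenceApi` was; `self.embeddings_dataset['xvector'][7306]` is a plain Python list, which has no `.unsqueeze()`; and the client's text-to-speech responses do not expose `.audio`/`.sampling_rate` attributes. Below is a minimal sketch, not the committed code, that instead runs SpeechT5 locally through the transformers text-to-speech pipeline (the route the removed `torch`/`pipeline` imports pointed at), assuming the endpoint can host the model itself:

import torch
from datasets import load_dataset
from transformers import pipeline
from typing import Any, Dict


class EndpointHandler:
    def __init__(self, path: str = ""):
        # Run SpeechT5 locally instead of calling the hosted Inference API.
        self.pipe = pipeline("text-to-speech", model="microsoft/speecht5_tts")
        self.embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")

    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
        text = data.get("inputs", "")

        # A dataset row is a plain Python list; wrap it in a (1, 512) tensor
        # before handing it to the model (7306 is the speaker index the
        # original handler used).
        speaker_embedding = torch.tensor(self.embeddings_dataset[7306]["xvector"]).unsqueeze(0)

        # The text-to-speech pipeline returns {"audio": ndarray, "sampling_rate": int}.
        speech = self.pipe(text, forward_params={"speaker_embeddings": speaker_embedding})

        # Convert the numpy waveform to a list so the body stays JSON-serializable.
        return {
            "statusCode": 200,
            "body": {
                "audio": speech["audio"].tolist(),
                "sampling_rate": speech["sampling_rate"],
            },
        }

Returning the waveform as a plain list keeps the body JSON-serializable; if payload size matters, writing it to WAV with soundfile and base64-encoding the bytes would be a more compact choice for the `audio` field.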