Remsky committed on
Commit
9aea310
·
1 Parent(s): 80c0dbf

Refactor TTSModelV1 to remove JSON voice mappings and load voices from the voices_v1 directory

Browse files
tts_model_v1.py CHANGED
@@ -1,10 +1,8 @@
1
  import os
2
- import json
3
  import torch
4
  import numpy as np
5
  import time
6
  from typing import Tuple, List
7
- import soundfile as sf
8
  from kokoro import KPipeline
9
  import spaces
10
 
@@ -13,10 +11,7 @@ class TTSModelV1:
13
 
14
  def __init__(self):
15
  self.pipeline = None
16
- # Load v1 voice mappings
17
- voice_map_path = os.path.join(os.path.dirname(__file__), "voices", "v1_voices.json")
18
- with open(voice_map_path) as f:
19
- self.voice_map = json.load(f)
20
 
21
  def initialize(self) -> bool:
22
  """Initialize KPipeline"""
@@ -30,9 +25,14 @@ class TTSModelV1:
30
  return False
31
 
32
  def list_voices(self) -> List[str]:
33
- """List available voices"""
34
- # Return all voices from voice map
35
- return self.voice_map["american"] + self.voice_map["british"]
 
 
 
 
 
36
 
37
  @spaces.GPU(duration=None) # Duration will be set by the UI
38
  def generate_speech(self, text: str, voice_names: list[str], speed: float = 1.0, gpu_timeout: int = 60, progress_callback=None, progress_state=None, progress=None) -> Tuple[np.ndarray, float]:
 
1
  import os
 
2
  import torch
3
  import numpy as np
4
  import time
5
  from typing import Tuple, List
 
6
  from kokoro import KPipeline
7
  import spaces
8
 
 
11
 
12
  def __init__(self):
13
  self.pipeline = None
14
+ self.voices_dir = os.path.join(os.path.dirname(__file__), "voices_v1")
 
 
 
15
 
16
  def initialize(self) -> bool:
17
  """Initialize KPipeline"""
 
25
  return False
26
 
27
  def list_voices(self) -> List[str]:
28
+ """List available voices from voices_v1 directory"""
29
+ voices = []
30
+ if os.path.exists(self.voices_dir):
31
+ for file in os.listdir(self.voices_dir):
32
+ if file.endswith(".pt"):
33
+ voice_name = file[:-3]
34
+ voices.append(voice_name)
35
+ return sorted(voices)
36
 
37
  @spaces.GPU(duration=None) # Duration will be set by the UI
38
  def generate_speech(self, text: str, voice_names: list[str], speed: float = 1.0, gpu_timeout: int = 60, progress_callback=None, progress_state=None, progress=None) -> Tuple[np.ndarray, float]:
voices/v1_voices.json DELETED
@@ -1,32 +0,0 @@
1
- {
2
- "american": [
3
- "af_alloy",
4
- "af_aoede",
5
- "af_bella",
6
- "af_jessica",
7
- "af_kore",
8
- "af_nicole",
9
- "af_nova",
10
- "af_river",
11
- "af_sarah",
12
- "af_sky",
13
- "am_adam",
14
- "am_echo",
15
- "am_eric",
16
- "am_fenrir",
17
- "am_liam",
18
- "am_michael",
19
- "am_onyx",
20
- "am_puck"
21
- ],
22
- "british": [
23
- "bf_alice",
24
- "bf_emma",
25
- "bf_isabella",
26
- "bf_lily",
27
- "bm_daniel",
28
- "bm_fable",
29
- "bm_george",
30
- "bm_lewis"
31
- ]
32
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
voices_v1/af_alloy.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d877149dd8b348fbad12e5845b7e43d975390e9f3b68a811d1d86168bef5aa3
3
+ size 523425
voices_v1/af_aoede.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c03bd1a4c3716c2d8eaa3d50022f62d5c31cfbd6e15933a00b17fefe13841cc4
3
+ size 523425
voices_v1/af_jessica.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cdfdccb8cc975aa34ee6b89642963b0064237675de0e41a30ae64cc958dd4e87
3
+ size 523435
voices_v1/af_kore.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bfbc512321c3db49dff984ac675fa5ac7eaed5a96cc31104d3a9080e179d69d
3
+ size 523420
voices_v1/af_nova.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0233676ddc21908c37a1f102f6b88a59e4e5c1bd764983616eb9eda629dbcd2
3
+ size 523420
voices_v1/af_river.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e149459bd9c084416b74756b9bd3418256a8b839088abb07d463730c369dab8f
3
+ size 523425
voices_v1/am_echo.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bcfdc852bc985fb45c396c561e571ffb9183930071f962f1b50df5c97b161e8
3
+ size 523420
voices_v1/am_eric.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ada66f0eefff34ec921b1d7474d7ac8bec00cd863c170f1c534916e9b8212aae
3
+ size 523420
voices_v1/am_fenrir.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98e507eca1db08230ae3b6232d59c10aec9630022d19accac4f5d12fcec3c37a
3
+ size 523430
voices_v1/am_liam.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c82550757ddb31308b97f30040dda8c2d609a9e2de6135848d0a948368138518
3
+ size 523420
voices_v1/am_onyx.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8452be16cd0f6da7b4579eaf7b1e4506e92524882053d86d72b96b9a7fed584
3
+ size 523420
voices_v1/am_puck.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd1d8973f4ce4b7d8ae407c77a435f485dabc052081b80ea75c4f30b84f36223
3
+ size 523420
voices_v1/bf_alice.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d292651b6af6c0d81705c2580dcb4463fccc0ff7b8d618a471dbb4e45655b3f3
3
+ size 523425
voices_v1/bf_lily.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e09c2e481e2d53004d7e5ae7d3a325369e130a6f45c35a6002de75084be9285
3
+ size 523420
voices_v1/bm_daniel.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc3fce4e9c12ed4dbc8fa9680cfe51ee190a96444ce7c3ad647549a30823fc5d
3
+ size 523430
voices_v1/bm_fable.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d44935f3135257a9064df99f007fc1342ff1aa767552b4a4fa4c3b2e6e59079c
3
+ size 523425