yueyulin committed on
Commit
9eb14dd
·
verified ·
1 Parent(s): 7311e1a

Upload rwkv7-0.4B-g1-respark-voice-tunable_ipa/properties_util.py with huggingface_hub

Browse files
rwkv7-0.4B-g1-respark-voice-tunable_ipa/properties_util.py ADDED
@@ -0,0 +1,221 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Speaking-speed category name -> special control token string.
# Keys are lower-case; the convert_* functions lower-case their input (or use
# the label returned by classify_speed) before indexing this table.
SPEED_MAP = {
    "very_slow": "SPCT_1",
    "slow": "SPCT_2",
    "medium": "SPCT_3",
    "fast": "SPCT_4",
    "very_fast": "SPCT_5",
}
8
+
9
# Pitch category name -> special control token string.
# The keys are exactly the labels returned by classify_pitch.
PITCH_MAP = {
    "low_pitch": "SPCT_6",
    "medium_pitch": "SPCT_7",
    "high_pitch": "SPCT_8",
    "very_high_pitch": "SPCT_9",
}
15
+
16
# Age-group name -> special control token string.
# Keys are lower-case; the convert_* functions lower-case the caller's value
# before indexing this table.
AGE_MAP = {
    "child": "SPCT_13",
    "teenager": "SPCT_14",
    "youth-adult": "SPCT_15",
    "middle-aged": "SPCT_16",
    "elderly": "SPCT_17",
}
23
+
24
+
25
# Emotion / speaking-style name -> special control token string.
# Keys are UPPER-case; the convert_* functions upper-case the caller's value
# before indexing this table.
EMOTION_MAP = {
    "UNKNOWN": "SPCT_21",
    "NEUTRAL": "SPCT_22",
    "ANGRY": "SPCT_23",
    "HAPPY": "SPCT_24",
    "SAD": "SPCT_25",
    "FEARFUL": "SPCT_26",
    "DISGUSTED": "SPCT_27",
    "SURPRISED": "SPCT_28",
    "SARCASTIC": "SPCT_29",
    "EXCITED": "SPCT_30",
    "SLEEPY": "SPCT_31",
    "CONFUSED": "SPCT_32",
    "EMPHASIS": "SPCT_33",
    "LAUGHING": "SPCT_34",
    "SINGING": "SPCT_35",
    "WORRIED": "SPCT_36",
    "WHISPER": "SPCT_37",
    "ANXIOUS": "SPCT_38",
    "NO-AGREEMENT": "SPCT_39",
    "APOLOGETIC": "SPCT_40",
    "CONCERNED": "SPCT_41",
    "ENUNCIATED": "SPCT_42",
    "ASSERTIVE": "SPCT_43",
    "ENCOURAGING": "SPCT_44",
    "CONTEMPT": "SPCT_45",
}
52
+
53
# Gender name -> special control token string.
# Only "female" and "male" are mapped; there is no "unknown" entry, so the
# convert_* functions raise KeyError for any other gender value.
# Keys are lower-case; callers' values are lower-cased before the lookup.
GENDER_MAP = {
    "female": "SPCT_46",
    "male": "SPCT_47",
}
60
+
61
def convert_standard_properties_to_tokens(age: str, gender: str, emotion: str, pitch: str, speed: str) -> str:
    """Build the control-token prefix from already-categorised voice properties.

    Args:
        age: Age-group name, a key of ``AGE_MAP`` (case-insensitive).
        gender: Gender name, a key of ``GENDER_MAP`` (case-insensitive).
        emotion: Emotion name, a key of ``EMOTION_MAP`` (case-insensitive).
        pitch: Pitch category, a key of ``PITCH_MAP`` (case-insensitive).
        speed: Speed category, a key of ``SPEED_MAP`` (case-insensitive).

    Returns:
        A single string: ``"SPCT_0"`` followed by the age, gender, emotion,
        pitch and speed tokens, concatenated in that order with no separator.

    Raises:
        KeyError: If any value is not a key of its corresponding map.
    """
    # Lookups run in the same order as the output so the first invalid
    # property is the one reported by the KeyError.
    return "".join(
        (
            "SPCT_0",
            AGE_MAP[age.lower()],
            GENDER_MAP[gender.lower()],
            EMOTION_MAP[emotion.upper()],
            PITCH_MAP[pitch.lower()],
            SPEED_MAP[speed.lower()],
        )
    )
68
+
69
def convert_properties_to_tokens(age: str, gender: str, emotion: str, pitch: float, speed: float) -> str:
    """Build the control-token prefix from raw pitch and speed measurements.

    Same output as ``convert_standard_properties_to_tokens``, except that the
    numeric ``pitch`` and ``speed`` are first bucketed with ``classify_pitch``
    (which depends on gender and age) and ``classify_speed``.

    Args:
        age: Age-group name, a key of ``AGE_MAP`` (case-insensitive).
        gender: Gender name, a key of ``GENDER_MAP`` (case-insensitive).
        emotion: Emotion name, a key of ``EMOTION_MAP`` (case-insensitive).
        pitch: Numeric pitch value passed to ``classify_pitch``.
        speed: Numeric speed value passed to ``classify_speed``.

    Returns:
        A single string: ``"SPCT_0"`` followed by the age, gender, emotion,
        pitch and speed tokens, concatenated in that order with no separator.

    Raises:
        KeyError: If ``age``, ``gender`` or ``emotion`` is not a key of its
            map. Because the gender lookup happens before the pitch is
            classified, a gender missing from ``GENDER_MAP`` fails here and
            never reaches ``classify_pitch``.
    """
    age_key = age.lower()
    gender_key = gender.lower()
    return "".join(
        (
            "SPCT_0",
            AGE_MAP[age_key],
            GENDER_MAP[gender_key],
            EMOTION_MAP[emotion.upper()],
            PITCH_MAP[classify_pitch(pitch, gender_key, age_key)],
            SPEED_MAP[classify_speed(speed)],
        )
    )
76
+
77
def classify_speed(speed: float) -> str:
    """Bucket a numeric speaking speed into one of the ``SPEED_MAP`` categories.

    Buckets (upper bounds inclusive):
        speed <= 3.5        -> "very_slow"
        3.5 < speed <= 4.0  -> "slow"
        4.0 < speed <= 4.5  -> "medium"
        4.5 < speed <= 5.0  -> "fast"
        speed > 5.0         -> "very_fast"

    Args:
        speed: Speaking-speed measurement. NOTE(review): the unit is not
            stated in this file (presumably units per second) -- confirm
            against the caller.

    Returns:
        The category label as a string.
    """
    if speed <= 3.5:
        return "very_slow"
    # Upper bound is inclusive: exactly 4.0 used to match no branch and fell
    # through to "very_fast".
    if speed <= 4.0:
        return "slow"
    if speed <= 4.5:
        return "medium"
    if speed <= 5.0:
        return "fast"
    # speed > 5.0 (also reached for NaN, for which every comparison is False)
    return "very_fast"
88
# Category labels in ascending pitch order; a bounds tuple with k entries
# selects among the first k + 1 labels.
_PITCH_LABELS = ("low_pitch", "medium_pitch", "high_pitch", "very_high_pitch")

# Ascending, exclusive upper bounds per (gender, age group). The per-group
# statistics quoted in the comments are the ones the bounds were derived from.
_PITCH_BOUNDS = {
    "female": {
        # Child: mean 280.12, median 279.34, range 216.91-324.25.
        # Only two bounds: this group never yields "very_high_pitch".
        "child": (250, 290),
        # Teenager: mean 240.61, median 238.43, Q1 207.54, Q3 270.12
        "teenager": (208, 238, 270),
        # Youth-adult: mean 213.26, median 210.99, Q1 190.81, Q3 232.24
        "youth-adult": (191, 211, 232),
        # Middle-aged: mean 197.68, median 195.01, Q1 176.34, Q3 215.22
        "middle-aged": (176, 195, 215),
        # Elderly: mean 194.91, median 189.90, Q1 170.42, Q3 213.41
        "elderly": (170, 190, 213),
    },
    "male": {
        # No "child" entry: male children use the male default bounds.
        # Teenager: mean 150.93, median 142.50, Q1 121.47, Q3 165.55
        "teenager": (121, 143, 166),
        # Youth-adult: mean 137.17, median 130.92, Q1 114.70, Q3 153.18
        "youth-adult": (115, 131, 153),
        # Middle-aged: mean 132.33, median 125.30, Q1 110.31, Q3 146.55
        "middle-aged": (110, 125, 147),
        # Elderly: mean 132.62, median 128.42, Q1 114.69, Q3 141.57
        "elderly": (115, 128, 142),
    },
}

# Per-gender fallback when the age group has no dedicated entry.
#   female overall: mean 212.08, median 208.76, Q1 187.40, Q3 232.08
#   male overall:   mean 136.22, median 129.65, Q1 113.76, Q3 151.42
_PITCH_DEFAULT_BOUNDS = {
    "female": (187, 209, 232),
    "male": (114, 130, 151),
}

# Fallback for any gender other than "female" / "male".
_PITCH_GENERIC_BOUNDS = (130, 180, 220)


def classify_pitch(pitch: float, gender: str, age: str) -> str:
    """Bucket a numeric pitch into a ``PITCH_MAP`` category by gender and age.

    The bounds depend on the speaker's gender and age group. An unrecognised
    age group uses that gender's default bounds; an unrecognised gender uses
    generic bounds regardless of age.

    Args:
        pitch: Pitch measurement. NOTE(review): presumably the mean
            fundamental frequency in Hz, judging by the statistics the
            bounds are derived from -- confirm against the caller.
        gender: Gender name; compared case-insensitively.
        age: Age-group name; compared case-insensitively.

    Returns:
        One of "low_pitch", "medium_pitch", "high_pitch" or
        "very_high_pitch". Female "child" has only two bounds and therefore
        never returns "very_high_pitch".
    """
    gender = gender.lower()
    age = age.lower()

    if gender in _PITCH_DEFAULT_BOUNDS:
        bounds = _PITCH_BOUNDS[gender].get(age, _PITCH_DEFAULT_BOUNDS[gender])
    else:
        bounds = _PITCH_GENERIC_BOUNDS

    # The first bound strictly above the pitch decides the label; a pitch at
    # or above every bound (or NaN) gets the label after the last bound.
    for bound, label in zip(bounds, _PITCH_LABELS):
        if pitch < bound:
            return label
    return _PITCH_LABELS[len(bounds)]