yueyulin's picture
Upload rwkv7-0.4B-g1-respark-voice-tunable_ipa/properties_util.py with huggingface_hub
9eb14dd verified
SPEED_MAP = {
"very_slow": "SPCT_1",
"slow": "SPCT_2",
"medium": "SPCT_3",
"fast": "SPCT_4",
"very_fast": "SPCT_5",
}
PITCH_MAP = {
"low_pitch": "SPCT_6",
"medium_pitch": "SPCT_7",
"high_pitch": "SPCT_8",
"very_high_pitch": "SPCT_9",
}
AGE_MAP = {
"child": "SPCT_13",
"teenager": "SPCT_14",
"youth-adult": "SPCT_15",
"middle-aged": "SPCT_16",
"elderly": "SPCT_17",
}
EMOTION_MAP = {
"UNKNOWN": "SPCT_21",
"NEUTRAL": "SPCT_22",
"ANGRY": "SPCT_23",
"HAPPY": "SPCT_24",
"SAD": "SPCT_25",
"FEARFUL": "SPCT_26",
"DISGUSTED": "SPCT_27",
"SURPRISED": "SPCT_28",
"SARCASTIC": "SPCT_29",
"EXCITED": "SPCT_30",
"SLEEPY": "SPCT_31",
"CONFUSED": "SPCT_32",
"EMPHASIS": "SPCT_33",
"LAUGHING": "SPCT_34",
"SINGING": "SPCT_35",
"WORRIED": "SPCT_36",
"WHISPER": "SPCT_37",
"ANXIOUS": "SPCT_38",
"NO-AGREEMENT": "SPCT_39",
"APOLOGETIC": "SPCT_40",
"CONCERNED": "SPCT_41",
"ENUNCIATED": "SPCT_42",
"ASSERTIVE": "SPCT_43",
"ENCOURAGING": "SPCT_44",
"CONTEMPT": "SPCT_45",
}
# 注意:这里有两个GENDER_MAP定义,第二个会覆盖第一个
# 第一个定义包含了"unknown",第二个只包含"female"和"male"
# 建议使用第二个定义,因为它更简洁且符合实际使用场景
GENDER_MAP = {
"female": "SPCT_46",
"male": "SPCT_47"
}
def convert_standard_properties_to_tokens(age: str, gender: str, emotion: str, pitch: str, speed: str) -> list:
age_token = AGE_MAP[age.lower()]
gender_token = GENDER_MAP[gender.lower()]
emotion_token = EMOTION_MAP[emotion.upper()]
pitch_token = PITCH_MAP[pitch.lower()]
speed_token = SPEED_MAP[speed.lower()]
return "SPCT_0"+age_token+gender_token+emotion_token+pitch_token+speed_token
def convert_properties_to_tokens(age: str, gender: str, emotion: str, pitch: float, speed: float) -> list:
age_token = AGE_MAP[age.lower()]
gender_token = GENDER_MAP[gender.lower()]
emotion_token = EMOTION_MAP[emotion.upper()]
pitch_token = PITCH_MAP[classify_pitch(pitch, gender.lower(), age.lower())]
speed_token = SPEED_MAP[classify_speed(speed)]
return "SPCT_0"+age_token+gender_token+emotion_token+pitch_token+speed_token
def classify_speed(speed: float) -> str:
if speed <= 3.5:
return "very_slow"
elif 3.5 < speed < 4.0:
return "slow"
elif 4.0 < speed <= 4.5:
return "medium"
elif 4.5 < speed <= 5.0:
return "fast"
else: # speed >= 5.0
return "very_fast"
def classify_pitch(pitch: float, gender: str, age: str) -> str:
"""
根据性别和年龄重新划分pitch区间
基于统计结果:
- female: 平均212.08, 中位数208.76, 25%分位数187.40, 75%分位数232.08
- male: 平均136.22, 中位数129.65, 25%分位数113.76, 75%分位数151.42
"""
gender = gender.lower()
age = age.lower()
# 女性分类
if gender == "female":
if age == "child":
# Child: 平均280.12, 中位数279.34, 范围216.91-324.25
if pitch < 250:
return "low_pitch"
elif pitch < 290:
return "medium_pitch"
else:
return "high_pitch"
elif age == "teenager":
# Teenager: 平均240.61, 中位数238.43, 25%分位数207.54, 75%分位数270.12
if pitch < 208:
return "low_pitch"
elif pitch < 238:
return "medium_pitch"
elif pitch < 270:
return "high_pitch"
else:
return "very_high_pitch"
elif age == "youth-adult":
# Youth-Adult: 平均213.26, 中位数210.99, 25%分位数190.81, 75%分位数232.24
if pitch < 191:
return "low_pitch"
elif pitch < 211:
return "medium_pitch"
elif pitch < 232:
return "high_pitch"
else:
return "very_high_pitch"
elif age == "middle-aged":
# Middle-aged: 平均197.68, 中位数195.01, 25%分位数176.34, 75%分位数215.22
if pitch < 176:
return "low_pitch"
elif pitch < 195:
return "medium_pitch"
elif pitch < 215:
return "high_pitch"
else:
return "very_high_pitch"
elif age == "elderly":
# Elderly: 平均194.91, 中位数189.90, 25%分位数170.42, 75%分位数213.41
if pitch < 170:
return "low_pitch"
elif pitch < 190:
return "medium_pitch"
elif pitch < 213:
return "high_pitch"
else:
return "very_high_pitch"
else:
# 默认女性分类
if pitch < 187:
return "low_pitch"
elif pitch < 209:
return "medium_pitch"
elif pitch < 232:
return "high_pitch"
else:
return "very_high_pitch"
# 男性分类
elif gender == "male":
if age == "teenager":
# Teenager: 平均150.93, 中位数142.50, 25%分位数121.47, 75%分位数165.55
if pitch < 121:
return "low_pitch"
elif pitch < 143:
return "medium_pitch"
elif pitch < 166:
return "high_pitch"
else:
return "very_high_pitch"
elif age == "youth-adult":
# Youth-Adult: 平均137.17, 中位数130.92, 25%分位数114.70, 75%分位数153.18
if pitch < 115:
return "low_pitch"
elif pitch < 131:
return "medium_pitch"
elif pitch < 153:
return "high_pitch"
else:
return "very_high_pitch"
elif age == "middle-aged":
# Middle-aged: 平均132.33, 中位数125.30, 25%分位数110.31, 75%分位数146.55
if pitch < 110:
return "low_pitch"
elif pitch < 125:
return "medium_pitch"
elif pitch < 147:
return "high_pitch"
else:
return "very_high_pitch"
elif age == "elderly":
# Elderly: 平均132.62, 中位数128.42, 25%分位数114.69, 75%分位数141.57
if pitch < 115:
return "low_pitch"
elif pitch < 128:
return "medium_pitch"
elif pitch < 142:
return "high_pitch"
else:
return "very_high_pitch"
else:
# 默认男性分类
if pitch < 114:
return "low_pitch"
elif pitch < 130:
return "medium_pitch"
elif pitch < 151:
return "high_pitch"
else:
return "very_high_pitch"
# 未知性别,使用通用分类
else:
if pitch < 130:
return "low_pitch"
elif pitch < 180:
return "medium_pitch"
elif pitch < 220:
return "high_pitch"
else:
return "very_high_pitch"