Spaces:
Runtime error
Runtime error
File size: 6,324 Bytes
003d053 01a69aa 003d053 01a69aa 003d053 01a69aa 003d053 01a69aa 003d053 01a69aa 003d053 01a69aa 003d053 01a69aa 003d053 01a69aa 003d053 01a69aa 003d053 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 |
try:
import cn2an
except ImportError:
print("The 'cn2an' module is not installed. Please install it using 'pip install cn2an'.")
exit(1)
import re
import numpy as np
import wave
def save_audio(file_name, audio, rate=24000):
"""
保存音频文件
:param file_name:
:param audio:
:param rate:
:return:
"""
audio = (audio * 32767).astype(np.int16)
with wave.open(file_name, "w") as wf:
wf.setnchannels(1)
wf.setsampwidth(2)
wf.setframerate(rate)
wf.writeframes(audio.tobytes())
def combine_audio(wavs):
"""
合并多段音频
:param wavs:
:return:
"""
wavs = [normalize_audio(w) for w in wavs] # 先对每段音频归一化
combined_audio = np.concatenate(wavs, axis=1) # 沿着时间轴合并
return normalize_audio(combined_audio) # 合并后再次归一化
def normalize_audio(audio):
"""
Normalize audio array to be between -1 and 1
:param audio: Input audio array
:return: Normalized audio array
"""
audio = np.clip(audio, -1, 1)
max_val = np.max(np.abs(audio))
if max_val > 0:
audio = audio / max_val
return audio
def combine_audio_with_crossfade(audio_arrays, crossfade_duration=0.1, rate=24000):
"""
Combine audio arrays with crossfade to avoid clipping noise at the junctions.
:param audio_arrays: List of audio arrays to combine
:param crossfade_duration: Duration of the crossfade in seconds
:param rate: Sample rate of the audio
:return: Combined audio array
"""
crossfade_samples = int(crossfade_duration * rate)
combined_audio = np.array([], dtype=np.float32)
for i in range(len(audio_arrays)):
audio_arrays[i] = np.squeeze(audio_arrays[i]) # Ensure all arrays are 1D
if i == 0:
combined_audio = audio_arrays[i] # Start with the first audio array
else:
# Apply crossfade between the end of the current combined audio and the start of the next array
overlap = np.minimum(len(combined_audio), crossfade_samples)
crossfade_end = combined_audio[-overlap:]
crossfade_start = audio_arrays[i][:overlap]
# Crossfade by linearly blending the audio samples
t = np.linspace(0, 1, overlap)
crossfaded = crossfade_end * (1 - t) + crossfade_start * t
# Combine audio by replacing the end of the current combined audio with the crossfaded audio
combined_audio[-overlap:] = crossfaded
# Append the rest of the new array
combined_audio = np.concatenate((combined_audio, audio_arrays[i][overlap:]))
return combined_audio
def remove_chinese_punctuation(text):
"""
移除文本中的中文标点符号 [:;!(),【】『』「」《》-‘“’”:,;!\(\)\[\]><\-] 替换为 ,
:param text:
:return:
"""
chinese_punctuation_pattern = r"[:;!(),【】『』「」《》-‘“’”:,;!\(\)\[\]><\-]"
text = re.sub(chinese_punctuation_pattern, ' ', text)
# 使用正则表达式将多个连续的句号替换为一个句号
text = re.sub(r'。{2,}', '。', text)
return text
def text_normalize(text):
"""
对文本进行归一化处理
:param text:
:return:
"""
from zh_normalization import TextNormalizer
# ref: https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/zh_normalization
tx = TextNormalizer()
sentences = tx.normalize(text)
# print(sentences)
_txt = ''.join(sentences)
# 替换掉除中文之外的所有字符
_txt = re.sub(
r"[^\u4e00-\u9fa5,。!?、]+", "", _txt
)
return _txt
def convert_numbers_to_chinese(text):
"""
将文本中的数字转换为中文数字 例如 123 -> 一百二十三
:param text:
:return:
"""
return cn2an.transform(text, "an2cn")
def split_text(text, min_length=60):
"""
将文本分割为长度不小于min_length的句子
:param text:
:param min_length:
:return:
"""
sentence_delimiters = re.compile(r'([。?!\.\n]+)')
sentences = re.split(sentence_delimiters, text)
# print(sentences)
# exit()
result = []
current_sentence = ''
for sentence in sentences:
if re.match(sentence_delimiters, sentence):
current_sentence += sentence.strip() + '。'
if len(current_sentence) >= min_length:
result.append(current_sentence.strip())
current_sentence = ''
else:
current_sentence += sentence.strip()
if current_sentence:
if len(current_sentence) < min_length and len(result) > 0:
result[-1] += current_sentence
else:
result.append(current_sentence)
# result = [convert_numbers_to_chinese(remove_chinese_punctuation(_.strip())) for _ in result if _.strip()]
result = [normalize_zh(_.strip()) for _ in result if _.strip()]
return result
def normalize_zh(text):
# return text_normalize(remove_chinese_punctuation(text))
return convert_numbers_to_chinese(remove_chinese_punctuation(text))
def batch_split(items, batch_size=5):
"""
将items划分为大小为batch_size的批次
:param items:
:param batch_size:
:return:
"""
return [items[i:i + batch_size] for i in range(0, len(items), batch_size)]
# 读取 txt 文件,支持自动判断文件编码
def read_long_text(file_path):
"""
读取长文本文件,自动判断文件编码
:param file_path: 文件路径
:return: 文本内容
"""
encodings = ['utf-8', 'gbk', 'iso-8859-1', 'utf-16']
for encoding in encodings:
try:
with open(file_path, 'r', encoding=encoding) as file:
return file.read()
except (UnicodeDecodeError, LookupError):
continue
raise ValueError("无法识别文件编码")
if __name__ == '__main__':
txts = [
"电影中梁朝伟扮演的陈永仁的编号27149",
"这块黄金重达324.75克 我们班的最高总分为583分",
"12\~23 -1.5\~2",
]
for txt in txts:
print(txt, '-->', text_normalize(txt))
# print(txt, '-->', convert_numbers_to_chinese(txt))
|