fcyai
init
01a69aa
raw
history blame
2.66 kB
import random
import torch
import gradio as gr
import numpy as np
from tools.logger import get_logger
# Module-level logger for the web UI, created through the project's get_logger helper.
logger = get_logger(" WebUI ")
import ChatTTS
# Single ChatTTS.Chat instance shared by refine_text() and generate_audio();
# it is constructed with its own logger named "ChatTTS".
chat = ChatTTS.Chat(get_logger("ChatTTS"))
# Voice options: preset voices, each identified by a label and carrying the
# seed value stored for it.
_VOICE_PRESETS = (
    ("默认", 2),
    ("音色1", 1111),
    ("音色2", 2222),
    ("音色3", 3333),
    ("音色4", 4444),
    ("音色5", 5555),
    ("音色6", 6666),
    ("音色7", 7777),
    ("音色8", 8888),
    ("音色9", 9999),
    ("音色10", 11111),
)
voices = {label: {"seed": seed} for label, seed in _VOICE_PRESETS}
def generate_seed():
    """Draw a random integer in [1, 100000000] and return it wrapped in ``gr.update``."""
    fresh_seed = random.randint(1, 100000000)
    return gr.update(value=fresh_seed)
# Return the seed that belongs to the selected voice.
def on_voice_change(vocie_selection):
    """Look up the seed of the chosen voice preset.

    Args:
        vocie_selection: Key into the module-level ``voices`` table.

    Returns:
        The ``seed`` value stored for that voice. When the selection is not
        a key of ``voices`` (including ``None``), the seed of the "默认"
        entry is returned instead of failing with a ``TypeError`` caused by
        subscripting the ``None`` that ``dict.get`` returns.
    """
    preset = voices.get(vocie_selection)
    if preset is None:
        # Unknown or cleared selection: fall back to the default preset.
        preset = voices["默认"]
    return preset["seed"]
def refine_text(text, audio_seed_input, text_seed_input, refine_text_flag):
    """Optionally run the ChatTTS text-refinement pass over ``text``.

    Args:
        text: Input text.
        audio_seed_input: Accepted so the signature stays unchanged; the
            refinement step does not use it.
        text_seed_input: Seed given to ``torch.manual_seed`` right before
            the refinement call.
        refine_text_flag: When falsy, ``text`` is returned unchanged and
            nothing else runs.

    Returns:
        The value returned by ``chat.infer``; when that value is a list,
        its first element.
    """
    if not refine_text_flag:
        return text

    # Only the text seed is applied: seeding with the audio seed first would
    # be overwritten by this call before any random number is drawn.
    torch.manual_seed(text_seed_input)
    refined = chat.infer(
        text,
        skip_refine_text=False,
        refine_text_only=True,
        params_refine_text={'prompt': '[oral_2][laugh_0][break_6]'},
    )
    return refined[0] if isinstance(refined, list) else refined
def _to_int16_pcm(samples):
    """Scale a waveform and convert it to a flat ``int16`` array.

    The scaling rule is: with ``peak = max(abs(samples))``, every sample is
    multiplied by ``peak * 32768`` when that product is at most 32768, and
    by ``32768 / peak`` otherwise.

    NOTE(review): for ``peak < 1`` this multiplies by ``peak * 32768`` rather
    than a plain ``32768``; the rule is kept as-is here - confirm it is the
    intended loudness behavior.

    Args:
        samples: Array-like waveform of any shape; it is flattened.

    Returns:
        A 1-D ``numpy.int16`` array. An empty input gives an empty array.
    """
    pcm = np.array(samples, dtype=np.float64).reshape(-1)
    if pcm.size == 0:
        # max() of an empty array raises; there is nothing to scale anyway.
        return pcm.astype(np.int16)

    gain = np.abs(pcm).max() * 32768
    if gain > 32768:
        gain = 32768 * 32768 / gain
    pcm *= gain

    # The scaling above can put the peak at exactly +32768, which does not
    # fit in int16; clip so the cast cannot overflow.
    np.clip(pcm, -32768, 32767, out=pcm)
    return pcm.astype(np.int16)


def generate_audio(text, temperature, top_P, top_K, audio_seed_input, text_seed_input, stream):
    """Synthesize speech for ``text`` and yield it as ``(sample_rate, samples)``.

    This is a generator. With ``stream`` truthy it yields one tuple per
    chunk produced by ``chat.infer``; otherwise it yields a single tuple
    built from the first element of the result.

    Args:
        text: Text to synthesize. When falsy, nothing is yielded.
        temperature: Forwarded as ``params_infer_code['temperature']``.
        top_P: Forwarded as ``params_infer_code['top_P']``.
        top_K: Forwarded as ``params_infer_code['top_K']``.
        audio_seed_input: Seed set before ``chat.sample_random_speaker()``.
        text_seed_input: Seed set before ``chat.infer``.
        stream: Passed to ``chat.infer`` and selects the streaming branch.

    Yields:
        ``(24000, numpy.int16 array)`` tuples. Empty chunks are skipped.
    """
    if not text:
        return

    sample_rate = 24000

    # The speaker embedding is sampled right after seeding with the audio seed.
    torch.manual_seed(audio_seed_input)
    params_infer_code = {
        'spk_emb': chat.sample_random_speaker(),
        'temperature': temperature,
        'top_P': top_P,
        'top_K': top_K,
    }

    torch.manual_seed(text_seed_input)
    wav = chat.infer(
        text,
        skip_refine_text=True,
        params_infer_code=params_infer_code,
        stream=stream,
    )

    if stream:
        for gen in wav:
            chunk = _to_int16_pcm(gen[0])
            if chunk.size:
                yield sample_rate, chunk
        return

    audio_data = _to_int16_pcm(wav[0])
    if audio_data.size:
        yield sample_rate, audio_data