3417543_models

Paused

App Files Files Community

3417543_models / app.py

ajayarora1235

get rid of cutoff time

6736ecf 10 months ago

raw

history blame

100 kB

	import subprocess, torch, os, traceback, sys, warnings, shutil, numpy as np

	import pandas as pd
	import torchaudio
	from lib.voicecraft.data.tokenizer import (
	AudioTokenizer,
	TextTokenizer,
	)
	import whisperx
	import os
	import time
	import gc
	import gradio as gr

	from mega import Mega
	os.environ["no_proxy"] = "localhost, 127.0.0.1, ::1"
	import threading
	from time import sleep
	from subprocess import Popen
	import faiss
	from random import shuffle
	import json, datetime, requests
	# Working directory of the process; used as the root for logs/, weights/ and TEMP/.
	now_dir = os.getcwd()
	# Make modules located in the working directory importable.
	sys.path.append(now_dir)
	tmp = os.path.join(now_dir, "TEMP")
	# Start from a clean scratch directory on every launch (errors are ignored).
	shutil.rmtree(tmp, ignore_errors=True)
	# Remove any infer_pack copy under runtime/Lib/site-packages (errors are ignored).
	shutil.rmtree("%s/runtime/Lib/site-packages/infer_pack" % (now_dir), ignore_errors=True)
	# Ensure the directories used below exist.
	os.makedirs(tmp, exist_ok=True)
	os.makedirs(os.path.join(now_dir, "logs"), exist_ok=True)
	os.makedirs(os.path.join(now_dir, "weights"), exist_ok=True)
	# Point the TEMP environment variable at the scratch directory created above.
	os.environ["TEMP"] = tmp
	# Silence all Python warnings for the whole process.
	warnings.filterwarnings("ignore")
	# Fixed seed for torch's random number generator.
	torch.manual_seed(114514)

	import signal

	import math

	from utils import load_audio, CSVutil

	global DoFormant, Quefrency, Timbre
	from transformers import HubertModel, HubertConfig

	# On first run create the csvdb/ directory together with the two empty CSV
	# files that are later read and written through CSVutil.
	if not os.path.isdir('csvdb/'):
	    os.makedirs('csvdb')
	    for _csv_path in ("csvdb/formanting.csv", "csvdb/stop.csv"):
	        # Context manager so the handle is closed even if a later open() fails.
	        with open(_csv_path, 'w'):
	            pass

	# Load the persisted formant settings; fall back to defaults (and persist
	# them) when the file is empty or malformed.
	try:
	    DoFormant, Quefrency, Timbre = CSVutil('csvdb/formanting.csv', 'r', 'formanting')
	    # The flag is lower-cased, so it is presumably read back as text:
	    # "true"/"false" (any case) become booleans, anything else is kept as is.
	    _formant_flag = DoFormant.lower()
	    if _formant_flag == 'true':
	        DoFormant = True
	    elif _formant_flag == 'false':
	        DoFormant = False
	except (ValueError, TypeError, IndexError):
	    DoFormant, Quefrency, Timbre = False, 1.0, 1.0
	    CSVutil('csvdb/formanting.csv', 'w+', 'formanting', DoFormant, Quefrency, Timbre)

	def update_message(request: gr.Request):
	    """Return the greeting for the user attached to *request*.

	    change_choices() is also called for that user; its return value is
	    discarded here.
	    """
	    user = request.username
	    change_choices(user)
	    return f"Welcome, {user}"

	def _download_model_file(url, dest, label):
	    """Download *url* to *dest* unless *dest* already exists.

	    The body is streamed to ``dest + ".part"`` and renamed only once complete,
	    so an interrupted download is never mistaken for a finished model file.

	    Raises:
	        Exception: if the server does not answer with HTTP 200.
	    """
	    if os.path.isfile(dest):
	        return
	    with requests.get(url, stream=True) as response:
	        if response.status_code != 200:
	            raise Exception("Failed to download " + label + " model file. Status code: " + str(response.status_code) + ".")
	        partial = dest + ".part"
	        with open(partial, 'wb') as f:
	            # Stream in 1 MiB chunks instead of holding the whole model in memory.
	            for chunk in response.iter_content(chunk_size=1 << 20):
	                f.write(chunk)
	    os.replace(partial, dest)
	    print("Downloaded " + label + " model file successfully. File saved to " + dest + ".")


	def download_models():
	    """Fetch the hubert base and rmvpe model files if they are not present.

	    Raises:
	        Exception: if either download does not return HTTP 200.
	    """
	    # Download hubert base model if not present
	    _download_model_file(
	        'https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt',
	        './hubert_base.pt',
	        'hubert base',
	    )
	    # Download rmvpe model if not present
	    _download_model_file(
	        'https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/rmvpe.pt?download=true',
	        './rmvpe.pt',
	        'rmvpe',
	    )

	# Fetch any missing model files at import time, before the rest of the module runs.
	download_models()

	# Startup banner printed to the console.
	print("\n-------------------------------\nRVC v2 Easy GUI (Local Edition)\n-------------------------------\n")

	def formant_apply(qfrency, tmbre):
	    """Enable formant shifting with the given quefrency and timbre.

	    The values are stored in the module-level DoFormant / Quefrency / Timbre
	    settings and persisted to csvdb/formanting.csv.

	    Returns:
	        A pair of update dicts carrying the new quefrency and timbre values.
	    """
	    # Without this declaration the assignments below only created locals and
	    # the module-level settings read elsewhere in the file never changed.
	    global DoFormant, Quefrency, Timbre
	    Quefrency = qfrency
	    Timbre = tmbre
	    DoFormant = True
	    CSVutil('csvdb/formanting.csv', 'w+', 'formanting', DoFormant, qfrency, tmbre)

	    return ({"value": Quefrency, "__type__": "update"}, {"value": Timbre, "__type__": "update"})

	def get_fshift_presets():
	    """List the ``.txt`` preset files found under ``./formantshiftcfg/``.

	    Returns:
	        A list of paths using forward slashes, or the empty string ``''``
	        when no preset file exists.
	    """
	    presets = [
	        os.path.join(folder, fname).replace('\\', '/')
	        for folder, _, fnames in os.walk("./formantshiftcfg/")
	        for fname in fnames
	        if fname.endswith(".txt")
	    ]
	    return presets if presets else ''



	def formant_enabled(cbox, qfrency, tmbre, frmntapply, formantpreset, formant_refresh_button):
	    """Turn formant shifting on or off according to the checkbox value.

	    The flag is stored in the module-level DoFormant setting and persisted,
	    with the current quefrency/timbre, to csvdb/formanting.csv.

	    Args:
	        cbox: checkbox state; truthy enables formant shifting.
	        qfrency, tmbre: current quefrency and timbre values (persisted as is).
	        frmntapply, formantpreset, formant_refresh_button: unused by this function.

	    Returns:
	        Six update dicts: the checkbox value followed by five visibility
	        updates that are all shown or all hidden.
	    """
	    # Without this declaration the flag was only ever a function local.
	    global DoFormant
	    DoFormant = bool(cbox)
	    CSVutil('csvdb/formanting.csv', 'w+', 'formanting', DoFormant, qfrency, tmbre)

	    # Both branches now return the same number of updates; the unchecked
	    # branch used to return seven against six for the checked branch.
	    # NOTE(review): six matches the six parameters - confirm against the
	    # outputs list of the event wiring, which is not visible here.
	    return ({"value": DoFormant, "__type__": "update"},) + tuple(
	        {"visible": DoFormant, "__type__": "update"} for _ in range(5)
	    )



	def preset_apply(preset, qfer, tmbr):
	    """Load quefrency/timbre from a preset file and apply them.

	    When *preset* is non-empty, the first two lines of that file replace
	    *qfer* and *tmbr* (only the first line has its newline removed) and are
	    passed to formant_apply(). An empty preset leaves both values untouched.

	    Returns:
	        A pair of update dicts carrying the resulting quefrency and timbre.
	    """
	    preset_path = str(preset)
	    if preset_path != '':
	        with open(preset_path, 'r') as handle:
	            lines = handle.readlines()
	        qfer = lines[0].split('\n')[0]
	        tmbr = lines[1]
	        formant_apply(qfer, tmbr)
	    return ({"value": qfer, "__type__": "update"}, {"value": tmbr, "__type__": "update"})

	def update_fshift_presets(preset, qfrency, tmbre):
	    """Refresh the preset list and re-apply the selected preset.

	    Returns:
	        Three update dicts: the available preset choices, the quefrency value
	        and the timbre value.
	    """
	    # preset_apply() already reads the file and calls formant_apply(); it
	    # returns update dicts, so unwrap their "value" instead of nesting the
	    # dicts themselves (which happened before for an empty preset) and instead
	    # of reading the file and applying the values a second time.
	    qfrency_update, tmbre_update = preset_apply(preset, qfrency, tmbre)
	    return (
	        {"choices": get_fshift_presets(), "__type__": "update"},
	        {"value": qfrency_update["value"], "__type__": "update"},
	        {"value": tmbre_update["value"], "__type__": "update"},
	    )

	# i18n = I18nAuto()
	#i18n.print()
	# Detect whether an NVIDIA GPU usable for training / accelerated inference is present.
	ngpu = torch.cuda.device_count()
	gpu_infos = []
	mem = []
	# Substrings of the device name that mark a supported card, e.g.
	# A10, A100, V100, A40, P40, M40, K80, A4500. They are matched against the
	# upper-cased name; the purely numeric markers are unaffected by case.
	_SUPPORTED_GPU_MARKERS = (
	    "10", "16", "20", "30", "40",
	    "A2", "A3", "A4", "P4", "A50", "A60",
	    "70", "80", "90",
	    "M4", "T4", "TITAN",
	)
	_usable_gpu_ids = []
	if_gpu_ok = False
	if torch.cuda.is_available() and ngpu != 0:
	    for i in range(ngpu):
	        gpu_name = torch.cuda.get_device_name(i)
	        if any(marker in gpu_name.upper() for marker in _SUPPORTED_GPU_MARKERS):
	            if_gpu_ok = True  # at least one usable NVIDIA card
	            gpu_infos.append("%s\t%s" % (i, gpu_name))
	            _usable_gpu_ids.append(str(i))
	            # Total device memory in GiB, rounded with a +0.4 bias.
	            mem.append(
	                int(torch.cuda.get_device_properties(i).total_memory / 1024 / 1024 / 1024 + 0.4)
	            )
	if if_gpu_ok and gpu_infos:
	    gpu_info = "\n".join(gpu_infos)
	    default_batch_size = min(mem) // 2
	else:
	    gpu_info = "test"
	    default_batch_size = 1
	# Dash-separated indices of the usable GPUs. The full index is used; taking
	# the first character of each info line truncated indices >= 10.
	gpus = "-".join(_usable_gpu_ids)
	from lib.infer_pack.models import (
	SynthesizerTrnMs256NSFsid,
	SynthesizerTrnMs256NSFsid_nono,
	SynthesizerTrnMs768NSFsid,
	SynthesizerTrnMs768NSFsid_nono,
	)
	import soundfile as sf
	import logging
	from vc_infer_pipeline import VC
	from config import Config

	# Runtime configuration object; the rest of this file reads its device,
	# is_half, python_cmd and noparallel attributes.
	config = Config()
	# from trainset_preprocess_pipeline import PreProcess
	# Only show warnings and above from the "numba" logger.
	logging.getLogger("numba").setLevel(logging.WARNING)

	# Lazily populated model state: filled in by load_hubert() / load_voicecraft().
	hubert_model = None
	voicecraft_model = None
	voicecraft_config = None
	phn2num = None
	# Looked up by file name in change_choices() and compared with the username.
	associated_links = {}

	def load_hubert():
	    """Load the HuBERT model into the module-level ``hubert_model``.

	    The model is built from a default HubertConfig with hidden-state output
	    enabled, its weights are read from ``hubert_base_hf_statedict.pt``, and it
	    is moved to ``config.device`` in half or full precision according to
	    ``config.is_half``.
	    """
	    global hubert_model

	    hubert_config = HubertConfig()
	    hubert_config.output_hidden_states = True
	    model = HubertModel(hubert_config)
	    # Deserialize onto the CPU first so the load does not depend on the device
	    # the state dict was saved from; the model is moved to config.device below.
	    state_dict = torch.load('hubert_base_hf_statedict.pt', map_location="cpu")
	    model.load_state_dict(state_dict)

	    model = model.to(config.device)
	    model = model.half() if config.is_half else model.float()
	    model.eval()

	    # Publish the model only once it is fully loaded. Assigning the global
	    # before load_state_dict() left an untrained model behind on failure, which
	    # the "is it loaded yet?" check in vc_single() would then accept.
	    hubert_model = model
	    # (This replaces an earlier, commented-out loader that read hubert_base.pt
	    # through checkpoint_utils.load_model_ensemble_and_task.)

	def load_voicecraft():
	    """Load the VoiceCraft checkpoint into the module-level model state.

	    Missing checkpoint files are downloaded into ``./pretrained_models`` first.
	    Sets ``voicecraft_model``, ``voicecraft_config`` and ``phn2num``.

	    Raises:
	        requests.HTTPError: if a required download fails.
	    """
	    global voicecraft_model, phn2num, voicecraft_config

	    from lib.voicecraft.models import voicecraft

	    def _fetch(url, dest):
	        """Stream *url* to *dest*, going through a temporary ``.part`` file."""
	        os.makedirs(os.path.dirname(dest), exist_ok=True)
	        partial = dest + ".part"
	        with requests.get(url, stream=True) as response:
	            response.raise_for_status()
	            with open(partial, "wb") as f:
	                for chunk in response.iter_content(chunk_size=1 << 20):
	                    f.write(chunk)
	        os.replace(partial, dest)

	    voicecraft_name = "giga330M.pth"
	    base_url = "https://huggingface.co/pyp1/VoiceCraft/resolve/main"
	    ckpt_fn = f"./pretrained_models/{voicecraft_name}"
	    encodec_fn = "./pretrained_models/encodec_4cb2048_giga.th"
	    # Downloads previously shelled out to wget + mv, using invalid "\?" string
	    # escapes, never creating the target directory and ignoring failures.
	    if not os.path.exists(ckpt_fn):
	        _fetch(f"{base_url}/{voicecraft_name}?download=true", ckpt_fn)
	    if not os.path.exists(encodec_fn):
	        _fetch(f"{base_url}/encodec_4cb2048_giga.th", encodec_fn)

	    ckpt = torch.load(ckpt_fn, map_location="cpu")
	    voicecraft_config = ckpt["config"]
	    voicecraft_model = voicecraft.VoiceCraft(ckpt["config"])
	    voicecraft_model.load_state_dict(ckpt["model"])
	    voicecraft_model.to(config.device)
	    voicecraft_model.eval()

	    phn2num = ckpt['phn2num']

	# Directories scanned for voice model weights and for feature indexes.
	weight_root = "weights"
	index_root = "logs"
	# Model weight files (*.pth) directly inside weight_root.
	names = [fname for fname in os.listdir(weight_root) if fname.endswith(".pth")]
	# Every *.index file below index_root whose name does not contain "trained".
	index_paths = [
	    "%s/%s" % (folder, fname)
	    for folder, _subdirs, fnames in os.walk(index_root, topdown=False)
	    for fname in fnames
	    if fname.endswith(".index") and "trained" not in fname
	]



	def vc_single(
	    sid,
	    input_audio_path,
	    f0_up_key,
	    f0_file,
	    f0_method,
	    file_index,
	    index_rate,
	    filter_radius,
	    resample_sr,
	    rms_mix_rate,
	    protect,
	    crepe_hop_length,
	):
	    """Convert one audio file with the currently loaded voice model.

	    The audio is loaded at 16 kHz (with the module-level formant settings),
	    peak-normalised when it exceeds 0.95, and passed to ``vc.pipeline``
	    together with the remaining arguments.

	    Returns:
	        ``(message, (sample_rate, audio))`` on success,
	        ``("You need to upload an audio", None)`` when no input path is given,
	        ``(traceback_text, (None, None))`` when the conversion raises.
	    """
	    global tgt_sr, net_g, vc, hubert_model, version
	    if input_audio_path is None:
	        return "You need to upload an audio", None
	    f0_up_key = int(f0_up_key)
	    try:
	        audio = load_audio(input_audio_path, 16000, DoFormant, Quefrency, Timbre)
	        audio_max = np.abs(audio).max() / 0.95
	        if audio_max > 1:
	            audio /= audio_max
	        times = [0, 0, 0]
	        if hubert_model is None:
	            load_hubert()
	        if_f0 = cpt.get("f0", 1)
	        # Tolerate pasted paths: trim spaces/quotes/newlines and point a
	        # "trained" index name at its "added" counterpart.
	        file_index = (
	            file_index.strip(" ")
	            .strip('"')
	            .strip("\n")
	            .strip('"')
	            .strip(" ")
	            .replace("trained", "added")
	        )
	        audio_opt = vc.pipeline(
	            hubert_model,
	            net_g,
	            sid,
	            audio,
	            input_audio_path,
	            times,
	            f0_up_key,
	            f0_method,
	            file_index,
	            index_rate,
	            if_f0,
	            filter_radius,
	            tgt_sr,
	            resample_sr,
	            rms_mix_rate,
	            version,
	            protect,
	            crepe_hop_length,
	            f0_file=f0_file,
	        )
	        # Report the resampled rate through a local. Writing it back to the
	        # global tgt_sr made every later call pass the resample rate to the
	        # pipeline as if it were the loaded model's own rate.
	        # NOTE(review): assumes vc.pipeline resamples to resample_sr itself - confirm.
	        output_sr = tgt_sr
	        if resample_sr >= 16000 and tgt_sr != resample_sr:
	            output_sr = resample_sr
	        index_info = (
	            "Using index:%s." % file_index
	            if os.path.exists(file_index)
	            else "Index not used."
	        )
	        return "Success.\n %s\nTime:\n npy:%ss, f0:%ss, infer:%ss" % (
	            index_info,
	            times[0],
	            times[1],
	            times[2],
	        ), (output_sr, audio_opt)
	    except Exception:
	        # Report the failure as text instead of raising to the caller.
	        info = traceback.format_exc()
	        print(info)
	        return info, (None, None)


	def vc_multi(
	    sid,
	    dir_path,
	    opt_root,
	    paths,
	    f0_up_key,
	    f0_method,
	    file_index,
	    file_index2,
	    index_rate,
	    filter_radius,
	    resample_sr,
	    rms_mix_rate,
	    protect,
	    format1,
	    crepe_hop_length,
	):
	    """Convert a batch of audio files, yielding a growing progress report.

	    Inputs are every entry of *dir_path* when it is non-empty, otherwise the
	    ``.name`` of each object in *paths*. Each file goes through vc_single();
	    results are written to *opt_root* as ``wav``/``flac`` directly, or as wav
	    followed by an ffmpeg conversion for any other *format1*.
	    *file_index2* is accepted but not used.

	    Yields:
	        The newline-joined ``"<file>-><message>"`` report after every file and
	        once more at the end; a traceback string if the batch itself fails.
	    """
	    # ``except Exception`` rather than a bare ``except``: a bare clause around
	    # a ``yield`` also traps GeneratorExit when the consumer closes the
	    # generator, and yielding from that handler raises RuntimeError.
	    try:
	        # Tolerate pasted paths with surrounding spaces, quotes or newlines.
	        dir_path = dir_path.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
	        opt_root = opt_root.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
	        os.makedirs(opt_root, exist_ok=True)
	        try:
	            if dir_path != "":
	                paths = [os.path.join(dir_path, name) for name in os.listdir(dir_path)]
	            else:
	                paths = [path.name for path in paths]
	        except Exception:
	            traceback.print_exc()
	            paths = [path.name for path in paths]
	        infos = []
	        for path in paths:
	            info, opt = vc_single(
	                sid,
	                path,
	                f0_up_key,
	                None,
	                f0_method,
	                file_index,
	                index_rate,
	                filter_radius,
	                resample_sr,
	                rms_mix_rate,
	                protect,
	                crepe_hop_length,
	            )
	            if "Success" in info:
	                try:
	                    tgt_sr, audio_opt = opt
	                    if format1 in ["wav", "flac"]:
	                        sf.write(
	                            "%s/%s.%s" % (opt_root, os.path.basename(path), format1),
	                            audio_opt,
	                            tgt_sr,
	                        )
	                    else:
	                        path = "%s/%s.wav" % (opt_root, os.path.basename(path))
	                        sf.write(path, audio_opt, tgt_sr)
	                        if os.path.exists(path):
	                            # Argument list instead of a shell string, so file
	                            # names with spaces or shell metacharacters are
	                            # passed to ffmpeg unchanged.
	                            subprocess.run(
	                                ["ffmpeg", "-i", path, "-vn", path[:-4] + ".%s" % format1, "-q:a", "2", "-y"]
	                            )
	                except Exception:
	                    info += traceback.format_exc()
	            infos.append("%s->%s" % (os.path.basename(path), info))
	            yield "\n".join(infos)
	        yield "\n".join(infos)
	    except Exception:
	        yield traceback.format_exc()

	def _build_synthesizer(checkpoint):
	    """Instantiate the synthesizer class matching a checkpoint's version and f0 flag.

	    Raises:
	        ValueError: if the checkpoint's version is neither "v1" nor "v2".
	    """
	    model_version = checkpoint.get("version", "v1")
	    if model_version == "v1":
	        with_f0, without_f0 = SynthesizerTrnMs256NSFsid, SynthesizerTrnMs256NSFsid_nono
	    elif model_version == "v2":
	        with_f0, without_f0 = SynthesizerTrnMs768NSFsid, SynthesizerTrnMs768NSFsid_nono
	    else:
	        # Previously an unknown version silently left the old model in place.
	        raise ValueError("Unsupported model version: %r" % (model_version,))
	    if checkpoint.get("f0", 1) == 1:
	        return with_f0(*checkpoint["config"], is_half=config.is_half)
	    return without_f0(*checkpoint["config"])


	# Only one voice can be loaded globally per tab.
	def get_vc(sid):
	    """Load the voice model named *sid*, or unload everything when it is empty.

	    Args:
	        sid: file name of a checkpoint inside ``weight_root``; ``""`` or ``[]``
	            unloads the current model and frees cached GPU memory.

	    Returns:
	        An update dict; after a load it also carries ``"maximum"``, the number
	        of speakers in the checkpoint.
	    """
	    global n_spk, tgt_sr, net_g, vc, cpt, version, hubert_model
	    if sid == "" or sid == []:
	        # Because of polling, only clean up when a model was actually loaded.
	        if hubert_model is not None:
	            print("clean_empty_cache")
	            # Rebinding to None drops the references; unlike ``del`` it cannot
	            # raise NameError for a global that was never bound.
	            hubert_model = net_g = n_spk = vc = tgt_sr = None
	            if torch.cuda.is_available():
	                torch.cuda.empty_cache()
	            # Per the original author, memory is not fully released without
	            # building the network once more and dropping it again.
	            stale_cpt = globals().get("cpt")
	            if stale_cpt is not None:
	                version = stale_cpt.get("version", "v1")
	                net_g = _build_synthesizer(stale_cpt)
	                del stale_cpt
	                net_g = None
	                if torch.cuda.is_available():
	                    torch.cuda.empty_cache()
	            cpt = None
	        return {"visible": False, "__type__": "update"}
	    person = "%s/%s" % (weight_root, sid)
	    print("loading %s" % person)
	    cpt = torch.load(person, map_location="cpu")
	    tgt_sr = cpt["config"][-1]
	    cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]  # n_spk
	    version = cpt.get("version", "v1")
	    net_g = _build_synthesizer(cpt)
	    del net_g.enc_q
	    print(net_g.load_state_dict(cpt["weight"], strict=False))
	    net_g.eval().to(config.device)
	    net_g = net_g.half() if config.is_half else net_g.float()
	    vc = VC(tgt_sr, config)
	    n_spk = cpt["config"][-3]
	    return {"visible": False, "maximum": n_spk, "__type__": "update"}


	def change_choices(username=None):
	    """Rescan the weight and index folders and return dropdown updates.

	    Args:
	        username: when given, only files whose name maps to this user in
	            ``associated_links`` are listed; ``None`` lists everything.

	    Returns:
	        Two update dicts: sorted ``.pth`` names from ``weight_root`` and sorted
	        ``.index`` paths (excluding names containing "trained") from ``index_root``.
	    """
	    print(associated_links)

	    def _visible(fname):
	        # No user filter, or the file is linked to the requesting user.
	        return username is None or associated_links.get(fname) == username

	    found_weights = [
	        fname
	        for fname in os.listdir(weight_root)
	        if fname.endswith(".pth") and _visible(fname)
	    ]
	    found_indexes = [
	        "%s/%s" % (folder, fname)
	        for folder, _subdirs, fnames in os.walk(index_root, topdown=False)
	        for fname in fnames
	        if fname.endswith(".index") and "trained" not in fname and _visible(fname)
	    ]
	    weights_update = {"choices": sorted(found_weights), "__type__": "update"}
	    indexes_update = {"choices": sorted(found_indexes), "__type__": "update"}
	    return weights_update, indexes_update


	def clean():
	    """Return an update dict that resets a component's value to the empty string."""
	    return dict(value="", __type__="update")


	# Maps the sample-rate labels ("32k", "40k", "48k") to their rate in Hz.
	sr_dict = {"%sk" % khz: khz * 1000 for khz in (32, 40, 48)}


	def if_done(done, p):
	    """Block until process *p* has exited, then set ``done[0]`` to True.

	    Args:
	        done: one-element list used as a mutable completion flag.
	        p: object with a ``poll()`` method that returns None while running.
	    """
	    while p.poll() is None:
	        sleep(0.5)
	    done[0] = True


	def if_done_multi(done, ps):
	    """Block until every process in *ps* has exited, then set ``done[0]`` to True.

	    Args:
	        done: one-element list used as a mutable completion flag.
	        ps: iterable of objects whose ``poll()`` returns None while running.
	    """
	    # poll() is None means the process has not finished; keep waiting while
	    # at least one of them is still running.
	    while any(p.poll() is None for p in ps):
	        sleep(0.5)
	    done[0] = True


	def preprocess_dataset(trainset_dir, exp_dir, sr, n_p):
	    """Run the dataset preprocessing script and stream its log file.

	    Args:
	        trainset_dir: folder with the raw training audio.
	        exp_dir: experiment name; output goes to ``<now_dir>/logs/<exp_dir>``.
	        sr: sample-rate label, a key of ``sr_dict``.
	        n_p: value passed to the script as its third argument.

	    Yields:
	        The full content of ``preprocess.log`` about once per second while the
	        script runs, then once more after it has exited.
	    """
	    sr = sr_dict[sr]
	    log_dir = "%s/logs/%s" % (now_dir, exp_dir)
	    log_path = "%s/preprocess.log" % log_dir
	    os.makedirs(log_dir, exist_ok=True)
	    with open(log_path, "w"):
	        pass  # truncate / create the log
	    # Users may paste a quoted path (the shell used to strip the quotes).
	    trainset_dir = str(trainset_dir).strip(" ").strip('"').strip("\n").strip('"').strip(" ")
	    # Argument list instead of a shell string: paths containing spaces or
	    # shell metacharacters reach the script unchanged and cannot inject commands.
	    # NOTE(review): assumes config.python_cmd is a single executable path - confirm.
	    cmd = [
	        str(config.python_cmd),
	        "trainset_preprocess_pipeline_print.py",
	        trainset_dir,
	        str(sr),
	        str(n_p),
	        log_dir,
	        str(config.noparallel),
	    ]
	    print(" ".join(cmd))
	    p = Popen(cmd)
	    # Output shown in the UI is only delivered in one piece, so the log file
	    # is re-read periodically instead of piping the process output.
	    while True:
	        with open(log_path, "r") as f:
	            yield f.read()
	        sleep(1)
	        if p.poll() is not None:
	            break
	    with open(log_path, "r") as f:
	        log = f.read()
	    print(log)
	    yield log

	# but2.click(extract_f0,[gpus6,np7,f0method8,if_f0_3,trainset_dir4],[info2])
	def extract_f0_feature(gpus, n_p, f0method, if_f0, exp_dir, version19, echl):
	    """Run pitch extraction (optional) and feature extraction, streaming the log.

	    Args:
	        gpus: dash-separated GPU indices; one feature process is started per entry.
	        n_p, f0method, echl: passed through to ``extract_f0_print.py``.
	        if_f0: when truthy, pitch extraction runs before feature extraction.
	        exp_dir: experiment name; work happens in ``<now_dir>/logs/<exp_dir>``.
	        version19: passed through to ``extract_feature_print.py``.

	    Yields:
	        The full content of ``extract_f0_feature.log`` about once per second
	        during each stage, plus once more when the stage has finished.
	    """
	    gpus = gpus.split("-")
	    log_dir = "%s/logs/%s" % (now_dir, exp_dir)
	    log_path = "%s/extract_f0_feature.log" % log_dir
	    python_cmd = str(config.python_cmd)
	    os.makedirs(log_dir, exist_ok=True)
	    with open(log_path, "w"):
	        pass  # truncate / create the log

	    def _follow_log(procs):
	        # Output shown in the UI is only delivered in one piece, so the log
	        # file is re-read periodically until every process has exited.
	        while True:
	            with open(log_path, "r") as f:
	                yield f.read()
	            sleep(1)
	            if all(proc.poll() is not None for proc in procs):
	                break
	        with open(log_path, "r") as f:
	            log = f.read()
	        print(log)
	        yield log

	    # Commands are argument lists rather than shell strings, so paths with
	    # spaces or shell metacharacters are passed through unchanged.
	    # NOTE(review): assumes config.python_cmd is a single executable path - confirm.
	    if if_f0:
	        cmd = [python_cmd, "extract_f0_print.py", log_dir, str(n_p), str(f0method), str(echl)]
	        print(" ".join(cmd))
	        yield from _follow_log([Popen(cmd, cwd=now_dir)])

	    # One feature-extraction process per GPU part. The script's positional
	    # arguments are: device, number of parts, part index, GPU id, experiment
	    # directory, version.
	    ps = []
	    for idx, n_g in enumerate(gpus):
	        cmd = [python_cmd, "extract_feature_print.py", str(config.device), str(len(gpus)), str(idx)]
	        if n_g != "":
	            # An empty GPU id used to vanish during shell word splitting;
	            # keep the argument count the same by omitting it here too.
	            cmd.append(n_g)
	        cmd += [log_dir, str(version19)]
	        print(" ".join(cmd))
	        ps.append(Popen(cmd, cwd=now_dir))
	    yield from _follow_log(ps)


	def change_sr2(sr2, if_f0_3, version19):
	    """Resolve the pretrained generator/discriminator paths for a sample rate.

	    Args:
	        sr2: sample-rate label used in the file name (e.g. "40k").
	        if_f0_3: truthy selects the "f0"-prefixed files.
	        version19: "v1" uses ``pretrained/``, anything else ``pretrained_v2/``.

	    Returns:
	        ``(generator_path, discriminator_path, update_dict)``; a path is ``""``
	        when the file does not exist (a notice is printed in that case).
	    """
	    path_str = "" if version19 == "v1" else "_v2"
	    f0_str = "f0" if if_f0_3 else ""
	    resolved = []
	    for kind in ("G", "D"):
	        candidate = "pretrained%s/%s%s%s.pth" % (path_str, f0_str, kind, sr2)
	        if os.access(candidate, os.F_OK):
	            resolved.append(candidate)
	        else:
	            print(candidate, "not exist, will not use pretrained model")
	            resolved.append("")
	    return (resolved[0], resolved[1], {"visible": True, "__type__": "update"})

	def change_version19(sr2, if_f0_3, version19):
	    """Resolve the pretrained generator/discriminator paths for a model version.

	    Args:
	        sr2: sample-rate label used in the file name (e.g. "40k").
	        if_f0_3: truthy selects the "f0"-prefixed files.
	        version19: "v1" uses ``pretrained/``, anything else ``pretrained_v2/``.

	    Returns:
	        ``(generator_path, discriminator_path)``; a path is ``""`` when the
	        file does not exist (a notice is printed in that case).
	    """
	    path_str = "" if version19 == "v1" else "_v2"
	    f0_str = "f0" if if_f0_3 else ""
	    resolved = []
	    for kind in ("G", "D"):
	        candidate = "pretrained%s/%s%s%s.pth" % (path_str, f0_str, kind, sr2)
	        if os.access(candidate, os.F_OK):
	            resolved.append(candidate)
	        else:
	            print(candidate, "not exist, will not use pretrained model")
	            resolved.append("")
	    return (resolved[0], resolved[1])


	def change_f0(if_f0_3, sr2, version19):  # f0method8,pretrained_G14,pretrained_D15
	    """Resolve the pretrained model paths after the pitch-guidance flag changes.

	    Args:
	        if_f0_3: truthy selects the "f0"-prefixed files.
	        sr2: sample-rate label used in the file name (e.g. "40k").
	        version19: "v1" uses ``pretrained/``, anything else ``pretrained_v2/``.

	    Returns:
	        ``(update_dict, generator_path, discriminator_path)``; the update dict
	        is visible only when *if_f0_3* is truthy, and a path is ``""`` when
	        the file does not exist (a notice is printed in that case).
	    """
	    path_str = "" if version19 == "v1" else "_v2"
	    # Check the files that are actually returned. Existence used to be tested
	    # on the "f0" files even when the non-f0 paths were handed back.
	    f0_str = "f0" if if_f0_3 else ""
	    resolved = []
	    for kind in ("G", "D"):
	        candidate = "pretrained%s/%s%s%s.pth" % (path_str, f0_str, kind, sr2)
	        if os.access(candidate, os.F_OK):
	            resolved.append(candidate)
	        else:
	            print(candidate, "not exist, will not use pretrained model")
	            resolved.append("")
	    return (
	        {"visible": bool(if_f0_3), "__type__": "update"},
	        resolved[0],
	        resolved[1],
	    )


	# Module-level ``global`` statement; it has no effect at this scope.
	global log_interval


	def set_log_interval(exp_dir, batch_size12):
	    """Derive a log interval from the number of wav files and the batch size.

	    Counts the ``.wav`` files in ``<exp_dir>/1_16k_wavs``. With no such folder
	    or no wav files the interval is 1; otherwise it is
	    ``ceil(count / batch_size12)``, plus one whenever that exceeds 1.
	    """
	    wav_folder = os.path.join(exp_dir, "1_16k_wavs")
	    if not (os.path.exists(wav_folder) and os.path.isdir(wav_folder)):
	        return 1
	    wav_count = len([entry for entry in os.listdir(wav_folder) if entry.endswith(".wav")])
	    if wav_count == 0:
	        return 1
	    steps = math.ceil(wav_count / batch_size12)
	    return steps + 1 if steps > 1 else steps

	def _click_train_filelist(exp_dir, sr2, if_f0_3, spk_id5, version19):
	    """Return the shuffled training filelist entries for the experiment in *exp_dir*.

	    Only samples present in every required sub-folder are listed; two "mute"
	    entries from ``<now_dir>/logs/mute`` are always appended.
	    """
	    fea_dim = 256 if version19 == "v1" else 768
	    sources = [
	        ("%s/0_gt_wavs" % exp_dir, ".wav"),
	        ("%s/3_feature%s" % (exp_dir, fea_dim), ".npy"),
	    ]
	    if if_f0_3:
	        sources += [
	            ("%s/2a_f0" % exp_dir, ".wav.npy"),
	            ("%s/2b-f0nsf" % exp_dir, ".wav.npy"),
	        ]
	    names = set.intersection(
	        *({entry.split(".")[0] for entry in os.listdir(folder)} for folder, _ in sources)
	    )
	    # Fields are separated by "|". The former "\|" is not a valid escape and
	    # embedded a literal backslash in front of every separator.
	    # NOTE(review): assumes the training script splits entries on "|" - confirm.
	    rows = [
	        "|".join(
	            ["%s/%s%s" % (folder.replace("\\", "\\\\"), name, ext) for folder, ext in sources]
	            + [str(spk_id5)]
	        )
	        for name in names
	    ]
	    mute_root = "%s/logs/mute" % now_dir
	    mute_fields = [
	        "%s/0_gt_wavs/mute%s.wav" % (mute_root, sr2),
	        "%s/3_feature%s/mute.npy" % (mute_root, fea_dim),
	    ]
	    if if_f0_3:
	        mute_fields += [
	            "%s/2a_f0/mute.wav.npy" % mute_root,
	            "%s/2b-f0nsf/mute.wav.npy" % mute_root,
	        ]
	    mute_row = "|".join(mute_fields + [str(spk_id5)])
	    rows += [mute_row, mute_row]
	    shuffle(rows)
	    return rows


	# but3.click(click_train,[exp_dir1,sr2,if_f0_3,save_epoch10,total_epoch11,batch_size12,if_save_latest13,pretrained_G14,pretrained_D15,gpus16])
	def click_train(
	    exp_dir1,
	    sr2,
	    if_f0_3,
	    spk_id5,
	    save_epoch10,
	    total_epoch11,
	    batch_size12,
	    if_save_latest13,
	    pretrained_G14,
	    pretrained_D15,
	    gpus16,
	    if_cache_gpu17,
	    if_save_every_weights18,
	    version19,
	):
	    """Write the training filelist, run the training script and wait for it.

	    The experiment lives in ``<now_dir>/logs/<exp_dir1>``. The PID of the
	    training process is stored in the module-level ``PID``.

	    Returns:
	        ``(message, update_dict, update_dict)`` once training has exited.
	    """
	    global PID
	    CSVutil('csvdb/stop.csv', 'w+', 'formanting', False)
	    # Build the filelist.
	    exp_dir = "%s/logs/%s" % (now_dir, exp_dir1)
	    os.makedirs(exp_dir, exist_ok=True)

	    log_interval = set_log_interval(exp_dir, batch_size12)

	    opt = _click_train_filelist(exp_dir, sr2, if_f0_3, spk_id5, version19)
	    with open("%s/filelist.txt" % exp_dir, "w") as f:
	        f.write("\n".join(opt))
	    print("write filelist done")
	    print("use gpus:", gpus16)
	    if pretrained_G14 == "":
	        print("no pretrained Generator")
	    if pretrained_D15 == "":
	        print("no pretrained Discriminator")

	    # Argument list instead of a shell string: optional flags are simply left
	    # out (the former "\b" placeholders were passed on as stray arguments) and
	    # values with spaces or shell metacharacters are passed through unchanged.
	    # NOTE(review): assumes config.python_cmd is a single executable path - confirm.
	    cmd = [
	        str(config.python_cmd),
	        "train_nsf_sim_cache_sid_load_pretrain.py",
	        "-e", str(exp_dir1),
	        "-sr", str(sr2),
	        "-f0", "1" if if_f0_3 else "0",
	        "-bs", str(batch_size12),
	    ]
	    if gpus16:
	        cmd += ["-g", str(gpus16)]
	    cmd += ["-te", str(total_epoch11), "-se", str(save_epoch10)]
	    if pretrained_G14 != "":
	        cmd += ["-pg", str(pretrained_G14)]
	    if pretrained_D15 != "":
	        cmd += ["-pd", str(pretrained_D15)]
	    cmd += [
	        # ``== True`` is kept on purpose: only a value equal to True enables a flag.
	        "-l", "1" if if_save_latest13 == True else "0",
	        "-c", "1" if if_cache_gpu17 == True else "0",
	        "-sw", "1" if if_save_every_weights18 == True else "0",
	        "-v", str(version19),
	        "-li", str(log_interval),
	    ]
	    print(" ".join(cmd))
	    p = Popen(cmd, cwd=now_dir)
	    PID = p.pid
	    p.wait()
	    return ("训练结束, 您可查看控制台训练日志或实验文件夹下的train.log", {"visible": False, "__type__": "update"}, {"visible": True, "__type__": "update"})


	# but4.click(train_index, [exp_dir1], info3)
	def train_index(exp_dir1, version19):
	    """Build the faiss feature index for an experiment, yielding progress text.

	    Reads every array in ``<now_dir>/logs/<exp_dir1>/3_feature256`` (v1) or
	    ``3_feature768`` (otherwise), shuffles the rows, saves them as
	    ``total_fea.npy`` and writes a ``trained_*`` and an ``added_*`` index file.

	    Yields:
	        The newline-joined progress messages so far, or a single message
	        asking for feature extraction when no features are available.
	    """
	    exp_dir = "%s/logs/%s" % (now_dir, exp_dir1)
	    os.makedirs(exp_dir, exist_ok=True)
	    feature_dim = 256 if version19 == "v1" else 768
	    feature_dir = "%s/3_feature%s" % (exp_dir, feature_dim)
	    # This function is a generator, so the message has to be yielded: a
	    # ``return "<text>"`` here never reached the caller.
	    if not os.path.exists(feature_dir):
	        yield "请先进行特征提取!"
	        return
	    listdir_res = list(os.listdir(feature_dir))
	    if len(listdir_res) == 0:
	        yield "请先进行特征提取！"
	        return
	    npys = [np.load("%s/%s" % (feature_dir, name)) for name in sorted(listdir_res)]
	    big_npy = np.concatenate(npys, 0)
	    big_npy_idx = np.arange(big_npy.shape[0])
	    np.random.shuffle(big_npy_idx)
	    big_npy = big_npy[big_npy_idx]
	    np.save("%s/total_fea.npy" % exp_dir, big_npy)
	    # Number of IVF cells; at least 1, because fewer than 39 feature rows
	    # would otherwise request an "IVF0" index.
	    n_ivf = max(1, min(int(16 * np.sqrt(big_npy.shape[0])), big_npy.shape[0] // 39))
	    infos = ["%s,%s" % (big_npy.shape, n_ivf)]
	    yield "\n".join(infos)
	    index = faiss.index_factory(feature_dim, "IVF%s,Flat" % n_ivf)
	    infos.append("training")
	    yield "\n".join(infos)
	    index_ivf = faiss.extract_index_ivf(index)
	    index_ivf.nprobe = 1
	    index.train(big_npy)
	    index_name = "IVF%s_Flat_nprobe_%s_%s_%s.index" % (n_ivf, index_ivf.nprobe, exp_dir1, version19)
	    faiss.write_index(index, "%s/trained_%s" % (exp_dir, index_name))
	    infos.append("adding")
	    yield "\n".join(infos)
	    # Add the vectors in batches.
	    batch_size_add = 8192
	    for i in range(0, big_npy.shape[0], batch_size_add):
	        index.add(big_npy[i : i + batch_size_add])
	    faiss.write_index(index, "%s/added_%s" % (exp_dir, index_name))
	    infos.append("成功构建索引，added_%s" % index_name)
	    yield "\n".join(infos)


	# but5.click(train1key, [exp_dir1, sr2, if_f0_3, trainset_dir4, spk_id5, gpus6, np7, f0method8, save_epoch10, total_epoch11, batch_size12, if_save_latest13, pretrained_G14, pretrained_D15, gpus16, if_cache_gpu17], info3)
	def train1key(
	exp_dir1,
	sr2,
	if_f0_3,
	trainset_dir4,
	spk_id5,
	np7,
	f0method8,
	save_epoch10,
	total_epoch11,
	batch_size12,
	if_save_latest13,
	pretrained_G14,
	pretrained_D15,
	gpus16,
	if_cache_gpu17,
	if_save_every_weights18,
	version19,
	echl
	):
	infos = []

	def get_info_str(strr):
	infos.append(strr)
	return "\n".join(infos)

	model_log_dir = "%s/logs/%s" % (now_dir, exp_dir1)
	preprocess_log_path = "%s/preprocess.log" % model_log_dir
	extract_f0_feature_log_path = "%s/extract_f0_feature.log" % model_log_dir
	gt_wavs_dir = "%s/0_gt_wavs" % model_log_dir
	feature_dir = (
	"%s/3_feature256" % model_log_dir
	if version19 == "v1"
	else "%s/3_feature768" % model_log_dir
	)

	os.makedirs(model_log_dir, exist_ok=True)
	#########step1:处理数据
	open(preprocess_log_path, "w").close()
	cmd = (
	config.python_cmd
	+ " trainset_preprocess_pipeline_print.py %s %s %s %s "
	% (trainset_dir4, sr_dict[sr2], np7, model_log_dir)
	+ str(config.noparallel)
	)
	yield get_info_str("step1: step 1")
	yield get_info_str(cmd)
	p = Popen(cmd, shell=True)
	p.wait()
	with open(preprocess_log_path, "r") as f:
	print(f.read())
	#########step2a:提取音高
	open(extract_f0_feature_log_path, "w")
	if if_f0_3:
	yield get_info_str("step2a:正在提取音高")
	cmd = config.python_cmd + " extract_f0_print.py %s %s %s %s" % (
	model_log_dir,
	np7,
	f0method8,
	echl
	)
	yield get_info_str(cmd)
	p = Popen(cmd, shell=True, cwd=now_dir)
	p.wait()
	with open(extract_f0_feature_log_path, "r") as f:
	print(f.read())
	else:
	yield get_info_str("step2a:step2a")
	#######step2b:提取特征
	yield get_info_str("step2b:step2b")
	gpus = gpus16.split("-")
	leng = len(gpus)
	ps = []
	for idx, n_g in enumerate(gpus):
	cmd = config.python_cmd + " extract_feature_print.py %s %s %s %s %s %s" % (
	config.device,
	leng,
	idx,
	n_g,
	model_log_dir,
	version19,
	)
	yield get_info_str(cmd)
	p = Popen(
	cmd, shell=True, cwd=now_dir
	) # , shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir
	ps.append(p)
	for p in ps:
	p.wait()
	with open(extract_f0_feature_log_path, "r") as f:
	print(f.read())
	#######step3a:训练模型
	yield get_info_str("step3a:step3a")
	# 生成filelist
	if if_f0_3:
	f0_dir = "%s/2a_f0" % model_log_dir
	f0nsf_dir = "%s/2b-f0nsf" % model_log_dir
	names = (
	set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)])
	& set([name.split(".")[0] for name in os.listdir(feature_dir)])
	& set([name.split(".")[0] for name in os.listdir(f0_dir)])
	& set([name.split(".")[0] for name in os.listdir(f0nsf_dir)])
	)
	else:
	names = set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)]) & set(
	[name.split(".")[0] for name in os.listdir(feature_dir)]
	)
	opt = []
	for name in names:
	if if_f0_3:
	opt.append(
	"%s/%s.wav\|%s/%s.npy\|%s/%s.wav.npy\|%s/%s.wav.npy\|%s"
	% (
	gt_wavs_dir.replace("\\", "\\\\"),
	name,
	feature_dir.replace("\\", "\\\\"),
	name,
	f0_dir.replace("\\", "\\\\"),
	name,
	f0nsf_dir.replace("\\", "\\\\"),
	name,
	spk_id5,
	)
	)
	else:
	opt.append(
	"%s/%s.wav\|%s/%s.npy\|%s"
	% (
	gt_wavs_dir.replace("\\", "\\\\"),
	name,
	feature_dir.replace("\\", "\\\\"),
	name,
	spk_id5,
	)
	)
	fea_dim = 256 if version19 == "v1" else 768
	if if_f0_3:
	for _ in range(2):
	opt.append(
	"%s/logs/mute/0_gt_wavs/mute%s.wav\|%s/logs/mute/3_feature%s/mute.npy\|%s/logs/mute/2a_f0/mute.wav.npy\|%s/logs/mute/2b-f0nsf/mute.wav.npy\|%s"
	% (now_dir, sr2, now_dir, fea_dim, now_dir, now_dir, spk_id5)
	)
	else:
	for _ in range(2):
	opt.append(
	"%s/logs/mute/0_gt_wavs/mute%s.wav\|%s/logs/mute/3_feature%s/mute.npy\|%s"
	% (now_dir, sr2, now_dir, fea_dim, spk_id5)
	)
	shuffle(opt)
	with open("%s/filelist.txt" % model_log_dir, "w") as f:
	f.write("\n".join(opt))
	yield get_info_str("write filelist done")
	if gpus16:
	cmd = (
	config.python_cmd
	+" train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -g %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s"
	% (
	exp_dir1,
	sr2,
	1 if if_f0_3 else 0,
	batch_size12,
	gpus16,
	total_epoch11,
	save_epoch10,
	("-pg %s" % pretrained_G14) if pretrained_G14 != "" else "",
	("-pd %s" % pretrained_D15) if pretrained_D15 != "" else "",
	1 if if_save_latest13 == True else 0,
	1 if if_cache_gpu17 == True else 0,
	1 if if_save_every_weights18 == True else 0,
	version19,
	)
	)
	else:
	cmd = (
	config.python_cmd
	+ " train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s"
	% (
	exp_dir1,
	sr2,
	1 if if_f0_3 else 0,
	batch_size12,
	total_epoch11,
	save_epoch10,
	("-pg %s" % pretrained_G14) if pretrained_G14 != "" else "",
	("-pd %s" % pretrained_D15) if pretrained_D15 != "" else "",
	1 if if_save_latest13 == True else 0,
	1 if if_cache_gpu17 == True else 0,
	1 if if_save_every_weights18 == True else 0,
	version19,
	)
	)
	yield get_info_str(cmd)
	p = Popen(cmd, shell=True, cwd=now_dir)
	p.wait()
	yield get_info_str("training done, in train.log")
	#######step3b: train the index
	npys = []
	listdir_res = list(os.listdir(feature_dir))
	for name in sorted(listdir_res):
	phone = np.load("%s/%s" % (feature_dir, name))
	npys.append(phone)
	big_npy = np.concatenate(npys, 0)

	big_npy_idx = np.arange(big_npy.shape[0])
	np.random.shuffle(big_npy_idx)
	big_npy = big_npy[big_npy_idx]
	np.save("%s/total_fea.npy" % model_log_dir, big_npy)

	# n_ivf = big_npy.shape[0] // 39
	n_ivf = min(int(16 * np.sqrt(big_npy.shape[0])), big_npy.shape[0] // 39)
	yield get_info_str("%s,%s" % (big_npy.shape, n_ivf))
	index = faiss.index_factory(256 if version19 == "v1" else 768, "IVF%s,Flat" % n_ivf)
	yield get_info_str("training index")
	index_ivf = faiss.extract_index_ivf(index) #
	index_ivf.nprobe = 1
	index.train(big_npy)
	faiss.write_index(
	index,
	"%s/trained_IVF%s_Flat_nprobe_%s_%s_%s.index"
	% (model_log_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19),
	)
	yield get_info_str("adding index")
	batch_size_add = 8192
	for i in range(0, big_npy.shape[0], batch_size_add):
	index.add(big_npy[i : i + batch_size_add])
	faiss.write_index(
	index,
	"%s/added_IVF%s_Flat_nprobe_%s_%s_%s.index"
	% (model_log_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19),
	)
	yield get_info_str(
	"成功构建索引, added_IVF%s_Flat_nprobe_%s_%s_%s.index"
	% (n_ivf, index_ivf.nprobe, exp_dir1, version19)
	)
	yield get_info_str("yes!")


	def whethercrepeornah(radio):
	    """Build a Gradio update dict that is visible only for the mangio-crepe methods.

	    ``radio`` is the selected pitch-extraction method name; the returned dict has
	    ``visible`` set to True for 'mangio-crepe' and 'mangio-crepe-tiny', else False.
	    """
	    is_mangio_crepe = radio in ('mangio-crepe', 'mangio-crepe-tiny')
	    return {"visible": is_mangio_crepe, "__type__": "update"}

	# ckpt_path2.change(change_info_,[ckpt_path2],[sr__,if_f0__])
	def change_info_(ckpt_path):
	    """Read sample rate, f0 flag and version from the train.log beside a checkpoint.

	    The log is looked up in the directory that contains ``ckpt_path``. Only the
	    first line is used: its last tab-separated field is evaluated as a Python
	    literal dict with the keys ``sample_rate``, ``if_f0`` and optionally
	    ``version``.

	    Returns:
	        ``(sample_rate, str(if_f0), version)`` on success, where version is "v2"
	        only when the log says so and "v1" otherwise. When the log is missing
	        or cannot be parsed, three ``{"__type__": "update"}`` dicts are
	        returned instead.
	    """
	    # Build the sibling path from the directory name. The previous
	    # str.replace(basename, "train.log") rewrote every occurrence of the
	    # basename in the path, which broke when a parent folder shared the name.
	    log_path = os.path.join(os.path.dirname(ckpt_path), "train.log")
	    if not os.path.exists(log_path):
	        return {"__type__": "update"}, {"__type__": "update"}, {"__type__": "update"}
	    try:
	        with open(log_path, "r") as f:
	            first_line = f.read().strip("\n").split("\n")[0]
	        # NOTE(security): eval() executes whatever the log file contains. This is
	        # only acceptable while train.log is produced locally by the trainer;
	        # consider ast.literal_eval if logs can come from untrusted sources.
	        info = eval(first_line.split("\t")[-1])
	        sr, f0 = info["sample_rate"], info["if_f0"]
	        version = "v2" if ("version" in info and info["version"] == "v2") else "v1"
	        return sr, str(f0), version
	    except Exception:
	        # Best effort: report the problem and leave the UI fields untouched.
	        traceback.print_exc()
	        return {"__type__": "update"}, {"__type__": "update"}, {"__type__": "update"}


	from lib.infer_pack.models_onnx import SynthesizerTrnMsNSFsidM


	def export_onnx(ModelPath, ExportedPath, MoeVS=True):
	    """Export the synthesizer stored in a checkpoint to an ONNX file.

	    The checkpoint at ``ModelPath`` is loaded on CPU, the network is rebuilt in
	    fp32 and traced with random dummy inputs, and the graph is written to
	    ``ExportedPath``. ``MoeVS`` is accepted but not used by this function.

	    Returns:
	        The string "Finished".
	    """
	    checkpoint = torch.load(ModelPath, map_location="cpu")
	    # n_spk: taken from the number of rows of the speaker embedding weight
	    checkpoint["config"][-3] = checkpoint["weight"]["emb_g.weight"].shape[0]
	    model_version = checkpoint.get("version", "v1")
	    # hidden_channels: 256 for "v1", 768 otherwise (preparation for 768-dim vectors)
	    feature_dim = 256 if model_version == "v1" else 768

	    # Dummy inputs are created in the same order as before so the random
	    # number stream is consumed identically.
	    dummy_phone = torch.rand(1, 200, feature_dim)  # hidden units
	    dummy_phone_lengths = torch.tensor([200]).long()  # hidden-unit length (seemingly of little use)
	    dummy_pitch = torch.randint(size=(1, 200), low=5, high=255)  # fundamental frequency (in Hz)
	    dummy_pitchf = torch.rand(1, 200)  # nsf fundamental frequency
	    dummy_speaker = torch.LongTensor([0])  # speaker ID
	    dummy_noise = torch.rand(1, 192, 200)  # noise (adds a random factor)

	    export_device = "cpu"  # device used while exporting (does not affect using the model)

	    # fp32 export (fp16 in C++ would need manual memory re-layout, so it is not used for now)
	    synthesizer = SynthesizerTrnMsNSFsidM(
	        *checkpoint["config"], is_half=False, version=model_version
	    )
	    synthesizer.load_state_dict(checkpoint["weight"], strict=False)

	    traced_inputs = tuple(
	        tensor.to(export_device)
	        for tensor in (
	            dummy_phone,
	            dummy_phone_lengths,
	            dummy_pitch,
	            dummy_pitchf,
	            dummy_speaker,
	            dummy_noise,
	        )
	    )
	    # net_g.construct_spkmixmap(n_speaker) would export a multi-speaker mix track
	    torch.onnx.export(
	        synthesizer,
	        traced_inputs,
	        ExportedPath,
	        dynamic_axes={
	            "phone": [1],
	            "pitch": [1],
	            "pitchf": [1],
	            "rnd": [2],
	        },
	        do_constant_folding=False,
	        opset_version=16,
	        verbose=False,
	        input_names=["phone", "phone_lengths", "pitch", "pitchf", "ds", "rnd"],
	        output_names=["audio"],
	    )
	    return "Finished"

	#region RVC WebUI App

	def get_presets():
	    """Return the ``name`` of every entry under ``presets`` in ../inference-presets.json."""
	    with open('../inference-presets.json', 'r') as preset_file:
	        preset_doc = json.load(preset_file)
	    return [entry['name'] for entry in preset_doc['presets']]

	def change_choices2():
	    """Rescan ./audios and return Gradio updates for the audio dropdown.

	    Returns a pair of update dicts: the first carries the sorted list of
	    './audios/<file>' paths with a supported extension, the second is an
	    empty update.
	    """
	    supported = ('.wav','.mp3','.ogg','.flac','.m4a','.aac','.mp4')
	    found = [
	        os.path.join('./audios', entry).replace('\\', '/')
	        for entry in os.listdir("./audios")
	        if entry.endswith(supported)
	    ]
	    found.sort()
	    return {"choices": found, "__type__": "update"}, {"__type__": "update"}

	# Module-level list of the audio files found in ./audios at the moment this code runs.
	# Only names ending in one of the listed extensions are kept; each entry is stored as
	# './audios/<name>' with backslashes replaced by forward slashes.
	audio_files=[]
	for filename in os.listdir("./audios"):
	if filename.endswith(('.wav','.mp3','.ogg','.flac','.m4a','.aac','.mp4')):
	audio_files.append(os.path.join('./audios',filename).replace('\\', '/'))

	def get_index():
	    """Return the path of an .index file for the alphabetically first model.

	    The model name is the part of the first sorted entry of ``names`` before
	    the first dot; its index is searched in ``./logs/<model>``.

	    Returns:
	        The path of the first ``.index`` file that ``os.listdir`` reports, or
	        an empty string when no model is known, the logs folder does not
	        exist, or it contains no index. Every "not found" outcome yields ''
	        so callers always receive a string.
	    """
	    if check_for_name() == '':
	        return ''
	    chosen_model = sorted(names)[0].split(".")[0]
	    logs_path = "./logs/" + chosen_model
	    if not os.path.exists(logs_path):
	        return ''
	    for file in os.listdir(logs_path):
	        if file.endswith(".index"):
	            return os.path.join(logs_path, file)
	    return ''

	def get_indexes():
	    """Collect every .index file below ./logs/.

	    Returns the list of paths, or an empty string when none were found.
	    """
	    found = [
	        os.path.join(folder, entry)
	        for folder, _subdirs, entries in os.walk("./logs/")
	        for entry in entries
	        if entry.endswith(".index")
	    ]
	    if found:
	        return found
	    return ''

	def get_name():
	    """Return the alphabetically first entry of ``audio_files``, or '' when it is empty."""
	    return sorted(audio_files)[0] if len(audio_files) > 0 else ''

	def save_to_wav(record_button):
	    """Move a recorded file into ./audios under a timestamped .wav name.

	    ``record_button`` is the path of the recording, or None when nothing was
	    recorded. Returns the new path, or None when there was no recording.
	    """
	    if record_button is None:
	        return None
	    stamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
	    target = './audios/' + stamp + '.wav'
	    shutil.move(record_button, target)
	    return target

	def save_to_wav2(dropbox):
	    """Copy an uploaded file into ./audios, delete the upload, and return the new path.

	    ``dropbox`` is any object whose ``name`` attribute is the uploaded file's path.
	    """
	    uploaded_path = dropbox.name
	    target_path = os.path.join('./audios', os.path.basename(uploaded_path))
	    shutil.copy2(uploaded_path, target_path)
	    # The copy (with metadata) is made first; the source is removed only afterwards.
	    os.remove(uploaded_path)
	    return target_path

	def match_index(sid0):
	    """Return the path of an .index file belonging to the selected model.

	    Args:
	        sid0: model file name; the part before the first dot names the folder
	            under ``./logs``.

	    Returns:
	        The path of the first ``.index`` file that ``os.listdir`` reports in
	        that folder, or an empty string when the folder is missing or holds
	        no index file. An existing folder without an index now yields ''
	        instead of None, so the result is always a string.
	    """
	    folder = sid0.split(".")[0]
	    parent_dir = "./logs/" + folder
	    if not os.path.exists(parent_dir):
	        return ''
	    for filename in os.listdir(parent_dir):
	        if filename.endswith(".index"):
	            return os.path.join(parent_dir, filename)
	    return ''

	def check_for_name():
	    """Return the alphabetically first entry of ``names``, or '' when there are none."""
	    if len(names) == 0:
	        return ''
	    return sorted(names)[0]

	# Download a zipped voice model from a URL and install its .pth / .index files.
	#   url: link to the archive; model: name used for the installed files;
	#   associated_user: optional value stored in associated_links for each installed file.
	# Returns a status string for the validation failures below. As written, the bare
	# `return` further down ends the function (returning None) before any download starts.
	def download_from_url(url, model, associated_user=None):
	if url == '':
	return "URL cannot be left empty."
	if model =='':
	return "You need to name your model. For example: My-Model"
	url = url.strip()
	# Start from empty scratch folders: remove any previous zips/unzips, then recreate them.
	zip_dirs = ["zips", "unzips"]
	for directory in zip_dirs:
	if os.path.exists(directory):
	shutil.rmtree(directory)
	os.makedirs("zips", exist_ok=True)
	os.makedirs("unzips", exist_ok=True)
	zipfile = model + '.zip'
	zipfile_path = './zips/' + zipfile
	# NOTE(review): this bare return appears to sit at function level, which makes
	# everything below unreachable (downloads are effectively disabled) — confirm
	# that this is intentional.
	return
	try:
	# NOTE(review): the right-hand operand of `or` is a non-empty string literal and is
	# therefore always truthy, so this branch would be taken for every URL; an
	# `in url` test is probably missing.
	if "drive.google.com" in url or "drive.usercontent.google.com":
	subprocess.run(["gdown", url, "--fuzzy", "-O", zipfile_path])
	elif "mega.nz" in url:
	m = Mega()
	m.download_url(url, './zips')
	else:
	subprocess.run(["wget", url, "-O", zipfile_path])
	# Unpack .zip files found in ./zips into ./unzips.
	for filename in os.listdir("./zips"):
	if filename.endswith(".zip"):
	zipfile_path = os.path.join("./zips/",filename)
	shutil.unpack_archive(zipfile_path, "./unzips", 'zip')
	else:
	return "No zipfile found."
	# Install the extracted files: .index files go to ./logs/<model>, and .pth files whose
	# names contain neither "G_" nor "D_" are copied to ./weights/<model>.pth.
	for root, dirs, files in os.walk('./unzips'):
	for file in files:
	file_path = os.path.join(root, file)
	if file.endswith(".index"):
	os.mkdir(f'./logs/{model}')
	shutil.copy2(file_path,f'./logs/{model}')
	if associated_user is not None:
	associated_links[file] = associated_user
	elif "G_" not in file and "D_" not in file and file.endswith(".pth"):
	shutil.copy(file_path,f'./weights/{model}.pth')
	if associated_user is not None:
	associated_links[f'{model}.pth'] = associated_user
	shutil.rmtree("zips")
	shutil.rmtree("unzips")
	change_choices()
	return "Model downloaded, you can go back to the inference page!"
	# NOTE(review): a bare except also catches KeyboardInterrupt/SystemExit and hides the
	# actual error; consider `except Exception` plus logging.
	except:
	return "ERROR - The download failed. Check if the link is valid."
	def success_message(face):
	    """Return the upload confirmation text for ``face`` together with the string 'None'."""
	    confirmation = f'{face.name} has been uploaded.'
	    return confirmation, 'None'
	def mouth(size, face, voice, faces):
	    """Run Wav2Lip inference to lip-sync a face video to an audio file.

	    Args:
	        size: 'Half' selects a resize factor of 2; anything else selects 1.
	        face: uploaded file object; its ``name`` is used when ``faces`` is 'None'.
	        voice: path of the audio file passed to ``--audio``.
	        faces: 'None' to use the uploaded face, or one of the preset names
	            'Ben Shapiro' / 'Andrew Tate'.

	    Returns:
	        A tuple of the fixed output video path and the status text
	        'Animation completed.'.

	    Raises:
	        ValueError: if ``faces`` is neither 'None' nor a known preset
	            (previously this surfaced as an unbound local variable).
	    """
	    preset_faces = {
	        'Ben Shapiro': '/content/wav2lip-HD/inputs/ben-shapiro-10.mp4',
	        'Andrew Tate': '/content/wav2lip-HD/inputs/tate-7.mp4',
	    }
	    resize_factor = 2 if size == 'Half' else 1
	    if faces == 'None':
	        character = face.name
	    elif faces in preset_faces:
	        character = preset_faces[faces]
	    else:
	        raise ValueError(f"Unknown face preset: {faces!r}")
	    # Pass the arguments as a list (no shell) so paths containing spaces or
	    # shell metacharacters are handed to inference.py verbatim.
	    command = [
	        "python", "inference.py",
	        "--checkpoint_path", "checkpoints/wav2lip.pth",
	        "--face", str(character),
	        "--audio", str(voice),
	        "--pads", "0", "20", "0", "0",
	        "--outfile", "/content/wav2lip-HD/outputs/result.mp4",
	        "--fps", "24",
	        "--resize_factor", str(resize_factor),
	    ]
	    # As before, the call blocks until inference finishes and the exit code is ignored.
	    subprocess.run(command, cwd='/content/wav2lip-HD/Wav2Lip-master')
	    return '/content/wav2lip-HD/outputs/result.mp4', 'Animation completed.'

	def stoptraining(mim):
	    """Request a training stop when ``mim`` equals 1 and return two Gradio updates.

	    For ``int(mim) == 1`` the stop flag is written through CSVutil and SIGTERM
	    is sent to ``PID``; any failure is printed and otherwise ignored. The
	    return value is always (hide update, show update).
	    """
	    stop_requested = int(mim) == 1
	    if stop_requested:
	        try:
	            CSVutil('csvdb/stop.csv', 'w+', 'stop', 'True')
	            os.kill(PID, signal.SIGTERM)
	        except Exception as err:
	            print(f"Couldn't click due to {err}")
	    hidden = {"visible": False, "__type__": "update"}
	    shown = {"visible": True, "__type__": "update"}
	    return (hidden, shown)




	def transcribe_btn_click(audio_choice):
	    """Transcribe an audio file with WhisperX and stage it in ./demo/temp.

	    Sets the module globals ``audio_fn`` and ``transcript_fn`` to
	    ``./demo/temp/<stem>.wav`` and ``./demo/temp/<stem>.txt``. When both files
	    already exist, transcription is skipped and the stored transcript is
	    returned (previously this path returned None).

	    Args:
	        audio_choice: path of the audio file to transcribe.

	    Returns:
	        The transcript text.
	    """
	    global transcript_fn
	    global audio_fn

	    temp_folder = "./demo/temp"
	    filename = os.path.splitext(audio_choice.split("/")[-1])[0]
	    audio_fn = f"{temp_folder}/{filename}.wav"
	    transcript_fn = f"{temp_folder}/{filename}.txt"
	    if os.path.exists(audio_fn) and os.path.exists(transcript_fn):
	        print("Audio and transcript already exist, skipping transcript")
	        with open(transcript_fn, "r") as f:
	            return f.read()

	    batch_size = 1  # Adjust based on your GPU memory availability
	    device = "cuda" if torch.cuda.is_available() else "cpu"
	    # float16 is only requested on CUDA; the CPU path uses float32 instead.
	    compute_type = "float16" if device == "cuda" else "float32"

	    model = whisperx.load_model("large-v2", device, compute_type=compute_type)
	    pre_result = model.transcribe(audio_choice, batch_size=batch_size)

	    # The result carries either a list of segments or a single 'text' field.
	    if 'segments' in pre_result:
	        result = " ".join([segment['text'] for segment in pre_result['segments']])
	    else:
	        result = pre_result.get('text', '')

	    print("Transcribe text: " + result)

	    # Drop the model reference first so the collector and the CUDA cache
	    # flush can actually release its memory.
	    del model
	    gc.collect()
	    torch.cuda.empty_cache()

	    # Stage the audio and its transcript in the temp folder.
	    os.makedirs(temp_folder, exist_ok=True)
	    try:
	        shutil.copy(audio_choice, temp_folder)
	    except shutil.SameFileError:
	        # The source already lives in the temp folder; nothing to copy.
	        pass
	    # NOTE(review): the copy keeps the source file's extension while audio_fn
	    # always ends in ".wav", so for non-wav input audio_fn does not point at
	    # the copied file — confirm how such inputs are meant to be handled.
	    with open(transcript_fn, "w") as f:
	        f.write(result)

	    # Folder reserved for MFA alignments.
	    align_temp = f"{temp_folder}/mfa_alignments"
	    os.makedirs(align_temp, exist_ok=True)

	    return result


	def run(seed, stop_repetition, sample_batch_size, left_margin, right_margin, codec_audio_sr, codec_sr, top_k, top_p,
	        temperature, kvcache, cutoff_value, target_transcript, silence_tokens, transcribed_text):
	    """Generate speech with VoiceCraft, using the start of the input audio as prompt.

	    The first ``cutoff_value`` seconds of the audio at the global ``audio_fn``
	    are used as the voice prompt. The text to synthesize is
	    ``transcribed_text`` followed by ``target_transcript``, joined with a
	    space as in ``run_joint``.

	    ``seed`` is only used in the output file names; ``left_margin`` and
	    ``right_margin`` are accepted but not used here. The remaining decoding
	    parameters are forwarded to ``inference_one_sample`` unchanged.

	    Returns:
	        ``[concatenated_wav_path, generated_wav_path]``, both written under
	        ./demo/generated_tts.

	    Raises:
	        ValueError: if ``cutoff_value`` is not smaller than the audio duration.
	    """
	    global voicecraft_model, voicecraft_config, phn2num

	    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
	    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
	    os.environ["USER"] = "USER"

	    print("Transcribing the input audio")
	    # NOTE(review): input_audio_fn is not a parameter of this function (run_joint
	    # receives it as its first argument). Unless a module-level global of that
	    # name exists elsewhere in the file, this call raises NameError — confirm.
	    transcribe_btn_click(input_audio_fn)
	    print("Transcription complete")

	    # End of the prompt within the audio, in seconds; it depends on the clip.
	    cut_off_sec = cutoff_value
	    # Separate prompt and target with a space so their boundary words are not fused.
	    target_transcript = transcribed_text + ' ' + target_transcript
	    print(target_transcript)
	    info = torchaudio.info(audio_fn)
	    audio_dur = info.num_frames / info.sample_rate

	    print("audio dur s is", audio_dur, "cutoff_sec is", cut_off_sec)
	    # Explicit check instead of assert, which is stripped under `python -O`.
	    if not cut_off_sec < audio_dur:
	        raise ValueError(f"cut_off_sec {cut_off_sec} is larger than the audio duration {audio_dur}")
	    prompt_end_frame = int(cut_off_sec * info.sample_rate)

	    # The VoiceCraft model is loaded once and then reused across calls.
	    if voicecraft_model is None:
	        load_voicecraft()

	    encodec_fn = "./pretrained_models/encodec_4cb2048_giga.th"
	    text_tokenizer = TextTokenizer(backend="espeak")
	    audio_tokenizer = AudioTokenizer(signature=encodec_fn)  # will also put the neural codec model on gpu

	    # Run the model to get the output.
	    decode_config = {'top_k': top_k, 'top_p': top_p, 'temperature': temperature, 'stop_repetition': stop_repetition,
	                     'kvcache': kvcache, "codec_audio_sr": codec_audio_sr, "codec_sr": codec_sr,
	                     "silence_tokens": silence_tokens, "sample_batch_size": sample_batch_size}
	    from lib.voicecraft.inference_tts_scale import inference_one_sample
	    concated_audio, gen_audio = inference_one_sample(voicecraft_model, voicecraft_config, phn2num, text_tokenizer, audio_tokenizer,
	                                                     audio_fn, target_transcript, config.device, decode_config,
	                                                     prompt_end_frame)

	    # Save both segments for comparison.
	    concated_audio, gen_audio = concated_audio[0].cpu(), gen_audio[0].cpu()

	    output_dir = "./demo/generated_tts"
	    os.makedirs(output_dir, exist_ok=True)
	    seg_save_fn_gen = f"{output_dir}/{os.path.basename(audio_fn)[:-4]}_gen_seed{seed}.wav"
	    seg_save_fn_concat = f"{output_dir}/{os.path.basename(audio_fn)[:-4]}_concat_seed{seed}.wav"

	    torchaudio.save(seg_save_fn_gen, gen_audio, int(codec_audio_sr))
	    torchaudio.save(seg_save_fn_concat, concated_audio, int(codec_audio_sr))

	    return [seg_save_fn_concat, seg_save_fn_gen]

	def run_joint(input_audio_fn, seed, stop_repetition, sample_batch_size, left_margin, right_margin, codec_audio_sr, codec_sr, top_k, top_p,
	              temperature, kvcache, cutoff_value, target_transcript, silence_tokens, transcribed_text,
	              sid,
	              f0_up_key,
	              f0_file,
	              f0_method,
	              file_index,
	              index_rate,
	              filter_radius,
	              resample_sr,
	              rms_mix_rate,
	              protect,
	              crepe_hop_length):
	    """Generate speech with VoiceCraft, then convert it with the loaded RVC voice.

	    Stage 1 transcribes ``input_audio_fn``, uses the first ``cutoff_value``
	    seconds of the staged audio as prompt and synthesizes
	    ``transcribed_text + ' ' + target_transcript``; both the generated and the
	    concatenated audio are written under ./demo/generated_tts.
	    Stage 2 loads the generated file at 16 kHz and passes it through
	    ``vc.pipeline`` together with the RVC arguments (``sid`` .. ``crepe_hop_length``).

	    ``seed`` is only used in the output file names; ``left_margin`` and
	    ``right_margin`` are accepted but not used here.

	    Returns:
	        ``(status_text, (sample_rate, audio))`` on success. If the conversion
	        stage raises, ``(traceback_text, (None, None))`` is returned instead.

	    Raises:
	        ValueError: if ``cutoff_value`` is not smaller than the audio duration.
	    """
	    global voicecraft_model, voicecraft_config, phn2num
	    global tgt_sr, net_g, vc, hubert_model, version

	    print("Transcribing the input audio")
	    transcribe_btn_click(input_audio_fn)
	    print("Transcription complete")

	    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
	    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
	    os.environ["USER"] = "USER"

	    # End of the prompt within the audio, in seconds; it depends on the clip.
	    cut_off_sec = cutoff_value

	    target_transcript = transcribed_text + ' ' + target_transcript
	    print(target_transcript)
	    info = torchaudio.info(audio_fn)
	    audio_dur = info.num_frames / info.sample_rate

	    # Explicit check instead of assert, which is stripped under `python -O`.
	    if not cut_off_sec < audio_dur:
	        raise ValueError(f"cut_off_sec {cut_off_sec} is larger than the audio duration {audio_dur}")
	    prompt_end_frame = int(cut_off_sec * info.sample_rate)

	    # The VoiceCraft model is loaded once and then reused across calls.
	    if voicecraft_model is None:
	        load_voicecraft()

	    encodec_fn = "./pretrained_models/encodec_4cb2048_giga.th"
	    text_tokenizer = TextTokenizer(backend="espeak")
	    audio_tokenizer = AudioTokenizer(signature=encodec_fn)  # will also put the neural codec model on gpu

	    # Run the model to get the output.
	    decode_config = {'top_k': top_k, 'top_p': top_p, 'temperature': temperature, 'stop_repetition': stop_repetition,
	                     'kvcache': kvcache, "codec_audio_sr": codec_audio_sr, "codec_sr": codec_sr,
	                     "silence_tokens": silence_tokens, "sample_batch_size": sample_batch_size}
	    from lib.voicecraft.inference_tts_scale import inference_one_sample
	    concated_audio, gen_audio = inference_one_sample(voicecraft_model, voicecraft_config, phn2num, text_tokenizer, audio_tokenizer,
	                                                     audio_fn, target_transcript, config.device, decode_config,
	                                                     prompt_end_frame)
	    print("prompt_end_frame: ", prompt_end_frame, "voicecraft_config: ", voicecraft_config, "audio_fn: ", audio_fn, "target_transcript: ", target_transcript, "config.device: ", config.device, "decode_config: ", decode_config)

	    # Save both segments for comparison.
	    concated_audio, gen_audio = concated_audio[0].cpu(), gen_audio[0].cpu()

	    output_dir = "./demo/generated_tts"
	    os.makedirs(output_dir, exist_ok=True)
	    seg_save_fn_gen = f"{output_dir}/{os.path.basename(audio_fn)[:-4]}_gen_seed{seed}.wav"
	    seg_save_fn_concat = f"{output_dir}/{os.path.basename(audio_fn)[:-4]}_concat_seed{seed}.wav"

	    torchaudio.save(seg_save_fn_gen, gen_audio, int(codec_audio_sr))
	    torchaudio.save(seg_save_fn_concat, concated_audio, int(codec_audio_sr))

	    f0_up_key = int(f0_up_key)
	    try:
	        audio = load_audio(seg_save_fn_gen, 16000, DoFormant, Quefrency, Timbre).squeeze()
	        # Scale down only when the peak exceeds 0.95 of full scale.
	        audio_max = np.abs(audio).max() / 0.95
	        if audio_max > 1:
	            audio /= audio_max
	        times = [0, 0, 0]
	        if hubert_model is None:
	            load_hubert()
	        if_f0 = cpt.get("f0", 1)
	        # Guard against common typos: strip quotes/whitespace and swap a
	        # "trained" index name for the "added" one.
	        file_index = (
	            file_index.strip(" ")
	            .strip('"')
	            .strip("\n")
	            .strip('"')
	            .strip(" ")
	            .replace("trained", "added")
	        )
	        print(f"Making VC Pipeline, device: {config.device}, audio shape: {audio.shape}")
	        audio_opt = vc.pipeline(
	            hubert_model,
	            net_g,
	            sid,
	            audio,
	            seg_save_fn_gen,
	            times,
	            f0_up_key,
	            f0_method,
	            file_index,
	            index_rate,
	            if_f0,
	            filter_radius,
	            tgt_sr,
	            resample_sr,
	            rms_mix_rate,
	            version,
	            protect,
	            crepe_hop_length,
	            f0_file=f0_file,
	        )
	        # Report the resample rate as output rate when one of at least 16 kHz was requested.
	        if resample_sr >= 16000 and tgt_sr != resample_sr:
	            tgt_sr = resample_sr
	        index_info = (
	            "Using index:%s." % file_index
	            if os.path.exists(file_index)
	            else "Index not used."
	        )
	        return "Success.\n %s\nTime:\n npy:%ss, f0:%ss, infer:%ss" % (
	            index_info,
	            times[0],
	            times[1],
	            times[2],
	        ), (tgt_sr, audio_opt)
	    except Exception:
	        # Surface the failure as text for the status box instead of raising.
	        error_text = traceback.format_exc()
	        print(error_text)
	        return error_text, (None, None)




	def upload_to_dataset(files, dir):
	    """Copy uploaded files into a dataset folder and report how many were copied.

	    ``files`` is an iterable of objects with a ``name`` path attribute; ``dir``
	    is the destination folder, with '' meaning './dataset'. The folder is
	    created when it does not exist. Returns the status message.
	    """
	    target_dir = './dataset' if dir == '' else dir
	    if not os.path.exists(target_dir):
	        os.makedirs(target_dir)
	    copied = 0
	    for copied, upload in enumerate(files, start=1):
	        shutil.copy2(upload.name, target_dir)
	    return f' {copied} files uploaded to {target_dir}.'

	def zip_downloader(model):
	    """Return the downloadable files of a trained voice plus a status message.

	    Args:
	        model: voice name; the weights are expected at ``./weights/<model>.pth``
	            and the index in ``./logs/<model>``.

	    Returns:
	        * ``({"__type__": "update"}, message)`` when the weights file is missing;
	        * ``([weights_path, index_path], "Done")`` when an index file whose name
	          ends in '.index' and contains 'added' exists (the last such file
	          reported by ``os.listdir`` is used);
	        * ``(weights_path, "Could not find Index file.")`` otherwise, including
	          when the logs folder itself does not exist (this used to raise).
	    """
	    weights_path = f'./weights/{model}.pth'
	    if not os.path.exists(weights_path):
	        return {"__type__": "update"}, f'Make sure the Voice Name is correct. I could not find {model}.pth'
	    logs_dir = f'./logs/{model}'
	    log_file = None
	    # A voice may have weights without a logs folder; treat that as "no index".
	    if os.path.isdir(logs_dir):
	        for file in os.listdir(logs_dir):
	            if file.endswith('.index') and 'added' in file:
	                log_file = file
	    if log_file is not None:
	        return [weights_path, f'{logs_dir}/{log_file}'], "Done"
	    return weights_path, "Could not find Index file."


	#download_from_url('https://drive.google.com/uc?id=1O98vvnle_nZP8ZdpnZFLZ5TU1UZe7x0p&confirm=t', 'JVKE-main', 'jvke')
	#download_from_url('https://drive.google.com/uc?id=1Wag0vPlp42kRDffccXljjjlK7QsHf2xe&confirm=t', 'JVKE-main-v2', 'jvke')
	#download_from_url('https://drive.google.com/uc?id=1h810cil3YRlN4pu4oO43zKq9z3cYjItp&confirm=t', 'jvke-nighttime-v4', 'jvke')
	# Startup calls to download_from_url, one per bundled voice: each passes a Google Drive
	# URL, the model name and the associated user 'cmss60'. They run every time this
	# module-level code executes.
	download_from_url('https://drive.google.com/uc?id=1fa6FSLwqSQMI49NvSXOpI4pUVuKsrop5&confirm=t', 'Andoni', 'cmss60')
	download_from_url('https://drive.google.com/uc?id=1iGhD93_szvs0xyg-U5z_jhfBECBxcTfK&confirm=t', 'Alex', 'cmss60')
	download_from_url('https://drive.google.com/uc?id=1DwRru_WFh4LS0eqU_39qEPuwltj9ZTRr&confirm=t', 'Elaine', 'cmss60')
	download_from_url('https://drive.google.com/uc?id=1Xen2BBRoqfF3CNO_XqEr2ZgCcITz--Je&confirm=t', 'Emily', 'cmss60')
	download_from_url('https://drive.google.com/uc?id=1gHfrS1rnhnj3sHdnOM4vx04rcxucc74D&confirm=t', 'Justis', 'cmss60')
	download_from_url('https://drive.google.com/uc?id=1PlQELpXawx74mEv9MYeREcyvwE7vFFe_&confirm=t', 'Kayana', 'cmss60')
	download_from_url('https://drive.google.com/uc?id=16hJvfWAhuWWVEeXyDYt9PHJl-k_Kouxf&confirm=t', 'Prince', 'cmss60')
	download_from_url('https://drive.google.com/uc?id=1zE1tP95_unNjVkqYb0aBt3AsBq_u9-R9&confirm=t', 'Lupe', 'cmss60')
	# Folders scanned below for model weights and index files.
	weight_root = "weights"
	index_root = "logs"
	# names: file names of every .pth file directly inside the weights folder.
	names = []
	for name in os.listdir(weight_root):
	if name.endswith(".pth"):
	names.append(name)
	# index_paths: "<folder>/<file>" for every .index file anywhere below the logs folder
	# whose name does not contain "trained".
	index_paths = []

	for root, dirs, files in os.walk(index_root, topdown=False):
	for name in files:
	if name.endswith(".index") and "trained" not in name:
	index_paths.append("%s/%s" % (root, name))

	with gr.Blocks(theme=gr.themes.Default(primary_hue="pink", secondary_hue="rose"), title="HITGEN AI") as app:
	with gr.Tabs():
	with gr.TabItem("Inference"):
	app.load(update_message)

	# Other RVC stuff
	with gr.Row():
	sid0 = gr.Dropdown(label="1. Choose your model", choices=sorted(names), value=check_for_name())
	refresh_button = gr.Button("Refresh", variant="primary")
	if check_for_name() != '':
	get_vc(sorted(names)[0])
	vc_transform0 = gr.Number(label="Key Shift: 0 for no key shifted output; 12 f for output an octave higher and -12 for output an octave lower.", value=0)
	#clean_button = gr.Button(i18n("卸载音色省显存"), variant="primary")
	spk_item = gr.Slider(
	minimum=0,
	maximum=2333,
	step=1,
	label="speaker id",
	value=0,
	visible=False,
	interactive=True,
	)
	#clean_button.click(fn=clean, inputs=[], outputs=[sid0])
	sid0.change(
	fn=get_vc,
	inputs=[sid0],
	outputs=[spk_item],
	)
	but0 = gr.Button("Convert", variant="primary")
	with gr.Row():
	with gr.Column():
	# with gr.Row():
	# dropbox = gr.File(label="Drag your audio file and click refresh.")
	# with gr.Row():
	# record_button=gr.Audio(source="microphone", label="Or you can use your microphone!", type="filepath")
	with gr.Row():
	input_audio0 = gr.Dropdown(
	label="2.Choose the audio file.",
	value="./audios/Test_Audio.mp3",
	choices=audio_files
	)
	# dropbox.upload(fn=save_to_wav2, inputs=[dropbox], outputs=[input_audio0])
	# dropbox.upload(fn=change_choices2, inputs=[], outputs=[input_audio0])
	refresh_button2 = gr.Button("Refresh", variant="primary", size='sm')
	transcribed_text = gr.Textbox(label="transcibed text + mfa",
	value="The dogs sat at the door.",
	info="write down the transcript for the file, or run whisper model to get the transcript. Takes time to download whisper models on first run")
	# record_button.change(fn=save_to_wav, inputs=[record_button], outputs=[input_audio0])
	# record_button.change(fn=change_choices2, inputs=[], outputs=[input_audio0])

	with gr.Row():
	# with gr.Column():
	# input_audio = gr.Audio(label="Input Audio", type="filepath")
	# # transcribe_btn_model = gr.Radio(value="base.en", interactive=True, label="what whisper model to download",
	# # choices=["tiny.en", "base.en", "small.en", "medium.en", "large"],
	# # info="VRAM usage: tiny.en 1 GB, base.en 1GB, small.en 2GB, medium.en 5GB, large 10GB.")
	# transcribed_text = gr.Textbox(label="transcibed text + mfa",
	# info="write down the transcript for the file, or run whisper model to get the transcript. Takes time to download whisper models on first run")
	# transcribe_info_text = gr.TextArea(label="How to use",
	# value="running everything for the first time will download necessary models (4GB for main encoder + model) \n load a voice and choose your whisper model, base works most of the time. \n transcription and mfa takes ~50s on a 3090 for a 7s audio clip, rerun this when uploading a new audio clip only\nchoose the END value of the cut off word \n")
	# transcribe_btn = gr.Button(value="transcribe and create mfa")


	with gr.Column():
	target_transcript = gr.Textbox(label="target transcript")
	output_audio_con = gr.Audio(label="Output Audio concatenated")
	output_audio_gen = gr.Audio(label="Output Audio generated")
	cutoff_value = gr.Number(label="cutoff_time", interactive=True, step=0.01)
	run_btn = gr.Button(value="run")
	run_btn_joint = gr.Button(value="run with RVC")

	# transcribe_btn.click(fn=transcribe_btn_click, inputs=[input_audio],
	# outputs=[transcribed_text])



	with gr.Column():
	vc_output2 = gr.Audio(
	label="Final Result! (Click on the three dots to download the audio)",
	type='filepath',
	interactive=False,
	)

	#with gr.Column():
	with gr.Accordion("Advanced TTS Settings", open=False):
	seed = gr.Number(label='seed', interactive=True, value=1)
	stop_repitition = gr.Radio(label="stop_repitition", interactive=True, choices=[1, 2, 3], value=3,
	info="if there are long silence in the generated audio, reduce the stop_repetition to 3, 2 or even 1")
	sample_batch_size = gr.Radio(label="sample_batch_size", interactive=True, choices=[4, 3, 2], value=4,
	info="if there are long silence or unnaturally strecthed words, increase sample_batch_size to 2, 3 or even 4")
	left_margin = gr.Number(label='left_margin', interactive=True, value=0.08, step=0.01,
	info=" not used for TTS, only for speech editing")
	right_margin = gr.Number(label='right_margin', interactive=True, value=0.08, step=0.01,
	info=" not used for TTS, only for speech editing")
	codecaudio_sr = gr.Number(label='codec_audio_sr', interactive=True, value=16000)
	codec_sr = gr.Number(label='codec', interactive=True, value=50)
	top_k = gr.Number(label='top_k', interactive=True, value=0)
	top_p = gr.Number(label='top_p', interactive=True, value=0.8)
	temperature = gr.Number(label='temperature', interactive=True, value=1)
	kvcache = gr.Number(label='kvcache', interactive=True, value=1,
	info='set to 0 to use less VRAM, results may be worse and slower inference')
	silence_tokens = gr.Textbox(label="silence tokens", value="[1388,1898,131]")
	with gr.Accordion("Index Settings", open=False):
	#with gr.Row():

	file_index1 = gr.Dropdown(
	label="3. Choose the index file (in case it wasn't automatically found.)",
	choices=get_indexes(),
	value=get_index(),
	interactive=True,
	)
	sid0.change(fn=match_index, inputs=[sid0],outputs=[file_index1])
	refresh_button.click(
	fn=change_choices, inputs=[], outputs=[sid0, file_index1]
	)
	# file_big_npy1 = gr.Textbox(
	# label=i18n("特征文件路径"),
	# value="E:\\codes\py39\\vits_vc_gpu_train\\logs\\mi-test-1key\\total_fea.npy",
	# interactive=True,
	# )
	index_rate1 = gr.Slider(
	minimum=0,
	maximum=1,
	label="index rate",
	value=0,
	interactive=True,
	)

	# animate_button.click(fn=mouth, inputs=[size, face, vc_output2, faces], outputs=[animation, preview])

	with gr.Accordion("Advanced Options", open=False):
	f0method0 = gr.Radio(
	label="Optional: Change the Pitch Extraction Algorithm. Extraction methods are sorted from 'worst quality' to 'best quality'. If you don't know what you're doing, leave rmvpe.",
	choices=["pm", "dio", "crepe-tiny", "mangio-crepe-tiny", "crepe", "harvest", "mangio-crepe", "rmvpe"], # Fork Feature. Add Crepe-Tiny
	value="rmvpe",
	interactive=True,
	)

	crepe_hop_length = gr.Slider(
	minimum=1,
	maximum=512,
	step=1,
	label="Mangio-Crepe Hop Length. Higher numbers will reduce the chance of extreme pitch changes but lower numbers will increase accuracy. 64-192 is a good range to experiment with.",
	value=120,
	interactive=True,
	visible=False,
	)
	f0method0.change(fn=whethercrepeornah, inputs=[f0method0], outputs=[crepe_hop_length])
	filter_radius0 = gr.Slider(
	minimum=0,
	maximum=7,
	label="label",
	value=3,
	step=1,
	interactive=True,
	)
	resample_sr0 = gr.Slider(
	minimum=0,
	maximum=48000,
	label="label",
	value=0,
	step=1,
	interactive=True,
	visible=False
	)
	rms_mix_rate0 = gr.Slider(
	minimum=0,
	maximum=1,
	label="label",
	value=0.21,
	interactive=True,
	)
	protect0 = gr.Slider(
	minimum=0,
	maximum=0.5,
	label="label",
	value=0,
	step=0.01,
	interactive=True,
	)
	formanting = gr.Checkbox(
	value=bool(DoFormant),
	label="[EXPERIMENTAL] Formant shift inference audio",
	info="Used for male to female and vice-versa conversions",
	interactive=True,
	visible=True,
	)

	formant_preset = gr.Dropdown(
	value='',
	choices=get_fshift_presets(),
	label="browse presets for formanting",
	visible=bool(DoFormant),
	)
	formant_refresh_button = gr.Button(
	value='\U0001f504',
	visible=bool(DoFormant),
	variant='primary',
	)
	#formant_refresh_button = ToolButton( elem_id='1')
	#create_refresh_button(formant_preset, lambda: {"choices": formant_preset}, "refresh_list_shiftpresets")

	qfrency = gr.Slider(
	value=Quefrency,
	info="Default value is 1.0",
	label="Frequency for formant shifting",
	minimum=0.0,
	maximum=16.0,
	step=0.1,
	visible=bool(DoFormant),
	interactive=True,
	)
	tmbre = gr.Slider(
	value=Timbre,
	info="Default value is 1.0",
	label="Timbre for formant shifting",
	minimum=0.0,
	maximum=16.0,
	step=0.1,
	visible=bool(DoFormant),
	interactive=True,
	)

	formant_preset.change(fn=preset_apply, inputs=[formant_preset, qfrency, tmbre], outputs=[qfrency, tmbre])
	frmntbut = gr.Button("Apply", variant="primary", visible=bool(DoFormant))
	formanting.change(fn=formant_enabled,inputs=[formanting,qfrency,tmbre,frmntbut,formant_preset,formant_refresh_button],outputs=[formanting,qfrency,tmbre,frmntbut,formant_preset,formant_refresh_button])
	frmntbut.click(fn=formant_apply,inputs=[qfrency, tmbre], outputs=[qfrency, tmbre])
	formant_refresh_button.click(fn=update_fshift_presets,inputs=[formant_preset, qfrency, tmbre],outputs=[formant_preset, qfrency, tmbre])

	with gr.Row():
	vc_output1 = gr.Textbox("")
	f0_file = gr.File(label="f0 file", visible=False)

	run_btn.click(fn=run,
	inputs=[
	seed,
	stop_repitition,
	sample_batch_size,
	left_margin,
	right_margin,
	codecaudio_sr,
	codec_sr,
	top_k,
	top_p,
	temperature,
	kvcache,
	cutoff_value,
	target_transcript,
	silence_tokens,
	transcribed_text],
	outputs=[
	output_audio_con,
	output_audio_gen
	])

	but0.click(
	vc_single,
	[
	spk_item,
	input_audio0,
	vc_transform0,
	f0_file,
	f0method0,
	file_index1,
	# file_index2,
	# file_big_npy1,
	index_rate1,
	filter_radius0,
	resample_sr0,
	rms_mix_rate0,
	protect0,
	crepe_hop_length
	],
	[vc_output1, vc_output2],
	)

	run_btn_joint.click(
	fn=run_joint,
	inputs=[
	input_audio0,
	seed,
	stop_repitition,
	sample_batch_size,
	left_margin,
	right_margin,
	codecaudio_sr,
	codec_sr,
	top_k,
	top_p,
	temperature,
	kvcache,
	cutoff_value,
	target_transcript,
	silence_tokens,
	transcribed_text,
	spk_item,
	vc_transform0,
	f0_file,
	f0method0,
	file_index1,
	# file_index2,
	# file_big_npy1,
	index_rate1,
	filter_radius0,
	resample_sr0,
	rms_mix_rate0,
	protect0,
	crepe_hop_length
	],
	outputs=[vc_output1, vc_output2])

	with gr.Accordion("Batch Conversion",open=False, visible=False):
	with gr.Row():
	with gr.Column():
	vc_transform1 = gr.Number(
	label="speaker id", value=0
	)
	opt_input = gr.Textbox(label="opt", value="opt")
	f0method1 = gr.Radio(
	label="f0 method",
	choices=["pm", "harvest", "crepe", "rmvpe"],
	value="rmvpe",
	interactive=True,
	)
	filter_radius1 = gr.Slider(
	minimum=0,
	maximum=7,
	label="harvest",
	value=3,
	step=1,
	interactive=True,
	)
	with gr.Column():
	file_index3 = gr.Textbox(
	label="file index",
	value="",
	interactive=True,
	)
	file_index4 = gr.Dropdown(
	label="index path (dropdown)",
	choices=sorted(index_paths),
	interactive=True,
	)
	refresh_button.click(
	fn=lambda username: change_choices(username)[1],
	inputs=[gr.State('username')],
	outputs=file_index4,
	)
	# file_big_npy2 = gr.Textbox(
	# label=i18n("特征文件路径"),
	# value="E:\\codes\\py39\\vits_vc_gpu_train\\logs\\mi-test-1key\\total_fea.npy",
	# interactive=True,
	# )
	index_rate2 = gr.Slider(
	minimum=0,
	maximum=1,
	label="index rate 2",
	value=1,
	interactive=True,
	)
	with gr.Column():
	resample_sr1 = gr.Slider(
	minimum=0,
	maximum=48000,
	label="resample rate",
	value=0,
	step=1,
	interactive=True,
	)
	rms_mix_rate1 = gr.Slider(
	minimum=0,
	maximum=1,
	label="rms mix rate",
	value=1,
	interactive=True,
	)
	protect1 = gr.Slider(
	minimum=0,
	maximum=0.5,
	label="protection rate",
	value=0.33,
	step=0.01,
	interactive=True,
	)
	with gr.Column():
	dir_input = gr.Textbox(
	label="directory input",
	value="E:\codes\py39\\test-20230416b\\todo-songs",
	)
	# Multi-file upload component labelled "input".
	inputs = gr.File(
	file_count="multiple", label="input"
	)
	with gr.Row():
	# Output format selector; "flac" is the default.
	format1 = gr.Radio(
	label="output format",
	choices=["wav", "flac", "mp3", "m4a"],
	value="flac",
	interactive=True,
	)
	but1 = gr.Button("primary", variant="primary")
	vc_output3 = gr.Textbox(label="label")
	# Clicking but1 calls vc_multi with the settings below and shows its result
	# in vc_output3. spk_item and crepe_hop_length are not created in this
	# section; they come from elsewhere in the file.
	but1.click(
	vc_multi,
	[
	spk_item,
	dir_input,
	opt_input,
	inputs,
	vc_transform1,
	f0method1,
	file_index3,
	file_index4,
	# file_big_npy2,
	index_rate2,
	filter_radius1,
	resample_sr1,
	rms_mix_rate1,
	protect1,
	format1,
	crepe_hop_length,
	],
	[vc_output3],
	)
	# Second handler on the same button.
	# NOTE(review): as far as this part of the file shows, easy_uploader is only
	# assigned later, inside the training-tab branch. The lambda looks the name
	# up at click time, so this would fail with NameError if that branch never
	# ran -- confirm.
	but1.click(fn=lambda: easy_uploader.clear())
	# "Download Voice Models" tab: collects a link and a model name, passes both
	# to download_from_url on click, and shows the returned value in the status
	# textbox.
	with gr.TabItem("Download Voice Models"):
	with gr.Row():
	url=gr.Textbox(label="Huggingface Link:")
	with gr.Row():
	model = gr.Textbox(label="Name of the model (without spaces):")
	download_button=gr.Button("Download")
	with gr.Row():
	status_bar=gr.Textbox(label="Download Status")
	download_button.click(fn=download_from_url, inputs=[url, model], outputs=[status_bar])

	def has_two_files_in_pretrained_folder(pretrained_folder="./pretrained/", min_files=2):
	    """Report whether the pretrained-weights folder holds enough entries.

	    Args:
	        pretrained_folder: Directory to inspect. Defaults to "./pretrained/".
	        min_files: Minimum number of directory entries required.
	            Defaults to 2.

	    Returns:
	        True when ``pretrained_folder`` is a directory containing at least
	        ``min_files`` entries (anything ``os.listdir`` reports, files or
	        sub-directories alike). False when the path is missing or is not
	        a directory.
	    """
	    try:
	        entries = os.listdir(pretrained_folder)
	    except (FileNotFoundError, NotADirectoryError):
	        # A missing path, or a plain file sitting where the folder should
	        # be, simply means the weights are not available.
	        return False
	    return len(entries) >= min_files

	# The training tab is only built when the pretrained-folder check passes.
	if has_two_files_in_pretrained_folder():
	print("Pretrained weights are downloaded. Training tab enabled!\n-------------------------------")
	# NOTE(review): the tab is created with visible=False although the message
	# above announces it as enabled -- confirm this is intended.
	with gr.TabItem("Train", visible=False):
	with gr.Row():
	with gr.Column():
	# Name of the voice/experiment; passed as exp_dir1 to the handlers below.
	exp_dir1 = gr.Textbox(label="Voice Name:", value="My-Voice")
	# Three hidden radios (visible=False) holding the sample rate ("40k"),
	# the f0-extraction flag (True) and the RVC version ("v2").
	sr2 = gr.Radio(
	label="sample rate",
	choices=["40k", "48k"],
	value="40k",
	interactive=True,
	visible=False
	)
	if_f0_3 = gr.Radio(
	label="extract f0",
	choices=[True, False],
	value=True,
	interactive=True,
	visible=False
	)
	version19 = gr.Radio(
	label="RVC version",
	choices=["v1", "v2"],
	value="v2",
	interactive=True,
	visible=False,
	)
	# Integer slider from 0 to config.n_cpu, defaulting to config.n_cpu.
	np7 = gr.Slider(
	minimum=0,
	maximum=config.n_cpu,
	step=1,
	label="# of CPUs for data processing (Leave as it is)",
	value=config.n_cpu,
	interactive=True,
	visible=True
	)
	# The dataset location can be typed as a path or filled by dropping audio
	# files onto the uploader (restricted to the 'audio' file type).
	trainset_dir4 = gr.Textbox(label="Path to your dataset (audios, not zip):", value="./dataset")
	easy_uploader = gr.Files(label='OR Drop your audios here. They will be uploaded in your dataset path above.',file_types=['audio'])
	# NOTE(review): this rebinds but1, a name the batch-conversion section
	# earlier in the file also uses for its own button.
	but1 = gr.Button("1. Process The Dataset", variant="primary")
	info1 = gr.Textbox(label="Status (wait until it says 'end preprocess'):", value="")
	# Uploaded files go to upload_to_dataset together with the dataset path;
	# its return value is displayed in info1.
	easy_uploader.upload(fn=upload_to_dataset, inputs=[easy_uploader, trainset_dir4], outputs=[info1])
	# Step 1: run preprocess_dataset and report into info1.
	but1.click(
	preprocess_dataset, [trainset_dir4, exp_dir1, sr2, np7], [info1]
	)
	with gr.Column():
	# Hidden integer slider in [0, 4], default 0.
	spk_id5 = gr.Slider(
	minimum=0,
	maximum=4,
	step=1,
	label="speaker id",
	value=0,
	interactive=True,
	visible=False
	)
	# Hidden, collapsed accordion; the textboxes are pre-filled from the
	# `gpus` and `gpu_info` values defined elsewhere in the file.
	with gr.Accordion('GPU Settings', open=False, visible=False):
	gpus6 = gr.Textbox(
	label="0-1-2",
	value=gpus,
	interactive=True,
	visible=False
	)
	gpu_info9 = gr.Textbox(label="GPU", value=gpu_info)
	# f0 method used for extraction; "rmvpe" is the default.
	f0method8 = gr.Radio(
	label="f0 method",
	choices=["harvest","crepe", "mangio-crepe", "rmvpe"], # Fork feature: Crepe on f0 extraction for training.
	value="rmvpe",
	interactive=True,
	)

	# Hidden integer slider in [1, 512], default 128.
	extraction_crepe_hop_length = gr.Slider(
	minimum=1,
	maximum=512,
	step=1,
	label="crepe_hop_length",
	value=128,
	interactive=True,
	visible=False,
	)
	# Changing the f0 method lets whethercrepeornah update the hop-length slider
	# (presumably toggling its visibility -- confirm in whethercrepeornah).
	f0method8.change(fn=whethercrepeornah, inputs=[f0method8], outputs=[extraction_crepe_hop_length])
	but2 = gr.Button("2. Pitch Extraction", variant="primary")
	info2 = gr.Textbox(label="Status(Check the Colab Notebook's cell output):", value="", max_lines=8)
	# Step 2: run extract_f0_feature and report into info2.
	but2.click(
	extract_f0_feature,
	[gpus6, np7, f0method8, if_f0_3, exp_dir1, version19, extraction_crepe_hop_length],
	[info2],
	)
	with gr.Row():
	with gr.Column():
	# Epoch count slider: range [1, 5000], step 10, default 250.
	total_epoch11 = gr.Slider(
	minimum=1,
	maximum=5000,
	step=10,
	label="Total # of training epochs (IF you choose a value too high, your model will sound horribly overtrained.):",
	value=250,
	interactive=True,
	)
	# The stop button starts hidden; the train button starts visible.
	butstop = gr.Button(
	"Stop Training",
	variant='primary',
	visible=False,
	)
	but3 = gr.Button("3. Train Model", variant="primary", visible=True)

	# Both buttons call stoptraining with a hidden constant (0 from but3, 1 from
	# butstop); its return values are applied to the two buttons (presumably
	# swapping which one is shown -- confirm in stoptraining).
	but3.click(fn=stoptraining, inputs=[gr.Number(value=0, visible=False)], outputs=[but3, butstop])
	butstop.click(fn=stoptraining, inputs=[gr.Number(value=1, visible=False)], outputs=[butstop, but3])


	but4 = gr.Button("4.Train Index", variant="primary")
	# Shared status box for the training, index and download handlers.
	info3 = gr.Textbox(label="Status(Check the Colab Notebook's cell output):", value="", max_lines=10)
	# Optional training settings, shown in a collapsed accordion.
	with gr.Accordion("Training Preferences (You can leave these as they are)", open=False):
	#gr.Markdown(value=i18n("step3: fill in the training settings, then start training the model and index"))
	with gr.Column():
	# Integer slider in [1, 200], default 10.
	save_epoch10 = gr.Slider(
	minimum=1,
	maximum=200,
	step=1,
	label="Backup every X amount of epochs:",
	value=10,
	interactive=True,
	)
	# Integer slider in [1, 40]; the default comes from default_batch_size,
	# which is defined elsewhere in the file.
	batch_size12 = gr.Slider(
	minimum=1,
	maximum=40,
	step=1,
	label="Batch Size (LEAVE IT unless you know what you're doing!):",
	value=default_batch_size,
	interactive=True,
	)
	# Checkbox defaults: save-latest on, GPU caching off, save-every-weights on.
	if_save_latest13 = gr.Checkbox(
	label="Save only the latest '.ckpt' file to save disk space.",
	value=True,
	interactive=True,
	)
	if_cache_gpu17 = gr.Checkbox(
	label="Cache all training sets to GPU memory. Caching small datasets (less than 10 minutes) can speed up training, but caching large datasets will consume a lot of GPU memory and may not provide much speed improvement.",
	value=False,
	interactive=True,
	)
	if_save_every_weights18 = gr.Checkbox(
	label="Save a small final model to the 'weights' folder at each save point.",
	value=True,
	interactive=True,
	)
	# Step 5: zip_downloader receives the voice name; its outputs go to the
	# download component and to the info3 status box.
	zip_model = gr.Button('5. Download Model')
	zipped_model = gr.Files(label='Your Model and Index file can be downloaded here:')
	zip_model.click(fn=zip_downloader, inputs=[exp_dir1], outputs=[zipped_model, info3])
	with gr.Group():
	# Hidden, collapsed accordion holding the generator/discriminator base-model
	# paths and the GPU list used by the training handlers.
	with gr.Accordion("Base Model Locations:", open=False, visible=False):
	pretrained_G14 = gr.Textbox(
	label="G PATH",
	value="pretrained_v2/f0G40k.pth",
	interactive=True,
	)
	pretrained_D15 = gr.Textbox(
	label="D PATH",
	value="pretrained_v2/f0D40k.pth",
	interactive=True,
	)
	gpus16 = gr.Textbox(
	label="GPU NUM",
	value=gpus,
	interactive=True,
	)
	# Changing the sample rate, RVC version or f0 flag re-runs the matching
	# change_* handler, whose outputs are written back to the G/D path boxes.
	# change_sr2 also updates version19; change_f0 also updates f0method8.
	sr2.change(
	change_sr2,
	[sr2, if_f0_3, version19],
	[pretrained_G14, pretrained_D15, version19],
	)
	version19.change(
	change_version19,
	[sr2, if_f0_3, version19],
	[pretrained_G14, pretrained_D15],
	)
	if_f0_3.change(
	change_f0,
	[if_f0_3, sr2, version19],
	[f0method8, pretrained_G14, pretrained_D15],
	)
	# Hidden button (visible=False) wired to train1key further below.
	but5 = gr.Button("label", variant="primary", visible=False)
	# Step 3: run click_train. Its outputs go to the status box and to both the
	# stop and train buttons. but3 also has a stoptraining click handler
	# registered earlier in the file.
	but3.click(
	click_train,
	[
	exp_dir1,
	sr2,
	if_f0_3,
	spk_id5,
	save_epoch10,
	total_epoch11,
	batch_size12,
	if_save_latest13,
	pretrained_G14,
	pretrained_D15,
	gpus16,
	if_cache_gpu17,
	if_save_every_weights18,
	version19,
	],
	[
	info3,
	butstop,
	but3,
	],
	)
	# Step 4: run train_index for the chosen voice name and RVC version.
	but4.click(train_index, [exp_dir1, version19], info3)
	# train1key receives the dataset, extraction and training settings in one
	# call and reports into info3.
	but5.click(
	train1key,
	[
	exp_dir1,
	sr2,
	if_f0_3,
	trainset_dir4,
	spk_id5,
	np7,
	f0method8,
	save_epoch10,
	total_epoch11,
	batch_size12,
	if_save_latest13,
	pretrained_G14,
	pretrained_D15,
	gpus16,
	if_cache_gpu17,
	if_save_every_weights18,
	version19,
	extraction_crepe_hop_length
	],
	info3,
	)

	# Pretrained-folder check failed: no training tab is built, only this
	# console notice is printed.
	else:
	print(
	"Pretrained weights not downloaded. Disabling training tab.\n"
	"Wondering how to train a voice? Join AI HUB Discord Server! https://discord.gg/aihub\n"
	"-------------------------------\n"
	)

	# Turn on the request queue, then start the server without a public share
	# link and with username/password login.
	# NOTE(review): the login credentials are hard-coded in source; consider
	# loading them from the environment or a secrets store instead.
	app.queue(
	    concurrency_count=511,
	    max_size=1022,
	).launch(
	    share=False,
	    quiet=False,
	    auth=[
	        ('jvke', 'thisfeelslikeai'),
	        ('cmss60', 'yourseedislate'),
	    ],
	)
	#endregion