# rvc / app.py
# mattricesound's picture
# Still trying to get audio to change5
# b31036b
import gradio as gr
from infer_rvc_python import BaseLoader
import soundfile as sf
import random
from urllib.request import urlretrieve
import os
import zipfile
# Remote assets fetched on every start-up. Each one is saved in the working
# directory under the last path segment of its URL.
files_to_retrieve = [
    "https://replicate.delivery/pbxt/N97QM3XNFrooJhV6Fb0meBff0aAG1rEDfvuxcdLS6fTx1vmWC/test.zip",
    # Disabled downloads, kept for reference:
    # "https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt?download=true",
    # "https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/rmvpe.pt?download=true"
]
for url in files_to_retrieve:
    print(f"Downloading {url}")
    local_name = url.rsplit("/", 1)[-1]
    urlretrieve(url, local_name)

# Unpack the downloaded archive into the working directory.
with zipfile.ZipFile("test.zip", mode="r") as archive:
    archive.extractall(path=".")

# Single converter instance shared by every request; CPU-only inference.
converter = BaseLoader(
    only_cpu=True,
    hubert_path="./hubert_base.pt",
    rmvpe_path="./rmvpe.pt",
)

# Voice model and feature index used for every conversion.
# NOTE(review): presumably both files are extracted from test.zip — confirm.
model = "test.pth"
index = "added_IVF839_Flat_nprobe_1_test_v2.index"
def voice_conversion(
    audio,
    pitch_change,
    filter_radius,
    envelope_ratio,
    index_influence,
    consonant_breath_protection,
):
    """Convert one recording with the module-level ``model`` and ``index``.

    This is the click handler wired to the "Convert" button in ``ui()``; the
    parameter order matches the ``inputs`` list given to ``button.click``.

    Args:
        audio: Path of the recorded/uploaded clip (the input component uses
            ``type="filepath"``). Empty/``None`` when no clip was supplied.
        pitch_change: Forwarded to ``run`` as ``pitch_lvl``.
        filter_radius: Forwarded to ``run`` as ``r_m_f``
            (respiration median filtering).
        envelope_ratio: Forwarded to ``run`` as ``e_r``.
        index_influence: Forwarded to ``run`` as ``index_inf``.
        consonant_breath_protection: Forwarded to ``run`` as ``c_b_p``.

    Returns:
        The ``(sample_rate, samples)`` tuple produced by ``run``.

    Raises:
        gr.Error: If no audio clip was provided.
    """
    # Without this guard str(None) would be handed to the converter as the
    # literal path "None" and fail with an unrelated error.
    if not audio:
        raise gr.Error("Please record or upload an audio clip before converting.")

    # Keyword arguments keep the slider -> parameter mapping explicit; the
    # pitch algorithm is fixed to "rmvpe+".
    audio_out = run(
        audio_files=[str(audio)],
        file_m=model,
        pitch_alg="rmvpe+",
        pitch_lvl=pitch_change,
        file_index=index,
        index_inf=index_influence,
        r_m_f=filter_radius,
        e_r=envelope_ratio,
        c_b_p=consonant_breath_protection,
    )
    print(audio_out)
    return audio_out
def convert_now(audio_files, random_tag):
    """Invoke the shared ``converter`` on ``audio_files``.

    Args:
        audio_files: Input audio paths, passed through unchanged.
        random_tag: Tag passed through to the converter; ``run`` registers a
            configuration under the same tag via ``converter.apply_conf``
            before calling this function.

    Returns:
        Whatever ``converter`` returns; ``run`` reads element ``0`` of it as
        an audio file.
    """
    # Never overwrite the inputs; use up to 8 parallel workers.
    options = {"overwrite": False, "parallel_workers": 8}
    return converter(audio_files, random_tag, **options)
def run(
    audio_files,
    file_m,
    pitch_alg,
    pitch_lvl,
    file_index,
    index_inf,
    r_m_f,
    e_r,
    c_b_p,
):
    """Configure the shared converter, convert, and load the first result.

    Args:
        audio_files: Non-empty list of input audio paths. Only the first
            entry decides whether output is resampled.
        file_m: Model file, passed as ``file_model``.
        pitch_alg: Pitch algorithm name, passed as ``pitch_algo``.
        pitch_lvl: Pitch level, passed as ``pitch_lvl``.
        file_index: Index file, passed as ``file_index``.
        index_inf: Passed as ``index_influence``.
        r_m_f: Passed as ``respiration_median_filtering``.
        e_r: Passed as ``envelope_ratio``.
        c_b_p: Passed as ``consonant_breath_protection``.

    Returns:
        ``(sample_rate, samples)`` for the first converted file, with the
        samples read as ``int32``.

    Raises:
        ValueError: If ``audio_files`` is empty.
        RuntimeError: If the conversion returned no output.
    """
    # Fail early with a clear message instead of an IndexError further down.
    if not audio_files:
        raise ValueError("audio_files must contain at least one input path")

    # A fresh tag per call keeps this request's configuration separate.
    random_tag = "USER_" + str(random.randint(10000000, 99999999))
    print("PITCH LVL: ", pitch_lvl)

    # Case-insensitive so that ".MP3" uploads are resampled as well.
    first_is_mp3 = audio_files[0].lower().endswith(".mp3")

    converter.apply_conf(
        tag=random_tag,
        file_model=file_m,
        pitch_algo=pitch_alg,
        pitch_lvl=pitch_lvl,
        file_index=file_index,
        index_influence=index_inf,
        respiration_median_filtering=r_m_f,
        envelope_ratio=e_r,
        consonant_breath_protection=c_b_p,
        resample_sr=44100 if first_is_mp3 else 0,
    )

    output = convert_now(audio_files, random_tag)
    # NOTE(review): assumes the converter returns a sequence of output paths
    # that may be empty when conversion fails — confirm against the library.
    if not output:
        raise RuntimeError("Voice conversion produced no output file")

    audio, sr = sf.read(output[0], dtype="int32")
    return (sr, audio)
def ui():
    """Build the Gradio Blocks page for the voice converter.

    The page has an audio input (microphone or upload), five parameter
    sliders, a "Convert" button and a read-only audio output. Clicking the
    button calls ``voice_conversion``.

    Returns:
        The constructed ``gr.Blocks`` instance; it is not launched here.
    """

    def make_slider(label, low, high, default, step):
        # All five controls are interactive sliders that differ only in
        # label, range, default and step.
        return gr.Slider(
            minimum=low,
            maximum=high,
            value=default,
            step=step,
            label=label,
            interactive=True,
        )

    with gr.Blocks() as demo:
        audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath")

        # NOTE(review): the five sliders are assumed to share this Row —
        # confirm against the intended layout.
        with gr.Row():
            pitch_slider = make_slider("Pitch", -24, 24, 0, 1)
            index_influence_slider = make_slider("Index Influence", 0, 1, 0.75, 0.01)
            median_filter_slider = make_slider("Resp. Median Filtering", 0, 10, 3, 1)
            envelope_slider = make_slider("Envelope Ratio", 0, 1, 0.25, 0.01)
            protection_slider = make_slider(
                "Consonant Breath Protection", 0, 1, 0.5, 0.01
            )

        convert_button = gr.Button("Convert")
        audio_output = gr.Audio(interactive=False, type="numpy")

        # Order must mirror voice_conversion's positional parameters.
        handler_inputs = [
            audio_input,
            pitch_slider,
            median_filter_slider,
            envelope_slider,
            index_influence_slider,
            protection_slider,
        ]
        convert_button.click(
            voice_conversion,
            inputs=handler_inputs,
            outputs=[audio_output],
        )

    return demo
# Login credentials for the app's auth prompt. They can be overridden through
# the environment so that real secrets need not live in the source; the
# previous hard-coded values remain as defaults for backward compatibility.
# NOTE(review): the defaults are visible in the repository — set both
# variables in any deployment that needs real access control.
auth_username = os.environ.get("RVC_APP_USERNAME", "output")
auth_password = os.environ.get("RVC_APP_PASSWORD", "becreative")

ui().launch(auth=(auth_username, auth_password))