Spaces:
Runtime error
Runtime error
Commit
·
0019e18
0
Parent(s):
Init commit
Browse files- .gitignore +6 -0
- README.md +5 -0
- app.py +160 -0
- gitattributes +35 -0
- inference.py +65 -0
- playground.ipynb +164 -0
- requirements.txt +1 -0
.gitignore
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
env/
|
2 |
+
*.mp3
|
3 |
+
*.pt
|
4 |
+
*.index
|
5 |
+
*.pth
|
6 |
+
*.npy
|
README.md
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
license: mit
|
3 |
+
title: Rvc
|
4 |
+
sdk: gradio
|
5 |
+
---
|
app.py
ADDED
@@ -0,0 +1,160 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from infer_rvc_python import BaseLoader
|
3 |
+
import random
|
4 |
+
from urllib.request import urlretrieve
|
5 |
+
|
6 |
+
# Remote assets fetched at startup: test.zip (presumably the archive holding
# test.pth and its .index file -- TODO confirm) plus the HuBERT and RMVPE
# checkpoints that BaseLoader is pointed at below.
files_to_retrieve = [
    "https://replicate.delivery/pbxt/N97QM3XNFrooJhV6Fb0meBff0aAG1rEDfvuxcdLS6fTx1vmWC/test.zip",
    "https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt?download=true",
    "https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/rmvpe.pt?download=true",
]

from urllib.parse import urlsplit

for url in files_to_retrieve:
    print(f"Downloading {url}")
    # Name the local file after the URL *path* only. Splitting the raw URL on
    # "/" kept the "?download=true" query string in the filename, so the
    # checkpoints were never written to ./hubert_base.pt and ./rmvpe.pt.
    local_name = urlsplit(url).path.rsplit("/", 1)[-1]
    urlretrieve(url, local_name)

# unzip test.zip
import zipfile

with zipfile.ZipFile("test.zip", "r") as zip_ref:
    zip_ref.extractall(".")


# Shared converter used by run() and convert_now(); it reads the two
# checkpoints downloaded above from the working directory.
converter = BaseLoader(
    only_cpu=True, hubert_path="./hubert_base.pt", rmvpe_path="./rmvpe.pt"
)

# Model and index file names handed to run() by voice_conversion().
model = "test.pth"
index = "added_IVF839_Flat_nprobe_1_test_v2.index"
|
28 |
+
|
29 |
+
|
30 |
+
|
31 |
+
|
32 |
+
|
33 |
+
def voice_conversion(
    audio,
    pitch_change,
    filter_radius,
    envelope_ratio,
    index_influence,
    consonant_breath_protection,
):
    """Convert a single audio file with the module-level model and index.

    Args:
        audio: Path of the input audio, as delivered by the Gradio audio
            component. May be None when nothing was recorded or uploaded.
        pitch_change: Forwarded to run() as ``pitch_lvl``.
        filter_radius: Forwarded to run() as ``r_m_f``.
        envelope_ratio: Forwarded to run() as ``e_r``.
        index_influence: Forwarded to run() as ``index_inf``.
        consonant_breath_protection: Forwarded to run() as ``c_b_p``.

    Returns:
        The first element of the sequence returned by run().

    Raises:
        gr.Error: If no audio was provided.
    """
    # Without this guard str(None) would hand the literal path "None" to the
    # converter and the user would only see an opaque downstream failure.
    if not audio:
        raise gr.Error("Please record or upload an audio clip first.")

    audio_out = run(
        [str(audio)],
        model,
        "rmvpe+",
        pitch_change,
        index,
        index_influence,
        filter_radius,
        envelope_ratio,
        consonant_breath_protection,
    )
    print(audio_out)
    return audio_out[0]
|
54 |
+
|
55 |
+
|
56 |
+
|
57 |
+
def convert_now(audio_files, random_tag):
    """Call the module-level converter on ``audio_files`` under ``random_tag``.

    The converter is invoked with ``overwrite=False`` and
    ``parallel_workers=8``; its return value is passed back unchanged.
    """
    call_options = {"overwrite": False, "parallel_workers": 8}
    return converter(audio_files, random_tag, **call_options)
|
59 |
+
|
60 |
+
|
61 |
+
def run(
    audio_files,
    file_m,
    pitch_alg,
    pitch_lvl,
    file_index,
    index_inf,
    r_m_f,
    e_r,
    c_b_p,
):
    """Register a configuration under a fresh tag and convert ``audio_files``.

    A tag of the form ``USER_<8 digits>`` is generated, the given settings are
    registered on the module-level converter under that tag via
    ``apply_conf``, and the files are then converted through convert_now().

    Returns:
        Whatever convert_now() returns for ``audio_files`` and the new tag.
    """
    random_tag = f"USER_{random.randint(10000000, 99999999)}"

    # Resample to 44100 only when the first input is an .mp3; 0 otherwise.
    resample_sr = 0
    if audio_files[0].endswith(".mp3"):
        resample_sr = 44100

    settings = {
        "tag": random_tag,
        "file_model": file_m,
        "pitch_algo": pitch_alg,
        "pitch_lvl": pitch_lvl,
        "file_index": file_index,
        "index_influence": index_inf,
        "respiration_median_filtering": r_m_f,
        "envelope_ratio": e_r,
        "consonant_breath_protection": c_b_p,
        "resample_sr": resample_sr,
    }
    converter.apply_conf(**settings)

    return convert_now(audio_files, random_tag)
|
88 |
+
|
89 |
+
|
90 |
+
|
91 |
+
# Create the Gradio interface
|
92 |
+
# audio_input = gr.Audio(type="file")
|
93 |
+
# audio_output = gr.Audio(type="file")
|
94 |
+
|
95 |
+
# gr.Interface(fn=voice_conversion, inputs=audio_input, outputs=audio_output).launch()
|
96 |
+
|
97 |
+
|
98 |
+
def ui():
    """Build the Gradio Blocks app and return it without launching.

    The app has one audio input (microphone or upload, delivered as a file
    path), five sliders, a "Convert" button and an audio output. Clicking the
    button calls voice_conversion with the input and slider values.
    """
    with gr.Blocks() as demo:
        audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath")
        # NOTE(review): the original indentation was lost in this copy of the
        # file; all five sliders are assumed to sit inside this Row -- confirm.
        with gr.Row():
            pitch_slider = gr.Slider(
                minimum=-24,
                maximum=24,
                value=0,
                step=1,
                label="Pitch",
                interactive=True,
            )
            index_influence_slider = gr.Slider(
                minimum=0,
                maximum=1,
                value=0.75,
                step=0.01,
                label="Index Influence",
                interactive=True,
            )
            respiration_median_filtering = gr.Slider(
                minimum=0,
                maximum=10,
                value=3,
                step=1,
                label="Resp. Median Filtering",
                interactive=True,
            )
            envelope_ratio = gr.Slider(
                minimum=0,
                maximum=1,
                value=0.25,
                step=0.01,
                label="Envelope Ratio",
                interactive=True,
            )
            consonant_breath_protection = gr.Slider(
                minimum=0,
                maximum=1,
                value=0.5,
                step=0.01,
                label="Consonant Breath Protection",
                interactive=True,
            )
        button = gr.Button("Convert")
        audio_output = gr.Audio(type="filepath")
        # The inputs are listed in the positional order of voice_conversion's
        # parameters, which differs from the order the sliders are created in.
        button.click(
            voice_conversion,
            inputs=[
                audio_input,
                pitch_slider,
                respiration_median_filtering,
                envelope_ratio,
                index_influence_slider,
                consonant_breath_protection,
            ],
            outputs=audio_output,
        )

    return demo
|
158 |
+
|
159 |
+
|
160 |
+
# Build the Blocks app and launch it as soon as this module is executed.
ui().launch()
|
gitattributes
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
inference.py
ADDED
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
|
3 |
+
# import gradio as gr
|
4 |
+
# import spaces
|
5 |
+
from infer_rvc_python import BaseLoader
|
6 |
+
import random
|
7 |
+
import logging
|
8 |
+
import time
|
9 |
+
import soundfile as sf
|
10 |
+
from infer_rvc_python.main import download_manager
|
11 |
+
import zipfile
|
12 |
+
|
13 |
+
# Module-level converter used by run(); created with only_cpu=True and
# pointed at hubert_base.pt and rmvpe.pt in the working directory.
converter = BaseLoader(
    only_cpu=True, hubert_path="./hubert_base.pt", rmvpe_path="./rmvpe.pt"
)
|
16 |
+
|
17 |
+
|
18 |
+
def main():
    """Run one sample conversion with fixed settings and report the result.

    Converts ``super-shy-mdx.mp3`` with ``test.pth`` and its index file using
    the "rmvpe+" pitch algorithm at a pitch level of -12.

    Returns:
        Whatever run() returns for the sample file.
    """
    audio_out = run(
        ["super-shy-mdx.mp3"],
        "test.pth",
        "rmvpe+",
        -12,  # pitch_lvl
        "added_IVF839_Flat_nprobe_1_test_v2.index",
        0.75,  # index_influence
        3,  # respiration_median_filtering
        0.25,  # envelope_ratio
        0.5,  # consonant_breath_protection
    )
    # The result used to be bound and then dropped, so running the script gave
    # no indication of what was produced. Print it and hand it to the caller.
    print(audio_out)
    return audio_out
|
30 |
+
|
31 |
+
|
32 |
+
def convert_now(audio_files, random_tag, converter):
    """Call ``converter`` on ``audio_files`` under ``random_tag``.

    The converter is invoked with ``overwrite=False`` and
    ``parallel_workers=0``; its return value is passed back unchanged.
    """
    call_options = {"overwrite": False, "parallel_workers": 0}
    return converter(audio_files, random_tag, **call_options)
|
34 |
+
|
35 |
+
|
36 |
+
def run(
    audio_files,
    file_m,
    pitch_alg,
    pitch_lvl,
    file_index,
    index_inf,
    r_m_f,
    e_r,
    c_b_p,
):
    """Register a configuration under a fresh tag and convert ``audio_files``.

    A tag of the form ``USER_<8 digits>`` is generated, the given settings are
    registered on the module-level converter under that tag via
    ``apply_conf``, and the files are then converted through convert_now().

    Returns:
        Whatever convert_now() returns for ``audio_files`` and the new tag.
    """
    random_tag = f"USER_{random.randint(10000000, 99999999)}"

    # Resample to 44100 only when the first input is an .mp3; 0 otherwise.
    resample_sr = 0
    if audio_files[0].endswith(".mp3"):
        resample_sr = 44100

    settings = {
        "tag": random_tag,
        "file_model": file_m,
        "pitch_algo": pitch_alg,
        "pitch_lvl": pitch_lvl,
        "file_index": file_index,
        "index_influence": index_inf,
        "respiration_median_filtering": r_m_f,
        "envelope_ratio": e_r,
        "consonant_breath_protection": c_b_p,
        "resample_sr": resample_sr,
    }
    converter.apply_conf(**settings)

    return convert_now(audio_files, random_tag, converter)
|
63 |
+
|
64 |
+
# Run the sample conversion only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
playground.ipynb
ADDED
@@ -0,0 +1,164 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 1,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [],
|
8 |
+
"source": [
|
9 |
+
"import os\n",
|
10 |
+
"# import gradio as gr\n",
|
11 |
+
"# import spaces\n",
|
12 |
+
"from infer_rvc_python import BaseLoader\n",
|
13 |
+
"import random\n",
|
14 |
+
"import logging\n",
|
15 |
+
"import time\n",
|
16 |
+
"import soundfile as sf\n",
|
17 |
+
"from infer_rvc_python.main import download_manager\n",
|
18 |
+
"import zipfile"
|
19 |
+
]
|
20 |
+
},
|
21 |
+
{
|
22 |
+
"cell_type": "code",
|
23 |
+
"execution_count": 2,
|
24 |
+
"metadata": {},
|
25 |
+
"outputs": [],
|
26 |
+
"source": [
|
27 |
+
"converter = BaseLoader(only_cpu=False, hubert_path=\"./hubert_base.pt\", rmvpe_path=\"./rmvpe.pt\")"
|
28 |
+
]
|
29 |
+
},
|
30 |
+
{
|
31 |
+
"cell_type": "code",
|
32 |
+
"execution_count": 3,
|
33 |
+
"metadata": {},
|
34 |
+
"outputs": [],
|
35 |
+
"source": [
|
36 |
+
"def convert_now(audio_files, random_tag, converter):\n",
|
37 |
+
" return converter(\n",
|
38 |
+
" audio_files,\n",
|
39 |
+
" random_tag,\n",
|
40 |
+
" overwrite=False,\n",
|
41 |
+
" parallel_workers=8\n",
|
42 |
+
" )\n",
|
43 |
+
"\n",
|
44 |
+
"\n",
|
45 |
+
"def run(\n",
|
46 |
+
" audio_files,\n",
|
47 |
+
" file_m,\n",
|
48 |
+
" pitch_alg,\n",
|
49 |
+
" pitch_lvl,\n",
|
50 |
+
" file_index,\n",
|
51 |
+
" index_inf,\n",
|
52 |
+
" r_m_f,\n",
|
53 |
+
" e_r,\n",
|
54 |
+
" c_b_p,\n",
|
55 |
+
"):\n",
|
56 |
+
" random_tag = \"USER_\"+str(random.randint(10000000, 99999999))\n",
|
57 |
+
"\n",
|
58 |
+
" converter.apply_conf(\n",
|
59 |
+
" tag=random_tag,\n",
|
60 |
+
" file_model=file_m,\n",
|
61 |
+
" pitch_algo=pitch_alg,\n",
|
62 |
+
" pitch_lvl=pitch_lvl,\n",
|
63 |
+
" file_index=file_index,\n",
|
64 |
+
" index_influence=index_inf,\n",
|
65 |
+
" respiration_median_filtering=r_m_f,\n",
|
66 |
+
" envelope_ratio=e_r,\n",
|
67 |
+
" consonant_breath_protection=c_b_p,\n",
|
68 |
+
" resample_sr=44100 if audio_files[0].endswith('.mp3') else 0, \n",
|
69 |
+
" )\n",
|
70 |
+
"\n",
|
71 |
+
" return convert_now(audio_files, random_tag, converter)"
|
72 |
+
]
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"cell_type": "code",
|
76 |
+
"execution_count": 4,
|
77 |
+
"metadata": {},
|
78 |
+
"outputs": [
|
79 |
+
{
|
80 |
+
"name": "stderr",
|
81 |
+
"output_type": "stream",
|
82 |
+
"text": [
|
83 |
+
"[INFO] >> Supported N-card not found, using MPS for inference\n",
|
84 |
+
"[INFO] >> Config: Device is mps, half precision is True\n",
|
85 |
+
"[INFO] >> Parallel workers: 8\n",
|
86 |
+
"Progress: 0%| | 0/1 [00:00<?, ?it/s][INFO] >> Loading test.pth\n"
|
87 |
+
]
|
88 |
+
},
|
89 |
+
{
|
90 |
+
"name": "stdout",
|
91 |
+
"output_type": "stream",
|
92 |
+
"text": [
|
93 |
+
"gin_channels: 256 self.spk_embed_dim: 109\n"
|
94 |
+
]
|
95 |
+
},
|
96 |
+
{
|
97 |
+
"name": "stderr",
|
98 |
+
"output_type": "stream",
|
99 |
+
"text": [
|
100 |
+
"[INFO] >> Loading vocal pitch estimator model\n"
|
101 |
+
]
|
102 |
+
},
|
103 |
+
{
|
104 |
+
"ename": "KeyboardInterrupt",
|
105 |
+
"evalue": "",
|
106 |
+
"output_type": "error",
|
107 |
+
"traceback": [
|
108 |
+
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
109 |
+
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
|
110 |
+
"Cell \u001b[0;32mIn[4], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mrun\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 2\u001b[0m \u001b[43m \u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43msuper-shy-mdx.mp3\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtest.pth\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mrmvpe+\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m-\u001b[39;49m\u001b[38;5;241;43m12\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43madded_IVF839_Flat_nprobe_1_test_v2.index\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m0.75\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# index_influence\u001b[39;49;00m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m3\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# respiration_median_filtering,\u001b[39;49;00m\n\u001b[1;32m 9\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m0.25\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# envelope_ratio,\u001b[39;49;00m\n\u001b[1;32m 10\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m0.5\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# consonant_breath_protection\u001b[39;49;00m\n\u001b[1;32m 11\u001b[0m \u001b[43m)\u001b[49m\n",
|
111 |
+
"Cell \u001b[0;32mIn[3], line 36\u001b[0m, in \u001b[0;36mrun\u001b[0;34m(audio_files, file_m, pitch_alg, pitch_lvl, file_index, index_inf, r_m_f, e_r, c_b_p)\u001b[0m\n\u001b[1;32m 21\u001b[0m random_tag \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUSER_\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m+\u001b[39m\u001b[38;5;28mstr\u001b[39m(random\u001b[38;5;241m.\u001b[39mrandint(\u001b[38;5;241m10000000\u001b[39m, \u001b[38;5;241m99999999\u001b[39m))\n\u001b[1;32m 23\u001b[0m converter\u001b[38;5;241m.\u001b[39mapply_conf(\n\u001b[1;32m 24\u001b[0m tag\u001b[38;5;241m=\u001b[39mrandom_tag,\n\u001b[1;32m 25\u001b[0m file_model\u001b[38;5;241m=\u001b[39mfile_m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 33\u001b[0m resample_sr\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m44100\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m audio_files[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;241m.\u001b[39mendswith(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m.mp3\u001b[39m\u001b[38;5;124m'\u001b[39m) \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;241m0\u001b[39m, \n\u001b[1;32m 34\u001b[0m )\n\u001b[0;32m---> 36\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mconvert_now\u001b[49m\u001b[43m(\u001b[49m\u001b[43maudio_files\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrandom_tag\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconverter\u001b[49m\u001b[43m)\u001b[49m\n",
|
112 |
+
"Cell \u001b[0;32mIn[3], line 2\u001b[0m, in \u001b[0;36mconvert_now\u001b[0;34m(audio_files, random_tag, converter)\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mconvert_now\u001b[39m(audio_files, random_tag, converter):\n\u001b[0;32m----> 2\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mconverter\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[43maudio_files\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[43mrandom_tag\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[43moverwrite\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m \u001b[49m\u001b[43mparallel_workers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m8\u001b[39;49m\n\u001b[1;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n",
|
113 |
+
"File \u001b[0;32m~/Developer/playground/rvc-infer-demo/env/lib/python3.10/site-packages/infer_rvc_python/main.py:780\u001b[0m, in \u001b[0;36mBaseLoader.__call__\u001b[0;34m(self, audio_files, tag_list, overwrite, parallel_workers, type_output)\u001b[0m\n\u001b[1;32m 778\u001b[0m \u001b[38;5;66;03m# Run last\u001b[39;00m\n\u001b[1;32m 779\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m threads:\n\u001b[0;32m--> 780\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun_threads\u001b[49m\u001b[43m(\u001b[49m\u001b[43mthreads\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 782\u001b[0m progress_bar\u001b[38;5;241m.\u001b[39mupdate(\u001b[38;5;28mlen\u001b[39m(threads))\n\u001b[1;32m 783\u001b[0m progress_bar\u001b[38;5;241m.\u001b[39mclose()\n",
|
114 |
+
"File \u001b[0;32m~/Developer/playground/rvc-infer-demo/env/lib/python3.10/site-packages/infer_rvc_python/main.py:563\u001b[0m, in \u001b[0;36mBaseLoader.run_threads\u001b[0;34m(self, threads)\u001b[0m\n\u001b[1;32m 561\u001b[0m \u001b[38;5;66;03m# Wait for all threads to finish\u001b[39;00m\n\u001b[1;32m 562\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m thread \u001b[38;5;129;01min\u001b[39;00m threads:\n\u001b[0;32m--> 563\u001b[0m \u001b[43mthread\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 565\u001b[0m gc\u001b[38;5;241m.\u001b[39mcollect()\n\u001b[1;32m 566\u001b[0m torch\u001b[38;5;241m.\u001b[39mcuda\u001b[38;5;241m.\u001b[39mempty_cache()\n",
|
115 |
+
"File \u001b[0;32m/opt/homebrew/Cellar/[email protected]/3.10.14/Frameworks/Python.framework/Versions/3.10/lib/python3.10/threading.py:1096\u001b[0m, in \u001b[0;36mThread.join\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 1093\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcannot join current thread\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 1095\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m timeout \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 1096\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_wait_for_tstate_lock\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1097\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1098\u001b[0m \u001b[38;5;66;03m# the behavior of a negative timeout isn't documented, but\u001b[39;00m\n\u001b[1;32m 1099\u001b[0m \u001b[38;5;66;03m# historically .join(timeout=x) for x<0 has acted as if timeout=0\u001b[39;00m\n\u001b[1;32m 1100\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_wait_for_tstate_lock(timeout\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mmax\u001b[39m(timeout, \u001b[38;5;241m0\u001b[39m))\n",
|
116 |
+
"File \u001b[0;32m/opt/homebrew/Cellar/[email protected]/3.10.14/Frameworks/Python.framework/Versions/3.10/lib/python3.10/threading.py:1116\u001b[0m, in \u001b[0;36mThread._wait_for_tstate_lock\u001b[0;34m(self, block, timeout)\u001b[0m\n\u001b[1;32m 1113\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m\n\u001b[1;32m 1115\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1116\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[43mlock\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43macquire\u001b[49m\u001b[43m(\u001b[49m\u001b[43mblock\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m)\u001b[49m:\n\u001b[1;32m 1117\u001b[0m lock\u001b[38;5;241m.\u001b[39mrelease()\n\u001b[1;32m 1118\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_stop()\n",
|
117 |
+
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
|
118 |
+
]
|
119 |
+
}
|
120 |
+
],
|
121 |
+
"source": [
|
122 |
+
"run(\n",
|
123 |
+
" [\"super-shy-mdx.mp3\"],\n",
|
124 |
+
" \"test.pth\",\n",
|
125 |
+
" \"rmvpe+\",\n",
|
126 |
+
" -12,\n",
|
127 |
+
" \"added_IVF839_Flat_nprobe_1_test_v2.index\",\n",
|
128 |
+
" 0.75, # index_influence\n",
|
129 |
+
" 3, # respiration_median_filtering,\n",
|
130 |
+
" 0.25, # envelope_ratio,\n",
|
131 |
+
" 0.5, # consonant_breath_protection\n",
|
132 |
+
")"
|
133 |
+
]
|
134 |
+
},
|
135 |
+
{
|
136 |
+
"cell_type": "code",
|
137 |
+
"execution_count": null,
|
138 |
+
"metadata": {},
|
139 |
+
"outputs": [],
|
140 |
+
"source": []
|
141 |
+
}
|
142 |
+
],
|
143 |
+
"metadata": {
|
144 |
+
"kernelspec": {
|
145 |
+
"display_name": "env",
|
146 |
+
"language": "python",
|
147 |
+
"name": "python3"
|
148 |
+
},
|
149 |
+
"language_info": {
|
150 |
+
"codemirror_mode": {
|
151 |
+
"name": "ipython",
|
152 |
+
"version": 3
|
153 |
+
},
|
154 |
+
"file_extension": ".py",
|
155 |
+
"mimetype": "text/x-python",
|
156 |
+
"name": "python",
|
157 |
+
"nbconvert_exporter": "python",
|
158 |
+
"pygments_lexer": "ipython3",
|
159 |
+
"version": "3.10.14"
|
160 |
+
}
|
161 |
+
},
|
162 |
+
"nbformat": 4,
|
163 |
+
"nbformat_minor": 2
|
164 |
+
}
|
requirements.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
infer-rvc-python
|