mattricesound commited on
Commit
0019e18
·
0 Parent(s):

Init commit

Browse files
Files changed (7) hide show
  1. .gitignore +6 -0
  2. README.md +5 -0
  3. app.py +160 -0
  4. gitattributes +35 -0
  5. inference.py +65 -0
  6. playground.ipynb +164 -0
  7. requirements.txt +1 -0
.gitignore ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ env/
2
+ *.mp3
3
+ *.pt
4
+ *.index
5
+ *.pth
6
+ *.npy
README.md ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ title: Rvc
4
+ sdk: gradio
5
+ ---
app.py ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from infer_rvc_python import BaseLoader
3
+ import random
4
+ from urllib.request import urlretrieve
5
+
6
from urllib.parse import urlsplit

# Remote assets fetched at start-up: a zip archive plus the two weight files
# whose local paths are handed to BaseLoader below.
# NOTE(review): test.zip presumably provides "test.pth" and the ".index" file
# named further down - confirm against the archive contents.
files_to_retrieve = [
    "https://replicate.delivery/pbxt/N97QM3XNFrooJhV6Fb0meBff0aAG1rEDfvuxcdLS6fTx1vmWC/test.zip",
    "https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt?download=true",
    "https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/rmvpe.pt?download=true",
]

for url in files_to_retrieve:
    print(f"Downloading {url}")
    # Name the local file after the URL *path* only. Splitting the raw URL on
    # "/" keeps the "?download=true" query string, which would save the
    # weights as "hubert_base.pt?download=true" instead of the
    # "./hubert_base.pt" / "./rmvpe.pt" paths used when building the converter.
    local_name = urlsplit(url).path.rsplit("/", 1)[-1]
    urlretrieve(url, local_name)

# unzip test.zip
import zipfile

with zipfile.ZipFile("test.zip", "r") as zip_ref:
    zip_ref.extractall(".")


# Single converter instance shared by every request handled by this module.
converter = BaseLoader(
    only_cpu=True, hubert_path="./hubert_base.pt", rmvpe_path="./rmvpe.pt"
)

# Model and index file names passed to every conversion.
model = "test.pth"
index = "added_IVF839_Flat_nprobe_1_test_v2.index"
28
+
29
+
30
+
31
+
32
+
33
def voice_conversion(
    audio,
    pitch_change,
    filter_radius,
    envelope_ratio,
    index_influence,
    consonant_breath_protection,
):
    """Convert one audio clip using the module-level model and index.

    This is the click handler wired to the "Convert" button; its arguments
    arrive in the order of the ``inputs`` list given to ``button.click``.

    Args:
        audio: Value of the audio input component (created with
            ``type="filepath"``); empty/``None`` when nothing was provided.
        pitch_change: Forwarded to ``run`` as ``pitch_lvl``.
        filter_radius: Forwarded to ``run`` as ``r_m_f``
            (``respiration_median_filtering``).
        envelope_ratio: Forwarded to ``run`` as ``e_r``.
        index_influence: Forwarded to ``run`` as ``index_inf``.
        consonant_breath_protection: Forwarded to ``run`` as ``c_b_p``.

    Returns:
        The first element of the sequence returned by ``run``.

    Raises:
        gr.Error: If no audio was supplied or the conversion yielded nothing.
    """
    # Without this guard ``str(None)`` would send the literal path "None"
    # to the converter when the user clicks Convert with no clip loaded.
    if not audio:
        raise gr.Error("Please record or upload an audio clip before converting.")

    audio_out = run(
        [str(audio)],
        model,
        "rmvpe+",
        pitch_change,
        index,
        index_influence,
        filter_radius,
        envelope_ratio,
        consonant_breath_protection,
    )
    print(audio_out)

    # Surface an empty result as a readable UI error instead of an IndexError.
    if not audio_out:
        raise gr.Error("The conversion did not produce any output.")
    return audio_out[0]
54
+
55
+
56
+
57
def convert_now(audio_files, random_tag):
    """Call the module-level ``converter`` on ``audio_files`` under ``random_tag``.

    The call is made with ``overwrite=False`` and ``parallel_workers=8``;
    whatever the converter returns is passed straight back to the caller.
    """
    call_options = {"overwrite": False, "parallel_workers": 8}
    return converter(audio_files, random_tag, **call_options)
59
+
60
+
61
def run(
    audio_files,
    file_m,
    pitch_alg,
    pitch_lvl,
    file_index,
    index_inf,
    r_m_f,
    e_r,
    c_b_p,
):
    """Register a configuration under a fresh tag, then convert ``audio_files``.

    Args:
        audio_files: List of input paths; the first entry decides ``resample_sr``.
        file_m: Passed to ``apply_conf`` as ``file_model``.
        pitch_alg: Passed as ``pitch_algo``.
        pitch_lvl: Passed as ``pitch_lvl``.
        file_index: Passed as ``file_index``.
        index_inf: Passed as ``index_influence``.
        r_m_f: Passed as ``respiration_median_filtering``.
        e_r: Passed as ``envelope_ratio``.
        c_b_p: Passed as ``consonant_breath_protection``.

    Returns:
        The result of ``convert_now`` for the generated tag.
    """
    # Tag of the form "USER_<8 digits>" identifying this configuration.
    tag_number = random.randint(10000000, 99999999)
    random_tag = f"USER_{tag_number}"

    # 44100 when the first input is an ".mp3" file, 0 otherwise.
    if audio_files[0].endswith(".mp3"):
        target_sr = 44100
    else:
        target_sr = 0

    settings = dict(
        tag=random_tag,
        file_model=file_m,
        pitch_algo=pitch_alg,
        pitch_lvl=pitch_lvl,
        file_index=file_index,
        index_influence=index_inf,
        respiration_median_filtering=r_m_f,
        envelope_ratio=e_r,
        consonant_breath_protection=c_b_p,
        resample_sr=target_sr,
    )
    converter.apply_conf(**settings)

    return convert_now(audio_files, random_tag)
88
+
89
+
90
+
91
+ # Create the Gradio interface
92
+ # audio_input = gr.Audio(type="file")
93
+ # audio_output = gr.Audio(type="file")
94
+
95
+ # gr.Interface(fn=voice_conversion, inputs=audio_input, outputs=audio_output).launch()
96
+
97
+
98
def ui():
    """Assemble the Gradio Blocks page and return it without launching.

    The page holds an audio input (microphone or upload, delivered as a file
    path), five sliders, a "Convert" button and an audio output. Clicking the
    button calls ``voice_conversion``.
    """

    def make_slider(label, low, high, start, step):
        # Every slider on the page is interactive and differs only in these values.
        return gr.Slider(
            label=label,
            minimum=low,
            maximum=high,
            value=start,
            step=step,
            interactive=True,
        )

    with gr.Blocks() as demo:
        audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath")

        # NOTE(review): the source's indentation was lost; the sliders are
        # assumed to sit inside the Row and the remaining widgets below it -
        # confirm against the original file.
        with gr.Row():
            pitch_slider = make_slider("Pitch", -24, 24, 0, 1)
            index_influence_slider = make_slider("Index Influence", 0, 1, 0.75, 0.01)
            median_filter_slider = make_slider("Resp. Median Filtering", 0, 10, 3, 1)
            envelope_slider = make_slider("Envelope Ratio", 0, 1, 0.25, 0.01)
            breath_slider = make_slider("Consonant Breath Protection", 0, 1, 0.5, 0.01)

        button = gr.Button("Convert")
        audio_output = gr.Audio(type="filepath")

        # Order must match the positional parameters of voice_conversion.
        handler_inputs = [
            audio_input,
            pitch_slider,
            median_filter_slider,
            envelope_slider,
            index_influence_slider,
            breath_slider,
        ]
        button.click(voice_conversion, inputs=handler_inputs, outputs=audio_output)

    return demo
158
+
159
+
160
# Build the interface, then start serving it.
demo = ui()
demo.launch()
gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
inference.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ # import gradio as gr
4
+ # import spaces
5
+ from infer_rvc_python import BaseLoader
6
+ import random
7
+ import logging
8
+ import time
9
+ import soundfile as sf
10
+ from infer_rvc_python.main import download_manager
11
+ import zipfile
12
+
13
# Module-level converter instance used by run() below; built with
# only_cpu=True and local paths for the two weight files.
converter = BaseLoader(
    only_cpu=True,
    hubert_path="./hubert_base.pt",
    rmvpe_path="./rmvpe.pt",
)
16
+
17
+
18
def main():
    """Run one sample conversion with fixed settings and report the result.

    Converts ``super-shy-mdx.mp3`` with the ``test.pth`` model and its index,
    using the ``rmvpe+`` pitch algorithm and a pitch level of -12.

    Returns:
        Whatever ``run`` returns for the sample file.
    """
    audio_out = run(
        ["super-shy-mdx.mp3"],
        "test.pth",
        "rmvpe+",
        -12,
        "added_IVF839_Flat_nprobe_1_test_v2.index",
        0.75,  # index_influence
        3,  # respiration_median_filtering
        0.25,  # envelope_ratio
        0.5,  # consonant_breath_protection
    )
    # Show the outcome so a command-line run gives feedback, and hand it back
    # to callers instead of silently dropping it.
    print(audio_out)
    return audio_out
30
+
31
+
32
def convert_now(audio_files, random_tag, converter):
    """Call ``converter`` on ``audio_files`` under ``random_tag``.

    The call is made with ``overwrite=False`` and ``parallel_workers=0``;
    the converter's return value is handed back unchanged.
    """
    outcome = converter(
        audio_files,
        random_tag,
        overwrite=False,
        parallel_workers=0,
    )
    return outcome
34
+
35
+
36
def run(
    audio_files,
    file_m,
    pitch_alg,
    pitch_lvl,
    file_index,
    index_inf,
    r_m_f,
    e_r,
    c_b_p,
):
    """Register a configuration under a fresh tag, then convert ``audio_files``.

    Args:
        audio_files: List of input paths; the first entry decides ``resample_sr``.
        file_m: Passed to ``apply_conf`` as ``file_model``.
        pitch_alg: Passed as ``pitch_algo``.
        pitch_lvl: Passed as ``pitch_lvl``.
        file_index: Passed as ``file_index``.
        index_inf: Passed as ``index_influence``.
        r_m_f: Passed as ``respiration_median_filtering``.
        e_r: Passed as ``envelope_ratio``.
        c_b_p: Passed as ``consonant_breath_protection``.

    Returns:
        The result of ``convert_now`` for the generated tag, using the
        module-level ``converter``.
    """
    # Tag of the form "USER_<8 digits>" identifying this configuration.
    tag_number = random.randint(10000000, 99999999)
    random_tag = f"USER_{tag_number}"

    # 44100 when the first input is an ".mp3" file, 0 otherwise.
    if audio_files[0].endswith(".mp3"):
        target_sr = 44100
    else:
        target_sr = 0

    settings = dict(
        tag=random_tag,
        file_model=file_m,
        pitch_algo=pitch_alg,
        pitch_lvl=pitch_lvl,
        file_index=file_index,
        index_influence=index_inf,
        respiration_median_filtering=r_m_f,
        envelope_ratio=e_r,
        consonant_breath_protection=c_b_p,
        resample_sr=target_sr,
    )
    converter.apply_conf(**settings)

    return convert_now(audio_files, random_tag, converter)
63
+
64
# Run the sample conversion only when this file is executed as a script.
if __name__ == "__main__":
    main()
playground.ipynb ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import os\n",
10
+ "# import gradio as gr\n",
11
+ "# import spaces\n",
12
+ "from infer_rvc_python import BaseLoader\n",
13
+ "import random\n",
14
+ "import logging\n",
15
+ "import time\n",
16
+ "import soundfile as sf\n",
17
+ "from infer_rvc_python.main import download_manager\n",
18
+ "import zipfile"
19
+ ]
20
+ },
21
+ {
22
+ "cell_type": "code",
23
+ "execution_count": 2,
24
+ "metadata": {},
25
+ "outputs": [],
26
+ "source": [
27
+ "converter = BaseLoader(only_cpu=False, hubert_path=\"./hubert_base.pt\", rmvpe_path=\"./rmvpe.pt\")"
28
+ ]
29
+ },
30
+ {
31
+ "cell_type": "code",
32
+ "execution_count": 3,
33
+ "metadata": {},
34
+ "outputs": [],
35
+ "source": [
36
+ "def convert_now(audio_files, random_tag, converter):\n",
37
+ " return converter(\n",
38
+ " audio_files,\n",
39
+ " random_tag,\n",
40
+ " overwrite=False,\n",
41
+ " parallel_workers=8\n",
42
+ " )\n",
43
+ "\n",
44
+ "\n",
45
+ "def run(\n",
46
+ " audio_files,\n",
47
+ " file_m,\n",
48
+ " pitch_alg,\n",
49
+ " pitch_lvl,\n",
50
+ " file_index,\n",
51
+ " index_inf,\n",
52
+ " r_m_f,\n",
53
+ " e_r,\n",
54
+ " c_b_p,\n",
55
+ "):\n",
56
+ " random_tag = \"USER_\"+str(random.randint(10000000, 99999999))\n",
57
+ "\n",
58
+ " converter.apply_conf(\n",
59
+ " tag=random_tag,\n",
60
+ " file_model=file_m,\n",
61
+ " pitch_algo=pitch_alg,\n",
62
+ " pitch_lvl=pitch_lvl,\n",
63
+ " file_index=file_index,\n",
64
+ " index_influence=index_inf,\n",
65
+ " respiration_median_filtering=r_m_f,\n",
66
+ " envelope_ratio=e_r,\n",
67
+ " consonant_breath_protection=c_b_p,\n",
68
+ " resample_sr=44100 if audio_files[0].endswith('.mp3') else 0, \n",
69
+ " )\n",
70
+ "\n",
71
+ " return convert_now(audio_files, random_tag, converter)"
72
+ ]
73
+ },
74
+ {
75
+ "cell_type": "code",
76
+ "execution_count": 4,
77
+ "metadata": {},
78
+ "outputs": [
79
+ {
80
+ "name": "stderr",
81
+ "output_type": "stream",
82
+ "text": [
83
+ "[INFO] >> Supported N-card not found, using MPS for inference\n",
84
+ "[INFO] >> Config: Device is mps, half precision is True\n",
85
+ "[INFO] >> Parallel workers: 8\n",
86
+ "Progress: 0%| | 0/1 [00:00<?, ?it/s][INFO] >> Loading test.pth\n"
87
+ ]
88
+ },
89
+ {
90
+ "name": "stdout",
91
+ "output_type": "stream",
92
+ "text": [
93
+ "gin_channels: 256 self.spk_embed_dim: 109\n"
94
+ ]
95
+ },
96
+ {
97
+ "name": "stderr",
98
+ "output_type": "stream",
99
+ "text": [
100
+ "[INFO] >> Loading vocal pitch estimator model\n"
101
+ ]
102
+ },
103
+ {
104
+ "ename": "KeyboardInterrupt",
105
+ "evalue": "",
106
+ "output_type": "error",
107
+ "traceback": [
108
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
109
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
110
+ "Cell \u001b[0;32mIn[4], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mrun\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 2\u001b[0m \u001b[43m \u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43msuper-shy-mdx.mp3\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtest.pth\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mrmvpe+\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m-\u001b[39;49m\u001b[38;5;241;43m12\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43madded_IVF839_Flat_nprobe_1_test_v2.index\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m0.75\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# index_influence\u001b[39;49;00m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m3\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# respiration_median_filtering,\u001b[39;49;00m\n\u001b[1;32m 9\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m0.25\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# envelope_ratio,\u001b[39;49;00m\n\u001b[1;32m 10\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m0.5\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# consonant_breath_protection\u001b[39;49;00m\n\u001b[1;32m 11\u001b[0m \u001b[43m)\u001b[49m\n",
111
+ "Cell \u001b[0;32mIn[3], line 36\u001b[0m, in \u001b[0;36mrun\u001b[0;34m(audio_files, file_m, pitch_alg, pitch_lvl, file_index, index_inf, r_m_f, e_r, c_b_p)\u001b[0m\n\u001b[1;32m 21\u001b[0m random_tag \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUSER_\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m+\u001b[39m\u001b[38;5;28mstr\u001b[39m(random\u001b[38;5;241m.\u001b[39mrandint(\u001b[38;5;241m10000000\u001b[39m, \u001b[38;5;241m99999999\u001b[39m))\n\u001b[1;32m 23\u001b[0m converter\u001b[38;5;241m.\u001b[39mapply_conf(\n\u001b[1;32m 24\u001b[0m tag\u001b[38;5;241m=\u001b[39mrandom_tag,\n\u001b[1;32m 25\u001b[0m file_model\u001b[38;5;241m=\u001b[39mfile_m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 33\u001b[0m resample_sr\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m44100\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m audio_files[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;241m.\u001b[39mendswith(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m.mp3\u001b[39m\u001b[38;5;124m'\u001b[39m) \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;241m0\u001b[39m, \n\u001b[1;32m 34\u001b[0m )\n\u001b[0;32m---> 36\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mconvert_now\u001b[49m\u001b[43m(\u001b[49m\u001b[43maudio_files\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrandom_tag\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconverter\u001b[49m\u001b[43m)\u001b[49m\n",
112
+ "Cell \u001b[0;32mIn[3], line 2\u001b[0m, in \u001b[0;36mconvert_now\u001b[0;34m(audio_files, random_tag, converter)\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mconvert_now\u001b[39m(audio_files, random_tag, converter):\n\u001b[0;32m----> 2\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mconverter\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[43maudio_files\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[43mrandom_tag\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[43moverwrite\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m \u001b[49m\u001b[43mparallel_workers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m8\u001b[39;49m\n\u001b[1;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n",
113
+ "File \u001b[0;32m~/Developer/playground/rvc-infer-demo/env/lib/python3.10/site-packages/infer_rvc_python/main.py:780\u001b[0m, in \u001b[0;36mBaseLoader.__call__\u001b[0;34m(self, audio_files, tag_list, overwrite, parallel_workers, type_output)\u001b[0m\n\u001b[1;32m 778\u001b[0m \u001b[38;5;66;03m# Run last\u001b[39;00m\n\u001b[1;32m 779\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m threads:\n\u001b[0;32m--> 780\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun_threads\u001b[49m\u001b[43m(\u001b[49m\u001b[43mthreads\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 782\u001b[0m progress_bar\u001b[38;5;241m.\u001b[39mupdate(\u001b[38;5;28mlen\u001b[39m(threads))\n\u001b[1;32m 783\u001b[0m progress_bar\u001b[38;5;241m.\u001b[39mclose()\n",
114
+ "File \u001b[0;32m~/Developer/playground/rvc-infer-demo/env/lib/python3.10/site-packages/infer_rvc_python/main.py:563\u001b[0m, in \u001b[0;36mBaseLoader.run_threads\u001b[0;34m(self, threads)\u001b[0m\n\u001b[1;32m 561\u001b[0m \u001b[38;5;66;03m# Wait for all threads to finish\u001b[39;00m\n\u001b[1;32m 562\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m thread \u001b[38;5;129;01min\u001b[39;00m threads:\n\u001b[0;32m--> 563\u001b[0m \u001b[43mthread\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 565\u001b[0m gc\u001b[38;5;241m.\u001b[39mcollect()\n\u001b[1;32m 566\u001b[0m torch\u001b[38;5;241m.\u001b[39mcuda\u001b[38;5;241m.\u001b[39mempty_cache()\n",
115
+ "File \u001b[0;32m/opt/homebrew/Cellar/[email protected]/3.10.14/Frameworks/Python.framework/Versions/3.10/lib/python3.10/threading.py:1096\u001b[0m, in \u001b[0;36mThread.join\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 1093\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcannot join current thread\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 1095\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m timeout \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 1096\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_wait_for_tstate_lock\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1097\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1098\u001b[0m \u001b[38;5;66;03m# the behavior of a negative timeout isn't documented, but\u001b[39;00m\n\u001b[1;32m 1099\u001b[0m \u001b[38;5;66;03m# historically .join(timeout=x) for x<0 has acted as if timeout=0\u001b[39;00m\n\u001b[1;32m 1100\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_wait_for_tstate_lock(timeout\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mmax\u001b[39m(timeout, \u001b[38;5;241m0\u001b[39m))\n",
116
+ "File \u001b[0;32m/opt/homebrew/Cellar/[email protected]/3.10.14/Frameworks/Python.framework/Versions/3.10/lib/python3.10/threading.py:1116\u001b[0m, in \u001b[0;36mThread._wait_for_tstate_lock\u001b[0;34m(self, block, timeout)\u001b[0m\n\u001b[1;32m 1113\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m\n\u001b[1;32m 1115\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1116\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[43mlock\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43macquire\u001b[49m\u001b[43m(\u001b[49m\u001b[43mblock\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m)\u001b[49m:\n\u001b[1;32m 1117\u001b[0m lock\u001b[38;5;241m.\u001b[39mrelease()\n\u001b[1;32m 1118\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_stop()\n",
117
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
118
+ ]
119
+ }
120
+ ],
121
+ "source": [
122
+ "run(\n",
123
+ " [\"super-shy-mdx.mp3\"],\n",
124
+ " \"test.pth\",\n",
125
+ " \"rmvpe+\",\n",
126
+ " -12,\n",
127
+ " \"added_IVF839_Flat_nprobe_1_test_v2.index\",\n",
128
+ " 0.75, # index_influence\n",
129
+ " 3, # respiration_median_filtering,\n",
130
+ " 0.25, # envelope_ratio,\n",
131
+ " 0.5, # consonant_breath_protection\n",
132
+ ")"
133
+ ]
134
+ },
135
+ {
136
+ "cell_type": "code",
137
+ "execution_count": null,
138
+ "metadata": {},
139
+ "outputs": [],
140
+ "source": []
141
+ }
142
+ ],
143
+ "metadata": {
144
+ "kernelspec": {
145
+ "display_name": "env",
146
+ "language": "python",
147
+ "name": "python3"
148
+ },
149
+ "language_info": {
150
+ "codemirror_mode": {
151
+ "name": "ipython",
152
+ "version": 3
153
+ },
154
+ "file_extension": ".py",
155
+ "mimetype": "text/x-python",
156
+ "name": "python",
157
+ "nbconvert_exporter": "python",
158
+ "pygments_lexer": "ipython3",
159
+ "version": "3.10.14"
160
+ }
161
+ },
162
+ "nbformat": 4,
163
+ "nbformat_minor": 2
164
+ }
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ infer-rvc-python