File size: 12,109 Bytes
0019e18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "# import gradio as gr\n",
    "# import spaces\n",
    "from infer_rvc_python import BaseLoader\n",
    "import random\n",
    "import logging\n",
    "import time\n",
    "import soundfile as sf\n",
    "from infer_rvc_python.main import download_manager\n",
    "import zipfile"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "converter = BaseLoader(only_cpu=False, hubert_path=\"./hubert_base.pt\", rmvpe_path=\"./rmvpe.pt\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def convert_now(audio_files, random_tag, converter):\n",
    "    return converter(\n",
    "        audio_files,\n",
    "        random_tag,\n",
    "        overwrite=False,\n",
    "        parallel_workers=8\n",
    "    )\n",
    "\n",
    "\n",
    "def run(\n",
    "    audio_files,\n",
    "    file_m,\n",
    "    pitch_alg,\n",
    "    pitch_lvl,\n",
    "    file_index,\n",
    "    index_inf,\n",
    "    r_m_f,\n",
    "    e_r,\n",
    "    c_b_p,\n",
    "):\n",
    "    random_tag = \"USER_\"+str(random.randint(10000000, 99999999))\n",
    "\n",
    "    converter.apply_conf(\n",
    "        tag=random_tag,\n",
    "        file_model=file_m,\n",
    "        pitch_algo=pitch_alg,\n",
    "        pitch_lvl=pitch_lvl,\n",
    "        file_index=file_index,\n",
    "        index_influence=index_inf,\n",
    "        respiration_median_filtering=r_m_f,\n",
    "        envelope_ratio=e_r,\n",
    "        consonant_breath_protection=c_b_p,\n",
    "        resample_sr=44100 if audio_files[0].endswith('.mp3') else 0, \n",
    "    )\n",
    "\n",
    "    return convert_now(audio_files, random_tag, converter)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[INFO] >> Supported N-card not found, using MPS for inference\n",
      "[INFO] >> Config: Device is mps, half precision is True\n",
      "[INFO] >> Parallel workers: 8\n",
      "Progress:   0%|          | 0/1 [00:00<?, ?it/s][INFO] >> Loading test.pth\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "gin_channels: 256 self.spk_embed_dim: 109\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[INFO] >> Loading vocal pitch estimator model\n"
     ]
    },
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[4], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mrun\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m      2\u001b[0m \u001b[43m    \u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43msuper-shy-mdx.mp3\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m      3\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtest.pth\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m      4\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mrmvpe+\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m      5\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;241;43m-\u001b[39;49m\u001b[38;5;241;43m12\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m      6\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43madded_IVF839_Flat_nprobe_1_test_v2.index\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m      7\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;241;43m0.75\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# index_influence\u001b[39;49;00m\n\u001b[1;32m      8\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;241;43m3\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# respiration_median_filtering,\u001b[39;49;00m\n\u001b[1;32m      9\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;241;43m0.25\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# envelope_ratio,\u001b[39;49;00m\n\u001b[1;32m     10\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;241;43m0.5\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# consonant_breath_protection\u001b[39;49;00m\n\u001b[1;32m     11\u001b[0m \u001b[43m)\u001b[49m\n",
      "Cell \u001b[0;32mIn[3], line 36\u001b[0m, in \u001b[0;36mrun\u001b[0;34m(audio_files, file_m, pitch_alg, pitch_lvl, file_index, index_inf, r_m_f, e_r, c_b_p)\u001b[0m\n\u001b[1;32m     21\u001b[0m random_tag \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUSER_\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m+\u001b[39m\u001b[38;5;28mstr\u001b[39m(random\u001b[38;5;241m.\u001b[39mrandint(\u001b[38;5;241m10000000\u001b[39m, \u001b[38;5;241m99999999\u001b[39m))\n\u001b[1;32m     23\u001b[0m converter\u001b[38;5;241m.\u001b[39mapply_conf(\n\u001b[1;32m     24\u001b[0m     tag\u001b[38;5;241m=\u001b[39mrandom_tag,\n\u001b[1;32m     25\u001b[0m     file_model\u001b[38;5;241m=\u001b[39mfile_m,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m     33\u001b[0m     resample_sr\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m44100\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m audio_files[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;241m.\u001b[39mendswith(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m.mp3\u001b[39m\u001b[38;5;124m'\u001b[39m) \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;241m0\u001b[39m, \n\u001b[1;32m     34\u001b[0m )\n\u001b[0;32m---> 36\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mconvert_now\u001b[49m\u001b[43m(\u001b[49m\u001b[43maudio_files\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrandom_tag\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconverter\u001b[49m\u001b[43m)\u001b[49m\n",
      "Cell \u001b[0;32mIn[3], line 2\u001b[0m, in \u001b[0;36mconvert_now\u001b[0;34m(audio_files, random_tag, converter)\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mconvert_now\u001b[39m(audio_files, random_tag, converter):\n\u001b[0;32m----> 2\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mconverter\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m      3\u001b[0m \u001b[43m        \u001b[49m\u001b[43maudio_files\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m      4\u001b[0m \u001b[43m        \u001b[49m\u001b[43mrandom_tag\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m      5\u001b[0m \u001b[43m        \u001b[49m\u001b[43moverwrite\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m      6\u001b[0m \u001b[43m        \u001b[49m\u001b[43mparallel_workers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m8\u001b[39;49m\n\u001b[1;32m      7\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m~/Developer/playground/rvc-infer-demo/env/lib/python3.10/site-packages/infer_rvc_python/main.py:780\u001b[0m, in \u001b[0;36mBaseLoader.__call__\u001b[0;34m(self, audio_files, tag_list, overwrite, parallel_workers, type_output)\u001b[0m\n\u001b[1;32m    778\u001b[0m \u001b[38;5;66;03m# Run last\u001b[39;00m\n\u001b[1;32m    779\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m threads:\n\u001b[0;32m--> 780\u001b[0m     \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun_threads\u001b[49m\u001b[43m(\u001b[49m\u001b[43mthreads\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    782\u001b[0m progress_bar\u001b[38;5;241m.\u001b[39mupdate(\u001b[38;5;28mlen\u001b[39m(threads))\n\u001b[1;32m    783\u001b[0m progress_bar\u001b[38;5;241m.\u001b[39mclose()\n",
      "File \u001b[0;32m~/Developer/playground/rvc-infer-demo/env/lib/python3.10/site-packages/infer_rvc_python/main.py:563\u001b[0m, in \u001b[0;36mBaseLoader.run_threads\u001b[0;34m(self, threads)\u001b[0m\n\u001b[1;32m    561\u001b[0m \u001b[38;5;66;03m# Wait for all threads to finish\u001b[39;00m\n\u001b[1;32m    562\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m thread \u001b[38;5;129;01min\u001b[39;00m threads:\n\u001b[0;32m--> 563\u001b[0m     \u001b[43mthread\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    565\u001b[0m gc\u001b[38;5;241m.\u001b[39mcollect()\n\u001b[1;32m    566\u001b[0m torch\u001b[38;5;241m.\u001b[39mcuda\u001b[38;5;241m.\u001b[39mempty_cache()\n",
      "File \u001b[0;32m/opt/homebrew/Cellar/[email protected]/3.10.14/Frameworks/Python.framework/Versions/3.10/lib/python3.10/threading.py:1096\u001b[0m, in \u001b[0;36mThread.join\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m   1093\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcannot join current thread\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m   1095\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m timeout \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 1096\u001b[0m     \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_wait_for_tstate_lock\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1097\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m   1098\u001b[0m     \u001b[38;5;66;03m# the behavior of a negative timeout isn't documented, but\u001b[39;00m\n\u001b[1;32m   1099\u001b[0m     \u001b[38;5;66;03m# historically .join(timeout=x) for x<0 has acted as if timeout=0\u001b[39;00m\n\u001b[1;32m   1100\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_wait_for_tstate_lock(timeout\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mmax\u001b[39m(timeout, \u001b[38;5;241m0\u001b[39m))\n",
      "File \u001b[0;32m/opt/homebrew/Cellar/[email protected]/3.10.14/Frameworks/Python.framework/Versions/3.10/lib/python3.10/threading.py:1116\u001b[0m, in \u001b[0;36mThread._wait_for_tstate_lock\u001b[0;34m(self, block, timeout)\u001b[0m\n\u001b[1;32m   1113\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m\n\u001b[1;32m   1115\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1116\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[43mlock\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43macquire\u001b[49m\u001b[43m(\u001b[49m\u001b[43mblock\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m)\u001b[49m:\n\u001b[1;32m   1117\u001b[0m         lock\u001b[38;5;241m.\u001b[39mrelease()\n\u001b[1;32m   1118\u001b[0m         \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_stop()\n",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
     ]
    }
   ],
   "source": [
    "run(\n",
    "    [\"super-shy-mdx.mp3\"],\n",
    "    \"test.pth\",\n",
    "    \"rmvpe+\",\n",
    "    -12,\n",
    "    \"added_IVF839_Flat_nprobe_1_test_v2.index\",\n",
    "    0.75, # index_influence\n",
    "    3, # respiration_median_filtering,\n",
    "    0.25, # envelope_ratio,\n",
    "    0.5, # consonant_breath_protection\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "env",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.14"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}