Spaces:
Runtime error
Runtime error
File size: 12,109 Bytes
0019e18 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 |
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"# import gradio as gr\n",
"# import spaces\n",
"from infer_rvc_python import BaseLoader\n",
"import random\n",
"import logging\n",
"import time\n",
"import soundfile as sf\n",
"from infer_rvc_python.main import download_manager\n",
"import zipfile"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"converter = BaseLoader(only_cpu=False, hubert_path=\"./hubert_base.pt\", rmvpe_path=\"./rmvpe.pt\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"def convert_now(audio_files, random_tag, converter):\n",
" return converter(\n",
" audio_files,\n",
" random_tag,\n",
" overwrite=False,\n",
" parallel_workers=8\n",
" )\n",
"\n",
"\n",
"def run(\n",
" audio_files,\n",
" file_m,\n",
" pitch_alg,\n",
" pitch_lvl,\n",
" file_index,\n",
" index_inf,\n",
" r_m_f,\n",
" e_r,\n",
" c_b_p,\n",
"):\n",
" random_tag = \"USER_\"+str(random.randint(10000000, 99999999))\n",
"\n",
" converter.apply_conf(\n",
" tag=random_tag,\n",
" file_model=file_m,\n",
" pitch_algo=pitch_alg,\n",
" pitch_lvl=pitch_lvl,\n",
" file_index=file_index,\n",
" index_influence=index_inf,\n",
" respiration_median_filtering=r_m_f,\n",
" envelope_ratio=e_r,\n",
" consonant_breath_protection=c_b_p,\n",
" resample_sr=44100 if audio_files[0].endswith('.mp3') else 0, \n",
" )\n",
"\n",
" return convert_now(audio_files, random_tag, converter)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"[INFO] >> Supported N-card not found, using MPS for inference\n",
"[INFO] >> Config: Device is mps, half precision is True\n",
"[INFO] >> Parallel workers: 8\n",
"Progress: 0%| | 0/1 [00:00<?, ?it/s][INFO] >> Loading test.pth\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"gin_channels: 256 self.spk_embed_dim: 109\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[INFO] >> Loading vocal pitch estimator model\n"
]
},
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[4], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mrun\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 2\u001b[0m \u001b[43m \u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43msuper-shy-mdx.mp3\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtest.pth\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mrmvpe+\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m-\u001b[39;49m\u001b[38;5;241;43m12\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43madded_IVF839_Flat_nprobe_1_test_v2.index\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m0.75\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# index_influence\u001b[39;49;00m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m3\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# respiration_median_filtering,\u001b[39;49;00m\n\u001b[1;32m 9\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m0.25\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# envelope_ratio,\u001b[39;49;00m\n\u001b[1;32m 10\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m0.5\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# consonant_breath_protection\u001b[39;49;00m\n\u001b[1;32m 11\u001b[0m \u001b[43m)\u001b[49m\n",
"Cell \u001b[0;32mIn[3], line 36\u001b[0m, in \u001b[0;36mrun\u001b[0;34m(audio_files, file_m, pitch_alg, pitch_lvl, file_index, index_inf, r_m_f, e_r, c_b_p)\u001b[0m\n\u001b[1;32m 21\u001b[0m random_tag \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUSER_\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m+\u001b[39m\u001b[38;5;28mstr\u001b[39m(random\u001b[38;5;241m.\u001b[39mrandint(\u001b[38;5;241m10000000\u001b[39m, \u001b[38;5;241m99999999\u001b[39m))\n\u001b[1;32m 23\u001b[0m converter\u001b[38;5;241m.\u001b[39mapply_conf(\n\u001b[1;32m 24\u001b[0m tag\u001b[38;5;241m=\u001b[39mrandom_tag,\n\u001b[1;32m 25\u001b[0m file_model\u001b[38;5;241m=\u001b[39mfile_m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 33\u001b[0m resample_sr\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m44100\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m audio_files[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;241m.\u001b[39mendswith(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m.mp3\u001b[39m\u001b[38;5;124m'\u001b[39m) \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;241m0\u001b[39m, \n\u001b[1;32m 34\u001b[0m )\n\u001b[0;32m---> 36\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mconvert_now\u001b[49m\u001b[43m(\u001b[49m\u001b[43maudio_files\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrandom_tag\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconverter\u001b[49m\u001b[43m)\u001b[49m\n",
"Cell \u001b[0;32mIn[3], line 2\u001b[0m, in \u001b[0;36mconvert_now\u001b[0;34m(audio_files, random_tag, converter)\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mconvert_now\u001b[39m(audio_files, random_tag, converter):\n\u001b[0;32m----> 2\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mconverter\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[43maudio_files\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[43mrandom_tag\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[43moverwrite\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m \u001b[49m\u001b[43mparallel_workers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m8\u001b[39;49m\n\u001b[1;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/Developer/playground/rvc-infer-demo/env/lib/python3.10/site-packages/infer_rvc_python/main.py:780\u001b[0m, in \u001b[0;36mBaseLoader.__call__\u001b[0;34m(self, audio_files, tag_list, overwrite, parallel_workers, type_output)\u001b[0m\n\u001b[1;32m 778\u001b[0m \u001b[38;5;66;03m# Run last\u001b[39;00m\n\u001b[1;32m 779\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m threads:\n\u001b[0;32m--> 780\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun_threads\u001b[49m\u001b[43m(\u001b[49m\u001b[43mthreads\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 782\u001b[0m progress_bar\u001b[38;5;241m.\u001b[39mupdate(\u001b[38;5;28mlen\u001b[39m(threads))\n\u001b[1;32m 783\u001b[0m progress_bar\u001b[38;5;241m.\u001b[39mclose()\n",
"File \u001b[0;32m~/Developer/playground/rvc-infer-demo/env/lib/python3.10/site-packages/infer_rvc_python/main.py:563\u001b[0m, in \u001b[0;36mBaseLoader.run_threads\u001b[0;34m(self, threads)\u001b[0m\n\u001b[1;32m 561\u001b[0m \u001b[38;5;66;03m# Wait for all threads to finish\u001b[39;00m\n\u001b[1;32m 562\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m thread \u001b[38;5;129;01min\u001b[39;00m threads:\n\u001b[0;32m--> 563\u001b[0m \u001b[43mthread\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 565\u001b[0m gc\u001b[38;5;241m.\u001b[39mcollect()\n\u001b[1;32m 566\u001b[0m torch\u001b[38;5;241m.\u001b[39mcuda\u001b[38;5;241m.\u001b[39mempty_cache()\n",
"File \u001b[0;32m/opt/homebrew/Cellar/[email protected]/3.10.14/Frameworks/Python.framework/Versions/3.10/lib/python3.10/threading.py:1096\u001b[0m, in \u001b[0;36mThread.join\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 1093\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcannot join current thread\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 1095\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m timeout \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 1096\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_wait_for_tstate_lock\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1097\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1098\u001b[0m \u001b[38;5;66;03m# the behavior of a negative timeout isn't documented, but\u001b[39;00m\n\u001b[1;32m 1099\u001b[0m \u001b[38;5;66;03m# historically .join(timeout=x) for x<0 has acted as if timeout=0\u001b[39;00m\n\u001b[1;32m 1100\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_wait_for_tstate_lock(timeout\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mmax\u001b[39m(timeout, \u001b[38;5;241m0\u001b[39m))\n",
"File \u001b[0;32m/opt/homebrew/Cellar/[email protected]/3.10.14/Frameworks/Python.framework/Versions/3.10/lib/python3.10/threading.py:1116\u001b[0m, in \u001b[0;36mThread._wait_for_tstate_lock\u001b[0;34m(self, block, timeout)\u001b[0m\n\u001b[1;32m 1113\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m\n\u001b[1;32m 1115\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1116\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[43mlock\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43macquire\u001b[49m\u001b[43m(\u001b[49m\u001b[43mblock\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m)\u001b[49m:\n\u001b[1;32m 1117\u001b[0m lock\u001b[38;5;241m.\u001b[39mrelease()\n\u001b[1;32m 1118\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_stop()\n",
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
]
}
],
"source": [
"run(\n",
" [\"super-shy-mdx.mp3\"],\n",
" \"test.pth\",\n",
" \"rmvpe+\",\n",
" -12,\n",
" \"added_IVF839_Flat_nprobe_1_test_v2.index\",\n",
" 0.75, # index_influence\n",
" 3, # respiration_median_filtering,\n",
" 0.25, # envelope_ratio,\n",
" 0.5, # consonant_breath_protection\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "env",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.14"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
|