{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import os\n", "# import gradio as gr\n", "# import spaces\n", "from infer_rvc_python import BaseLoader\n", "import random\n", "import logging\n", "import time\n", "import soundfile as sf\n", "from infer_rvc_python.main import download_manager\n", "import zipfile" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "converter = BaseLoader(only_cpu=False, hubert_path=\"./hubert_base.pt\", rmvpe_path=\"./rmvpe.pt\")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "def convert_now(audio_files, random_tag, converter):\n", " return converter(\n", " audio_files,\n", " random_tag,\n", " overwrite=False,\n", " parallel_workers=8\n", " )\n", "\n", "\n", "def run(\n", " audio_files,\n", " file_m,\n", " pitch_alg,\n", " pitch_lvl,\n", " file_index,\n", " index_inf,\n", " r_m_f,\n", " e_r,\n", " c_b_p,\n", "):\n", " random_tag = \"USER_\"+str(random.randint(10000000, 99999999))\n", "\n", " converter.apply_conf(\n", " tag=random_tag,\n", " file_model=file_m,\n", " pitch_algo=pitch_alg,\n", " pitch_lvl=pitch_lvl,\n", " file_index=file_index,\n", " index_influence=index_inf,\n", " respiration_median_filtering=r_m_f,\n", " envelope_ratio=e_r,\n", " consonant_breath_protection=c_b_p,\n", " resample_sr=44100 if audio_files[0].endswith('.mp3') else 0, \n", " )\n", "\n", " return convert_now(audio_files, random_tag, converter)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "[INFO] >> Supported N-card not found, using MPS for inference\n", "[INFO] >> Config: Device is mps, half precision is True\n", "[INFO] >> Parallel workers: 8\n", "Progress: 0%| | 0/1 [00:00> Loading test.pth\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "gin_channels: 256 self.spk_embed_dim: 109\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "[INFO] >> Loading vocal pitch estimator model\n" ] }, { "ename": "KeyboardInterrupt", "evalue": "", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[4], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mrun\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 2\u001b[0m \u001b[43m \u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43msuper-shy-mdx.mp3\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtest.pth\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mrmvpe+\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m-\u001b[39;49m\u001b[38;5;241;43m12\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43madded_IVF839_Flat_nprobe_1_test_v2.index\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m0.75\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# index_influence\u001b[39;49;00m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m3\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# respiration_median_filtering,\u001b[39;49;00m\n\u001b[1;32m 9\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m0.25\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# envelope_ratio,\u001b[39;49;00m\n\u001b[1;32m 10\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m0.5\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# consonant_breath_protection\u001b[39;49;00m\n\u001b[1;32m 11\u001b[0m \u001b[43m)\u001b[49m\n", "Cell \u001b[0;32mIn[3], line 36\u001b[0m, in \u001b[0;36mrun\u001b[0;34m(audio_files, file_m, pitch_alg, pitch_lvl, file_index, index_inf, r_m_f, e_r, c_b_p)\u001b[0m\n\u001b[1;32m 21\u001b[0m random_tag \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUSER_\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m+\u001b[39m\u001b[38;5;28mstr\u001b[39m(random\u001b[38;5;241m.\u001b[39mrandint(\u001b[38;5;241m10000000\u001b[39m, \u001b[38;5;241m99999999\u001b[39m))\n\u001b[1;32m 23\u001b[0m converter\u001b[38;5;241m.\u001b[39mapply_conf(\n\u001b[1;32m 24\u001b[0m tag\u001b[38;5;241m=\u001b[39mrandom_tag,\n\u001b[1;32m 25\u001b[0m file_model\u001b[38;5;241m=\u001b[39mfile_m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 33\u001b[0m resample_sr\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m44100\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m audio_files[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;241m.\u001b[39mendswith(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m.mp3\u001b[39m\u001b[38;5;124m'\u001b[39m) \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;241m0\u001b[39m, \n\u001b[1;32m 34\u001b[0m )\n\u001b[0;32m---> 36\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mconvert_now\u001b[49m\u001b[43m(\u001b[49m\u001b[43maudio_files\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrandom_tag\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconverter\u001b[49m\u001b[43m)\u001b[49m\n", "Cell \u001b[0;32mIn[3], line 2\u001b[0m, in \u001b[0;36mconvert_now\u001b[0;34m(audio_files, random_tag, converter)\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mconvert_now\u001b[39m(audio_files, random_tag, converter):\n\u001b[0;32m----> 2\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mconverter\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[43maudio_files\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[43mrandom_tag\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[43moverwrite\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m \u001b[49m\u001b[43mparallel_workers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m8\u001b[39;49m\n\u001b[1;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/Developer/playground/rvc-infer-demo/env/lib/python3.10/site-packages/infer_rvc_python/main.py:780\u001b[0m, in \u001b[0;36mBaseLoader.__call__\u001b[0;34m(self, audio_files, tag_list, overwrite, parallel_workers, type_output)\u001b[0m\n\u001b[1;32m 778\u001b[0m \u001b[38;5;66;03m# Run last\u001b[39;00m\n\u001b[1;32m 779\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m threads:\n\u001b[0;32m--> 780\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun_threads\u001b[49m\u001b[43m(\u001b[49m\u001b[43mthreads\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 782\u001b[0m progress_bar\u001b[38;5;241m.\u001b[39mupdate(\u001b[38;5;28mlen\u001b[39m(threads))\n\u001b[1;32m 783\u001b[0m progress_bar\u001b[38;5;241m.\u001b[39mclose()\n", "File \u001b[0;32m~/Developer/playground/rvc-infer-demo/env/lib/python3.10/site-packages/infer_rvc_python/main.py:563\u001b[0m, in \u001b[0;36mBaseLoader.run_threads\u001b[0;34m(self, threads)\u001b[0m\n\u001b[1;32m 561\u001b[0m \u001b[38;5;66;03m# Wait for all threads to finish\u001b[39;00m\n\u001b[1;32m 562\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m thread \u001b[38;5;129;01min\u001b[39;00m threads:\n\u001b[0;32m--> 563\u001b[0m \u001b[43mthread\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 565\u001b[0m gc\u001b[38;5;241m.\u001b[39mcollect()\n\u001b[1;32m 566\u001b[0m torch\u001b[38;5;241m.\u001b[39mcuda\u001b[38;5;241m.\u001b[39mempty_cache()\n", "File \u001b[0;32m/opt/homebrew/Cellar/python@3.10/3.10.14/Frameworks/Python.framework/Versions/3.10/lib/python3.10/threading.py:1096\u001b[0m, in \u001b[0;36mThread.join\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 1093\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcannot join current thread\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 1095\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m timeout \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 1096\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_wait_for_tstate_lock\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1097\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1098\u001b[0m \u001b[38;5;66;03m# the behavior of a negative timeout isn't documented, but\u001b[39;00m\n\u001b[1;32m 1099\u001b[0m \u001b[38;5;66;03m# historically .join(timeout=x) for x<0 has acted as if timeout=0\u001b[39;00m\n\u001b[1;32m 1100\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_wait_for_tstate_lock(timeout\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mmax\u001b[39m(timeout, \u001b[38;5;241m0\u001b[39m))\n", "File \u001b[0;32m/opt/homebrew/Cellar/python@3.10/3.10.14/Frameworks/Python.framework/Versions/3.10/lib/python3.10/threading.py:1116\u001b[0m, in \u001b[0;36mThread._wait_for_tstate_lock\u001b[0;34m(self, block, timeout)\u001b[0m\n\u001b[1;32m 1113\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m\n\u001b[1;32m 1115\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1116\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[43mlock\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43macquire\u001b[49m\u001b[43m(\u001b[49m\u001b[43mblock\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m)\u001b[49m:\n\u001b[1;32m 1117\u001b[0m lock\u001b[38;5;241m.\u001b[39mrelease()\n\u001b[1;32m 1118\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_stop()\n", "\u001b[0;31mKeyboardInterrupt\u001b[0m: " ] } ], "source": [ "run(\n", " [\"super-shy-mdx.mp3\"],\n", " \"test.pth\",\n", " \"rmvpe+\",\n", " -12,\n", " \"added_IVF839_Flat_nprobe_1_test_v2.index\",\n", " 0.75, # index_influence\n", " 3, # respiration_median_filtering,\n", " 0.25, # envelope_ratio,\n", " 0.5, # consonant_breath_protection\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "env", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.14" } }, "nbformat": 4, "nbformat_minor": 2 }