import gradio as gr
import os
from constants import VOICE_METHODS, BARK_VOICES, EDGE_VOICES
import platform
from models.model import *
from tts.conversion import COQUI_LANGUAGES
import pytube
import os
import traceback
from pydub import AudioSegment
# from audio_enhance.functions import audio_enhance
def convert_yt_to_wav(url):
    """Download the audio track of a YouTube video and convert it to WAV.

    Args:
        url: Link to the YouTube video. An empty or missing value is
            rejected without attempting any download.

    Returns:
        A ``(message, audio_path)`` tuple. On success ``message`` is
        ``"Success"`` and ``audio_path`` is the path of the WAV file written
        under the ``audios`` folder. On any failure ``message`` describes the
        problem and ``audio_path`` is ``None``; the function reports errors
        through the return value instead of raising.
    """
    if not url:
        return "Please enter the video link", None

    video_output_folder = "yt_videos"  # Destination folder for the raw download
    audio_output_folder = "audios"  # Destination folder for the converted WAV
    video_file_path = None

    try:
        print(f"Converting video {url}...")

        # Download the audio-only stream using pytube
        video = pytube.YouTube(url)
        stream = video.streams.filter(only_audio=True).first()
        if stream is None:
            # first() yields None when the filtered query is empty; report it
            # clearly instead of failing later with an AttributeError.
            return "No audio stream found for this video.", None

        print("Downloading video")
        video_file_path = stream.download(output_path=video_output_folder)
        print(video_file_path)

        # Derive the output name from whatever extension was downloaded, so a
        # non-mp4 container still produces a ".wav" file name.
        stem, extension = os.path.splitext(os.path.basename(video_file_path))
        os.makedirs(audio_output_folder, exist_ok=True)
        audio_file_path = os.path.join(audio_output_folder, stem + ".wav")

        # Convert the downloaded container to wav
        print("Converting to wav")
        # Hint the decoder with the real extension; with no extension, pass
        # None so the format is auto-detected.
        source_format = extension.lstrip(".").lower() or None
        sound = AudioSegment.from_file(video_file_path, format=source_format)
        sound.export(audio_file_path, format="wav")

        return "Success", audio_file_path
    except ConnectionResetError:
        return "Connection lost, please refresh or try again later.", None
    except Exception as e:
        # Keep the full stack trace in the server log; the UI only gets the message.
        traceback.print_exc()
        return str(e), None
    finally:
        # Remove the temporary download whether or not the conversion worked.
        if video_file_path and os.path.exists(video_file_path):
            try:
                os.remove(video_file_path)
            except OSError:
                # Cleanup is best-effort and must not mask the real result.
                traceback.print_exc()
# Gradio UI definition: a Blocks app with four tabs (Inference, TTS, Youtube,
# Models). Each tab builds its input/output widgets and wires a button to a
# handler. Apart from convert_yt_to_wav, the handlers (infer, tts_infer,
# update_tts_methods_voice, search_model, post_model) are not defined in this
# file -- presumably they come from the wildcard import of models.model;
# verify against that module.
with gr.Blocks() as app:
    # Page header.
    # NOTE(review): these two HTML snippets contain plain text only; if heading
    # markup (e.g. <h1>/<h4>) was intended, add it back here.
    gr.HTML("Simple RVC Inference - by Juuxn 💻")
    gr.HTML(
        "The current space uses only CPU, so it's only for inference. "
        "It is recommended to duplicate the space to avoid issues with processing queues."
    )
    # Links need a non-empty label, otherwise Markdown renders nothing clickable.
    gr.Markdown(
        "Simple RVC GPU Inference on Colab: "
        "[Open in Colab](https://colab.research.google.com/drive/1NKqqTR04HujeBxzwe7jbYEvNi8LbxD_N?usp=sharing)"
    )
    gr.Markdown(
        "[Duplicate this Space](https://huggingface.co/spaces/juuxn/SimpleRVC?duplicate=true)\n\n"
    )
    gr.Markdown(
        "Collection of models you can use: RVC + AI Kits. "
        "**[RVC Community Models](https://docs.google.com/spreadsheets/d/1owfUtQuLW9ReiIwg6U9UkkDmPOTkuNHf0OKQtWu1iaI)**"
    )

    # --- Tab: voice conversion of an uploaded audio file ---
    with gr.Tab("Inference"):
        model_url = gr.Textbox(
            placeholder="https://huggingface.co/AIVER-SE/BillieEilish/resolve/main/BillieEilish.zip",
            label="Model URL",
            show_label=True,
        )
        with gr.Row():
            with gr.Column():
                audio_path = gr.Audio(label="Audio File", show_label=True, type="filepath")
                index_rate = gr.Slider(
                    minimum=0,
                    maximum=1,
                    label="Search feature ratio:",
                    value=0.75,
                    interactive=True,
                )
                filter_radius1 = gr.Slider(
                    minimum=0,
                    maximum=7,
                    label="Filter (breathing roughness reduction)",
                    value=3,
                    step=1,
                    interactive=True,
                )
            with gr.Column():
                f0_method = gr.Dropdown(
                    choices=["harvest", "pm", "crepe", "crepe-tiny", "mangio-crepe", "mangio-crepe-tiny", "rmvpe"],
                    value="rmvpe",
                    label="Algorithm",
                    show_label=True,
                )
                vc_transform0 = gr.Slider(
                    minimum=-12,
                    label="Number of semitones, up an octave: 12, down an octave: -12",
                    value=0,
                    maximum=12,
                    step=1,
                )
                protect0 = gr.Slider(
                    minimum=0,
                    maximum=0.5,
                    label="Protect voiceless consonants and breathing sounds. 0.5 to disable.",
                    value=0.33,
                    step=0.01,
                    interactive=True,
                )
                resample_sr1 = gr.Slider(
                    minimum=0,
                    maximum=48000,
                    label="Resample the output audio to the final sampling rate. 0 for no resampling.",
                    value=0,
                    step=1,
                    interactive=True,
                )

        # Output
        with gr.Row():
            vc_output1 = gr.Textbox(label="Output")
            vc_output2 = gr.Audio(label="Output Audio")

        btn = gr.Button(value="Convert")
        # The order of `inputs` is the positional argument order passed to infer.
        btn.click(
            infer,
            inputs=[
                model_url,
                f0_method,
                audio_path,
                index_rate,
                vc_transform0,
                protect0,
                resample_sr1,
                filter_radius1,
            ],
            outputs=[vc_output1, vc_output2],
        )

    # --- Tab: text-to-speech followed by voice conversion ---
    with gr.Tab("TTS"):
        with gr.Row():
            tts_text = gr.Textbox(
                label="Text:",
                placeholder="Text you want to convert to speech...",
                lines=6,
            )

        with gr.Column():
            with gr.Row():
                tts_model_url = gr.Textbox(
                    placeholder="https://huggingface.co/AIVER-SE/BillieEilish/resolve/main/BillieEilish.zip",
                    label="RVC Model URL",
                    show_label=True,
                )
            with gr.Row():
                tts_method = gr.Dropdown(
                    choices=VOICE_METHODS,
                    value="Edge-tts",
                    label="TTS Method:",
                    visible=True,
                )
                tts_model = gr.Dropdown(
                    choices=EDGE_VOICES,
                    label="TTS Model:",
                    visible=True,
                    interactive=True,
                )
                # Created hidden; it is one of the outputs of the
                # tts_method.change handler below.
                tts_api_key = gr.Textbox(
                    label="ElevenLabs API Key",
                    show_label=True,
                    placeholder="4a4afce72349680c8e8b6fdcfaf2b65a",
                    interactive=True,
                    visible=False,
                )

            # Created hidden; also an output of the tts_method.change handler.
            tts_coqui_languages = gr.Radio(
                label="Language",
                choices=COQUI_LANGUAGES,
                value="en",
                visible=False,
            )

        tts_btn = gr.Button(value="Convert")

        with gr.Row():
            tts_vc_output1 = gr.Textbox(label="Output")
            tts_vc_output2 = gr.Audio(label="Output Audio")

        tts_btn.click(
            fn=tts_infer,
            inputs=[
                tts_text,
                tts_model_url,
                tts_method,
                tts_model,
                tts_api_key,
                tts_coqui_languages,
            ],
            outputs=[tts_vc_output1, tts_vc_output2],
        )

        # Created hidden; an output of the tts_method.change handler below.
        tts_msg = gr.Markdown("""**I recommend creating an Eleven Labs account and entering your API key; it's free and you have a limit of 10k characters per month.**

""", visible=False)

        # Changing the TTS method updates the four components listed in `outputs`.
        tts_method.change(
            fn=update_tts_methods_voice,
            inputs=[tts_method],
            outputs=[tts_model, tts_msg, tts_api_key, tts_coqui_languages],
        )

    # --- Tab: download a YouTube video's audio as WAV ---
    with gr.Tab("Youtube"):
        gr.Markdown("## Convert YouTube video to audio")
        with gr.Row():
            yt_url = gr.Textbox(
                label="Video URL:",
                placeholder="https://www.youtube.com/watch?v=3vEiqil5d3Q",
            )
        yt_btn = gr.Button(value="Convert")

        with gr.Row():
            yt_output1 = gr.Textbox(label="Output")
            yt_output2 = gr.Audio(label="Output Audio")

        yt_btn.click(fn=convert_yt_to_wav, inputs=[yt_url], outputs=[yt_output1, yt_output2])

    # --- Tab: search for and submit community models ---
    with gr.Tab("Models"):
        gr.HTML("Search models")
        search_name = gr.Textbox(
            placeholder="Billie Eilish (RVC v2 - 100 epoch)",
            label="Name",
            show_label=True,
        )

        # Output
        with gr.Row():
            search_output = gr.Markdown(label="Output")

        btn_search_model = gr.Button(value="Search")
        btn_search_model.click(fn=search_model, inputs=[search_name], outputs=[search_output])

        gr.HTML("Submit your model")
        post_name = gr.Textbox(
            placeholder="Billie Eilish (RVC v2 - 100 epoch)",
            label="Name",
            show_label=True,
        )
        post_model_url = gr.Textbox(
            placeholder="https://huggingface.co/AIVER-SE/BillieEilish/resolve/main/BillieEilish.zip",
            label="Model URL",
            show_label=True,
        )
        post_creator = gr.Textbox(
            placeholder="Discord ID or link to creator's profile",
            label="Creator",
            show_label=True,
        )
        post_version = gr.Dropdown(
            choices=["RVC v1", "RVC v2"],
            value="RVC v1",
            label="Version",
            show_label=True,
        )

        # Output
        with gr.Row():
            post_output = gr.Markdown(label="Output")

        btn_post_model = gr.Button(value="Post")
        # Note the input order: version comes before creator.
        btn_post_model.click(
            fn=post_model,
            inputs=[post_name, post_model_url, post_version, post_creator],
            outputs=[post_output],
        )

# Enable request queuing, then start the server.
app.queue(concurrency_count=200, max_size=1022).launch()
#share=True