"""Gradio front-end with RVC inference, TTS, YouTube-to-WAV and model tabs."""

# NOTE(review): import order (including the repeated ``import os``) is kept as
# in the original because the wildcard import below may rebind names such as
# ``os`` -- confirm against ``models.model`` before regrouping.
import gradio as gr
import os
from constants import VOICE_METHODS, BARK_VOICES, EDGE_VOICES
import platform
from models.model import *
from tts.conversion import COQUI_LANGUAGES
import pytube
import os
import traceback
from pydub import AudioSegment

# from audio_enhance.functions import audio_enhance


def convert_yt_to_wav(url):
    """Download the audio track of a YouTube video and convert it to WAV.

    Args:
        url: Link to the YouTube video.

    Returns:
        A ``(status, audio_path)`` tuple. ``status`` is ``"Success"`` or a
        human-readable error message; ``audio_path`` is the path of the
        generated WAV file, or ``None`` when the conversion failed.
    """
    if not url:
        return "Please enter the video link", None

    video_output_folder = "yt_videos"
    audio_output_folder = "audios"
    video_file_path = None

    try:
        print(f"Converting video {url}...")
        # Download the audio-only stream using pytube.
        video = pytube.YouTube(url)
        stream = video.streams.filter(only_audio=True).first()
        if stream is None:
            return "No audio stream found for this video.", None

        # The export below fails if the destination folder is missing.
        os.makedirs(audio_output_folder, exist_ok=True)

        print("Downloading video")
        video_file_path = stream.download(output_path=video_output_folder)
        print(video_file_path)

        # Derive the output name and the input container from the real
        # extension instead of assuming the download is always ".mp4".
        base_name, extension = os.path.splitext(
            os.path.basename(video_file_path)
        )
        audio_file_path = os.path.join(audio_output_folder, base_name + ".wav")
        input_format = extension.lstrip(".").lower() or None

        print("Converting to wav")
        sound = AudioSegment.from_file(video_file_path, format=input_format)
        sound.export(audio_file_path, format="wav")

        return "Success", audio_file_path
    except ConnectionResetError:
        return "Connection lost, please refresh or try again later.", None
    except Exception as e:
        # UI boundary: report the message to the user, keep the trace in logs.
        traceback.print_exc()
        return str(e), None
    finally:
        # Remove the temporary download on success and on failure alike.
        if video_file_path and os.path.exists(video_file_path):
            try:
                os.remove(video_file_path)
            except OSError as cleanup_error:
                print(f"Could not remove {video_file_path}: {cleanup_error}")


# NOTE(review): the nesting of rows/columns below was reconstructed from the
# order of the widgets; confirm the layout against the running app.
with gr.Blocks() as app:
    gr.HTML("\n\nSimple RVC Inference - by Juuxn 💻\n\n")
    gr.HTML(
        "\n\nThe current space uses only CPU, so it's only for inference. "
        "It is recommended to duplicate the space to avoid issues with "
        "processing queues.\n\n"
    )
    gr.Markdown("Simple RVC GPU Inference on Colab: [![Open In Colab](https://img.shields.io/badge/Colab-F9AB00?style=for-the-badge&logo=googlecolab&color=525252)](https://colab.research.google.com/drive/1NKqqTR04HujeBxzwe7jbYEvNi8LbxD_N?usp=sharing)")
    gr.Markdown(
        "[![Duplicate this Space](https://huggingface.co/datasets/huggingface/badges/raw/main/duplicate-this-space-sm-dark.svg)](https://huggingface.co/spaces/juuxn/SimpleRVC?duplicate=true)\n\n"
    )
    gr.Markdown("Collection of models you can use: RVC + AI Kits. **[RVC Community Models](https://docs.google.com/spreadsheets/d/1owfUtQuLW9ReiIwg6U9UkkDmPOTkuNHf0OKQtWu1iaI)**")

    with gr.Tab("Inference"):
        model_url = gr.Textbox(
            placeholder="https://huggingface.co/AIVER-SE/BillieEilish/resolve/main/BillieEilish.zip",
            label="Model URL",
            show_label=True,
        )
        with gr.Row():
            with gr.Column():
                audio_path = gr.Audio(
                    label="Audio File", show_label=True, type="filepath"
                )
                index_rate = gr.Slider(
                    minimum=0,
                    maximum=1,
                    label="Search feature ratio:",
                    value=0.75,
                    interactive=True,
                )
                filter_radius1 = gr.Slider(
                    minimum=0,
                    maximum=7,
                    label="Filter (breathing roughness reduction)",
                    value=3,
                    step=1,
                    interactive=True,
                )
            with gr.Column():
                f0_method = gr.Dropdown(
                    choices=[
                        "harvest",
                        "pm",
                        "crepe",
                        "crepe-tiny",
                        "mangio-crepe",
                        "mangio-crepe-tiny",
                        "rmvpe",
                    ],
                    value="rmvpe",
                    label="Algorithm",
                    show_label=True,
                )
                vc_transform0 = gr.Slider(
                    minimum=-12,
                    label="Number of semitones, up an octave: 12, down an octave: -12",
                    value=0,
                    maximum=12,
                    step=1,
                )
                protect0 = gr.Slider(
                    minimum=0,
                    maximum=0.5,
                    label="Protect voiceless consonants and breathing sounds. 0.5 to disable.",
                    value=0.33,
                    step=0.01,
                    interactive=True,
                )
                resample_sr1 = gr.Slider(
                    minimum=0,
                    maximum=48000,
                    label="Resample the output audio to the final sampling rate. \n0 for no resampling.",
                    value=0,
                    step=1,
                    interactive=True,
                )

        # Output
        with gr.Row():
            vc_output1 = gr.Textbox(label="Output")
            vc_output2 = gr.Audio(label="Output Audio")

        btn = gr.Button(value="Convert")
        btn.click(
            infer,
            inputs=[
                model_url,
                f0_method,
                audio_path,
                index_rate,
                vc_transform0,
                protect0,
                resample_sr1,
                filter_radius1,
            ],
            outputs=[vc_output1, vc_output2],
        )

    with gr.TabItem("TTS"):
        with gr.Row():
            tts_text = gr.Textbox(
                label="Text:",
                placeholder="Text you want to convert to speech...",
                lines=6,
            )

        with gr.Column():
            with gr.Row():
                tts_model_url = gr.Textbox(
                    placeholder="https://huggingface.co/AIVER-SE/BillieEilish/resolve/main/BillieEilish.zip",
                    label="RVC Model URL",
                    show_label=True,
                )

            with gr.Row():
                tts_method = gr.Dropdown(
                    choices=VOICE_METHODS,
                    value="Edge-tts",
                    label="TTS Method:",
                    visible=True,
                )
                tts_model = gr.Dropdown(
                    choices=EDGE_VOICES,
                    label="TTS Model:",
                    visible=True,
                    interactive=True,
                )
                tts_api_key = gr.Textbox(
                    label="ElevenLabs API Key",
                    show_label=True,
                    placeholder="4a4afce72349680c8e8b6fdcfaf2b65a",
                    interactive=True,
                    visible=False,
                )
                tts_coqui_languages = gr.Radio(
                    label="Language",
                    choices=COQUI_LANGUAGES,
                    value="en",
                    visible=False,
                )

        tts_btn = gr.Button(value="Convert")

        with gr.Row():
            tts_vc_output1 = gr.Textbox(label="Output")
            tts_vc_output2 = gr.Audio(label="Output Audio")

        tts_btn.click(
            fn=tts_infer,
            inputs=[
                tts_text,
                tts_model_url,
                tts_method,
                tts_model,
                tts_api_key,
                tts_coqui_languages,
            ],
            outputs=[tts_vc_output1, tts_vc_output2],
        )

        # Hidden by default; its visibility is one of the outputs of the
        # ``tts_method.change`` handler below.
        tts_msg = gr.Markdown(
            "**I recommend creating an Eleven Labs account and entering your API key; it's free and you have a limit of 10k characters per month.**\n"
            "![Imgur](https://imgur.com/HH6YTu0.png) ",
            visible=False,
        )

        tts_method.change(
            fn=update_tts_methods_voice,
            inputs=[tts_method],
            outputs=[tts_model, tts_msg, tts_api_key, tts_coqui_languages],
        )

    with gr.TabItem("Youtube"):
        gr.Markdown("## Convert YouTube video to audio")
        with gr.Row():
            yt_url = gr.Textbox(
                label="Video URL:",
                placeholder="https://www.youtube.com/watch?v=3vEiqil5d3Q",
            )
        yt_btn = gr.Button(value="Convert")

        with gr.Row():
            yt_output1 = gr.Textbox(label="Output")
            yt_output2 = gr.Audio(label="Output Audio")

        yt_btn.click(
            fn=convert_yt_to_wav,
            inputs=[yt_url],
            outputs=[yt_output1, yt_output2],
        )

    with gr.Tab("Models"):
        gr.HTML("\n\nSearch models\n\n")
        search_name = gr.Textbox(
            placeholder="Billie Eilish (RVC v2 - 100 epoch)",
            label="Name",
            show_label=True,
        )

        # Output
        with gr.Row():
            search_output = gr.Markdown(label="Output")

        btn_search_model = gr.Button(value="Search")
        btn_search_model.click(
            fn=search_model, inputs=[search_name], outputs=[search_output]
        )

        gr.HTML("\n\nSubmit your model\n\n")
        post_name = gr.Textbox(
            placeholder="Billie Eilish (RVC v2 - 100 epoch)",
            label="Name",
            show_label=True,
        )
        post_model_url = gr.Textbox(
            placeholder="https://huggingface.co/AIVER-SE/BillieEilish/resolve/main/BillieEilish.zip",
            label="Model URL",
            show_label=True,
        )
        post_creator = gr.Textbox(
            placeholder="Discord ID or link to creator's profile",
            label="Creator",
            show_label=True,
        )
        post_version = gr.Dropdown(
            choices=["RVC v1", "RVC v2"],
            value="RVC v1",
            label="Version",
            show_label=True,
        )

        # Output
        with gr.Row():
            post_output = gr.Markdown(label="Output")

        btn_post_model = gr.Button(value="Post")
        btn_post_model.click(
            fn=post_model,
            inputs=[post_name, post_model_url, post_version, post_creator],
            outputs=[post_output],
        )

app.queue(concurrency_count=200, max_size=1022).launch()  # share=True