File size: 10,293 Bytes
c15280f
 
 
6de323d
 
 
 
 
 
 
 
c15280f
 
 
 
6de323d
 
 
 
 
 
 
c15280f
6de323d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5bdf75f
 
6de323d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5bdf75f
6de323d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5bdf75f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6de323d
 
 
 
 
258e903
6de323d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5bdf75f
6de323d
 
 
 
 
 
 
 
 
 
 
 
5bdf75f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6de323d
5bdf75f
6de323d
c15280f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
from threading import Thread
from pathlib import Path
import gradio as gr
import subprocess
import shutil
import time
import copy
import glob
import json
import os

# Anchor all working paths to this script's directory so behavior does not
# depend on the process's current working directory.
current_dir = Path(__file__).resolve().parent
MODELOS = current_dir / "modelos"  # one subfolder per voice model (G_*.pth + *.json)
SEGMENTS_DIRNAME = current_dir / "segments"  # temporary input audio slices
INFERENCE_OUTPUT_DIRNAME = current_dir / "inference_output"  # converted slices

def slice_audio(filepath):
    """Split *filepath* into 75-second segments inside SEGMENTS_DIRNAME.

    Segments are named ``<basename>_<index><extension>`` so a later
    lexicographic sort restores their original order.

    Raises AssertionError if *filepath* does not exist.
    """
    assert os.path.exists(filepath), f"No se ha encontrado {filepath}."
    root, extension = os.path.splitext(filepath)
    # Bug fix: the original computed the basename but never used it, leaving a
    # broken literal in the output pattern; os.path.basename is also portable
    # where split("/") was not.
    basename = os.path.basename(root)
    os.makedirs(SEGMENTS_DIRNAME, exist_ok=True)
    output_pattern = os.path.join(SEGMENTS_DIRNAME, f"{basename}_%d{extension}")
    # Argument list (shell=False) is safe for paths containing spaces/quotes,
    # unlike the original os.system() f-string.
    subprocess.run(
        ["ffmpeg", "-i", filepath, "-f", "segment", "-segment_time", "75",
         "-c", "copy", output_pattern],
        check=False,
    )

def get_container_format(filename):
    """Return the container format name ffprobe reports for *filename*.

    Raises ValueError with ffprobe's stderr text when the probe fails.
    """
    command = ["ffprobe", "-v", "error", "-select_streams", "v:0", "-show_entries", "format=format_name", "-of", "default=noprint_wrappers=1:nokey=1", filename]
    result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # Bug fix: the original raised whenever stderr was non-empty, but ffmpeg
    # tools can emit warnings on stderr while still succeeding. The exit code
    # is the reliable failure signal.
    if result.returncode != 0:
        raise ValueError(f"Error: {result.stderr.decode()}")
    return result.stdout.decode().strip()

def concatenate_segments(foldername, final_filename):
    """Concatenate every file in *foldername* (sorted) into one output file.

    Writes an ffmpeg concat list and stream-copies the segments into
    ``foldername/final_filename``.
    """
    foldername = Path(foldername)
    # Bug fix: the original `assert foldername.exists` evaluated the bound
    # method object (always truthy) instead of calling it.
    assert foldername.exists(), f"No se ha encontrado {foldername}."
    # Snapshot the segment list before creating concat_list.txt so the list
    # file never includes itself.
    all_segs = [f for f in sorted(foldername.glob("**/*")) if f.is_file()]
    concat_list = foldername / "concat_list.txt"
    with open(concat_list, "w") as f:
        for seg in all_segs:
            f.write(f"file {seg.absolute()}\n")
    # Argument list (shell=False) instead of the original os.system() string.
    subprocess.run(
        ["ffmpeg", "-f", "concat", "-safe", "0", "-i", str(concat_list),
         "-codec", "copy", str(foldername / final_filename)],
        check=False,
    )

def cleanup_dirs():
    """Delete the segment and inference-output working directories if present."""
    for candidate in (SEGMENTS_DIRNAME, INFERENCE_OUTPUT_DIRNAME):
        target = Path(candidate)
        if not target.exists():
            continue
        shutil.rmtree(target)

def get_speakers():
    """Scan MODELOS for so-vits-svc voice models and return speaker entries.

    A valid model folder contains a ``G_*.pth`` checkpoint and a ``*.json``
    config; one entry is produced per (non-hidden) name in the config's
    ``spk`` map. Also refreshes the module-level ``speakers`` list that
    convert() reads. Returns the entries sorted case-insensitively by name.
    """
    global speakers
    speakers = []
    for _, dirs, _ in os.walk(MODELOS):
        for folder in dirs:
            cur_speaker = {}
            g = glob.glob(os.path.join(MODELOS, folder, 'G_*.pth'))
            if not g:  # no checkpoint -> not a model folder
                continue
            cur_speaker["model_path"] = g[0]
            cur_speaker["model_folder"] = folder
            cur_speaker["cluster_path"] = ""

            cfg = glob.glob(os.path.join(MODELOS, folder, '*.json'))
            if not cfg:  # checkpoint without config is unusable
                continue
            cur_speaker["cfg_path"] = cfg[0]
            with open(cur_speaker["cfg_path"]) as f:
                try:
                    cfg_json = json.load(f)
                except Exception:
                    # Bug fix: the original printed and fell through, then
                    # used a stale or undefined cfg_json from a previous
                    # iteration. Skip the broken folder instead.
                    print("Archivo json malformado en " + folder)
                    continue
            for name, i in cfg_json["spk"].items():
                cur_speaker["name"] = name
                cur_speaker["id"] = i
                # Names starting with '.' are treated as hidden entries.
                if not name.startswith('.'):
                    speakers.append(copy.copy(cur_speaker))
    return sorted(speakers, key=lambda x: x["name"].lower())

def run_inference(speaker, seg_path, f0_method, transpose, noise_scale, cluster_ratio):
    """Run ``svc infer`` on one audio segment.

    The converted segment is written into INFERENCE_OUTPUT_DIRNAME under the
    same file name. Returns None on success, or a (None, Textbox update)
    pair when a failure is detected, mirroring the convert() return shape.
    """
    model_path = speaker["model_path"]
    config_path = speaker["cfg_path"]
    cluster_path = speaker["cluster_path"]
    # Bug fix: the original built one f-string and .split() it, which breaks
    # on any path containing spaces. Build the argv list explicitly.
    cmd = ["svc", "infer", str(seg_path.absolute()),
           "-m", str(model_path), "-c", str(config_path)]
    # The cluster model is optional: only pass it when configured and requested.
    if cluster_path and cluster_ratio > 0:
        cmd += ["-k", str(cluster_path), "-r", str(cluster_ratio)]
    cmd += ["-t", str(transpose), "--f0-method", f0_method,
            "-n", str(noise_scale),
            "-o", str(Path(INFERENCE_OUTPUT_DIRNAME) / seg_path.name),
            "--no-auto-predict-f0"]
    result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    # An AttributeError in svc's stderr indicates a model built for an
    # incompatible so-vits-svc version.
    if result.stderr and "AttributeError" in result.stderr:
        return None, gr.Textbox.update("⚠️ Modelo SVC incompatible.")
    if not list(Path(SEGMENTS_DIRNAME).glob("*")):
        return None, gr.Textbox.update("⚠️ Error.")

def convert(speaker_box, audio):
    """Gradio callback: convert *audio* to the selected speaker's voice.

    Slices the input into segments, runs inference per segment,
    concatenates the results, and returns
    (output_filename, status Textbox update).
    """
    speaker = next((x for x in speakers if x["name"] == speaker_box), None)
    if not speaker:
        return None, gr.Textbox.update("⚠️ Selecciona un modelo.")
    if not audio:
        return None, gr.Textbox.update("⚠️ Sube un audio.")

    # os.path.join returns `audio` unchanged when it is already absolute.
    file_path = os.path.join(os.getcwd(), str(audio))
    # Fixed inference settings for the hosted demo.
    f0_method = "crepe"
    transpose = 0
    noise_scale = 0.4
    cluster_ratio = 0
    # Start from a clean slate if a previous run left working dirs behind.
    if os.path.exists(SEGMENTS_DIRNAME) or os.path.exists(INFERENCE_OUTPUT_DIRNAME):
        cleanup_dirs()
    slice_audio(file_path)
    # Bug fix: the original created a cwd-relative "inference_output" folder
    # while the rest of the pipeline reads/writes the absolute
    # INFERENCE_OUTPUT_DIRNAME; they only coincided when cwd == script dir.
    os.makedirs(INFERENCE_OUTPUT_DIRNAME, exist_ok=True)
    all_segs_paths = sorted(Path(SEGMENTS_DIRNAME).glob("*"))
    ts0 = time.time()
    for seg_path in all_segs_paths:
        run_inference(speaker, seg_path, f0_method, transpose, noise_scale, cluster_ratio)
    final_filename = f"output{Path(file_path).suffix}"
    concatenate_segments(INFERENCE_OUTPUT_DIRNAME, final_filename)
    shutil.move(Path(INFERENCE_OUTPUT_DIRNAME, final_filename), Path(final_filename))
    cleanup_dirs()
    os.remove(file_path)
    elapsed = int(time.time() - ts0)
    return final_filename, gr.Textbox.update("👌 ¡Voz cambiada!", label=f"Tiempo total: {elapsed} segundos")

def clear():
    """Gradio callback: delete working dirs and temp files, reset the UI.

    Returns updates for the model dropdown, the audio input (cleared),
    and the info textbox.
    """
    shutil.rmtree(SEGMENTS_DIRNAME, ignore_errors=True)
    shutil.rmtree(INFERENCE_OUTPUT_DIRNAME, ignore_errors=True)
    for tmp_file in glob.glob("*.tmp"):
        os.remove(tmp_file)

    # Fix: the original label used an f-string with no placeholders.
    return gr.Dropdown.update(value="Elige un modelo de voz"), None, gr.Textbox.update("🗑️ Datos borrados.", label="Información")


# Custom CSS injected into the Gradio Blocks below: sets the app font,
# hides the default Gradio footer, and defines the header's flex container.
css = """
.gradio-container {
    font-family: 'IBM Plex Sans', sans-serif;
}
footer {
    visibility: hidden;
    display: none;
}
.center-container {
    display: flex;
    flex-direction: column;
    align-items: center;
    justify-content: center;
}
"""

# UI layout. NOTE(review): this uses the Gradio 3.x API (.style(),
# gr.*.update(), enable_queue) — removed in Gradio 4; keep the pinned version.
with gr.Blocks(
    css=css,
    title="VoiceIt! - Pavloh",
    theme=gr.themes.Soft(
        primary_hue="cyan",
        secondary_hue="blue",
        radius_size="lg",
        text_size="lg"
    ).set(loader_color="#0B0F19", shadow_drop='*shadow_drop_lg', block_border_width="3px")
) as pavloh:
  # Static header: logo, badge links (license, repo, PayPal, Twitter), tagline.
  gr.HTML(
          """
      <div class="center-container">
          <img src="https://i.imgur.com/DendqCA.png" style="width: 300px; height: auto;"/><br>
          <div style="display: flex; justify-content: center;">
              <a href="https://github.com/ImPavloh/voiceit/blob/main/LICENSE" target="_blank">
                  <img src="https://img.shields.io/github/license/impavloh/voiceit?style=for-the-badge&logo=github&logoColor=white" alt="Licencia">
              </a>
              <a href="https://github.com/impavloh/voiceit" target="_blank">
                  <img src="https://img.shields.io/badge/repositorio-%23121011.svg?style=for-the-badge&logo=github&logoColor=white" alt="GitHub">
              </a>
              <form action="https://www.paypal.com/donate" method="post" target="_blank">
                <input type="hidden" name="hosted_button_id" value="6FPWP9AWEKSWJ" />
                <input type="image" src="https://img.shields.io/badge/apoyar-%2300457C.svg?style=for-the-badge&logo=paypal&logoColor=white" border="0" name="submit" alt="Botón donar con PayPal" />
                <img alt="" border="0" src="https://www.paypal.com/es_ES/i/scr/pixel.gif" width="1" height="1" />
              </form></center>
              <a href="https://twitter.com/impavloh" target="_blank">
                  <img src="https://img.shields.io/badge/Seguir-%231DA1F2.svg?style=for-the-badge&logo=twitter&logoColor=white" alt="Twitter">
              </a>
          </div>
          <div style="display: inline-flex;align-items: center;gap: 0.8rem;font-size: 1.75rem;">
              <h1 style="font-weight: 900; margin-bottom: 7px;margin-top:5px">🗣️ VoiceIt! - Un proyecto de  <a style="text-decoration: underline;" href="https://twitter.com/impavloh">Pavloh</a></h1>
          </div>
          <p style="margin-bottom: 10px; font-size: 94%; line-height: 23px;">Cambia la voz de audios utilizando modelos pre-entrenados de streamers.</p>
      </div>
        """
  )

  # Main controls: model picker + input audio (left), result + status (right).
  with gr.Row(elem_id="1").style(equal_height=True):
        with gr.Column():
            # get_speakers() also populates the module-level `speakers` list
            # that the convert() callback reads.
            d1 = gr.Dropdown([x["name"] for x in get_speakers()], label="📦 Selecciona un modelo", value="Elige un modelo de voz")
            audio = gr.Audio(label="🗣️ Sube un audio", type="filepath")
        with gr.Column():
            a2 = gr.Audio(label="🔊 Resultado", type="filepath")
            t1 = gr.Textbox(type="text", label="📄 Información", value="Elige un modelo y un audio para cambiar la voz.")
  # Action buttons wired to the clear()/convert() callbacks above.
  with gr.Row():
    b0 = gr.Button("🗑️ Borrar")
    b1 = gr.Button("🎤 Cambiar voz",variant="primary")
    b0.click(clear, outputs=[d1, audio, t1])
    b1.click(convert, inputs=[d1, audio], outputs=[a2, t1])
    
  # Collapsible usage notes / license acceptance text.
  with gr.Row():
        with gr.Accordion(label="Licencia", open=False):
            gr.HTML("""
                <center>
                    <p>
                        <i> Ten en cuenta que los audios deben contener solamente una voz y estar libres de ruido o música de fondo. </i>
                    </p>
                    <p>
                        <i> Asegúrate de que el nombre del archivo no contenga espacios ni símbolos raros, utilizando solo caracteres alfanuméricos y guiones bajos (_) para separar palabras si es necesario. </i>
                    </p>
                    <p>
                        <i> En caso de que tarde más de 5 minutos procesar el audio, es posible que se produzca un error debido a los límites del servidor. En ese caso, tendrás que utilizar VoiceIt localmente usando tus recursos. </i>
                    </p>
                    <p>
                        <i> Al utilizar este sitio web, aceptas la <a style="text-decoration: underline;" href="https://github.com/ImPavloh/voiceit/blob/main/LICENSE">licencia</a> y <a style="text-decoration: underline;" href="https://github.com/ImPavloh/voiceit/blob/main/TERMINOS_DE_USO.txt">condiciones de uso</a>. </i>
                    </p>
                </center>
            """)
            
if __name__ == "__main__":
    # enable_queue is a Gradio 3.x flag: serializes requests through a queue
    # so long-running conversions don't hit the request timeout.
    pavloh.launch(enable_queue=True)