" + title + "

import os
import subprocess
import sys

# Install this project in editable mode at start-up (presumably so that the
# imports further down can resolve -- confirm against the deployment setup).
# Going through ``sys.executable`` targets the interpreter that is actually
# running this script (a bare ``python3`` on PATH may be a different one), and
# passing an argument list avoids spawning a shell.
_pip_result = subprocess.run(
    [sys.executable, "-m", "pip", "install", "-e", "."],
    check=False,  # best effort: a failed install does not abort start-up here
)
if _pip_result.returncode != 0:
    # Surface the failure instead of dropping the exit status silently.
    print(
        f"pip install -e . exited with status {_pip_result.returncode}",
        file=sys.stderr,
    )

import gradio as gr

import note_seq
from pytube import YouTube
from pydub import AudioSegment

from inferencemodel import InferenceModel
from utils import upload_audio

# Audio sample rate constant (presumably in Hz) and the SoundFont file name.
# SF2_PATH is not referenced by any other code visible in this file.
SAMPLE_RATE = 16_000
SF2_PATH = "SGM-v2.01-Sal-Guit-Bass-V1.3.sf2"

# Module-level state: the name of the model that is currently loaded and the
# InferenceModel instance built for it at start-up. change_model() rebinds
# both when a different model is requested.
current_model = "mt3"
inference_model = InferenceModel("/home/user/app/checkpoints/mt3/", current_model)

def change_model(model):
    """Swap the global inference model for the one named ``model``.

    Does nothing when ``model`` is already the active model. Otherwise a new
    ``InferenceModel`` is built and the module-level ``inference_model`` and
    ``current_model`` are rebound to it.

    Args:
        model: Model name selected in the UI ("mt3" or "ismir2021").
    """
    global current_model, inference_model
    if model == current_model:
        return

    # Load the checkpoint that belongs to the requested model type rather
    # than always reading the mt3 one.
    # NOTE(review): assumes checkpoints are laid out as
    # /home/user/app/checkpoints/<model>/ (the upstream MT3 layout) -- confirm.
    checkpoint_path = f"/home/user/app/checkpoints/{model}/"
    inference_model = InferenceModel(checkpoint_path, model)
    # Only record the new name once the model was built successfully.
    current_model = model
    print("Inferece model", inference_model)

# Credits https://huggingface.co/spaces/rajesh1729/youtube-video-transcription-with-whisper
def get_audio(url):
    """Download a YouTube video's audio and return its first 10 seconds.

    The audio-only stream is downloaded into the current directory, decoded
    with pydub, and the downloaded file is deleted again before returning.

    Args:
        url: YouTube video URL.

    Returns:
        A pydub ``AudioSegment`` holding at most the first 10 seconds.

    Raises:
        ValueError: If the video exposes no audio-only stream.
    """
    yt = YouTube(url)
    stream = yt.streams.filter(only_audio=True).first()
    if stream is None:
        raise ValueError("No audio-only stream available for this video.")

    out_file = stream.download(output_path=".")
    print("file a is:", out_file)
    try:
        # The download keeps its real container format, so it is decoded with
        # from_file (format auto-detected) instead of being renamed to
        # ".wav" and parsed as WAV, which fails for non-WAV data.
        full_audio = AudioSegment.from_file(out_file)
    finally:
        # Always remove the temporary download, even when decoding fails.
        os.remove(out_file)

    # pydub does things in milliseconds
    ten_seconds = 10 * 1000
    # NOTE(review): the caller forwards this AudioSegment to a gr.Audio
    # output -- confirm Gradio accepts it, or export to a file path instead.
    return full_audio[:ten_seconds]

# Credits https://huggingface.co/spaces/jeffistyping/Youtube-Whisperer
def populate_metadata(link):
    """Look up a YouTube video and fetch its audio clip.

    Args:
        link: YouTube video URL.

    Returns:
        A ``(thumbnail_url, title, audio)`` tuple, where ``audio`` is whatever
        ``get_audio(link)`` returns.
    """
    video = YouTube(link)
    clip = get_audio(link)
    # The metadata attributes are read only after the audio has been fetched.
    thumbnail_url = video.thumbnail_url
    video_title = video.title
    return thumbnail_url, video_title, clip

def inference(audio):
    """Transcribe an audio file to MIDI with the active inference model.

    Args:
        audio: Path of the audio file to transcribe.

    Returns:
        Path of the written MIDI file ("./transcribed.mid").

    Raises:
        ValueError: If ``audio`` is None, i.e. no audio was provided.
    """
    # Fail with a clear message instead of an opaque TypeError from open().
    if audio is None:
        raise ValueError("No audio to transcribe; load a YouTube link first.")

    with open(audio, "rb") as fd:
        contents = fd.read()

    # Use the module-level constant rather than a second hard-coded 16000.
    samples = upload_audio(contents, sample_rate=SAMPLE_RATE)
    est_ns = inference_model(samples)

    # NOTE(review): this fixed output path is shared by every request, so
    # concurrent transcriptions would overwrite each other -- confirm whether
    # that matters for this deployment.
    midi_path = "./transcribed.mid"
    note_seq.sequence_proto_to_midi_file(est_ns, midi_path)
    return midi_path
  
# Page heading rendered at the top of the demo.
title = "Transcribe music from YouTube videos using Transformers."

# Short blurb shown under the heading; it starts and ends with a newline.
description = (
    "\n"
    "Gradio demo for Music Transcription with Transformers."
    " Read more in the links below."
    "\n"
)

# HTML footer linking the MT3 paper and repository. In this file it is only
# referenced from the disabled gr.Interface snippet at the bottom.
article = (
    "<p style='text-align: center'>"
    "<a href='https://arxiv.org/abs/2111.03017' target='_blank'>"
    "MT3: Multi-Task Multitrack Music Transcription</a>"
    " | "
    "<a href='https://github.com/magenta/mt3' target='_blank'>Github Repo</a>"
    "</p>"
)

# Build the demo UI with the Blocks API.
demo = gr.Blocks()

# Components created inside the context are attached to `demo`.
with demo:
    gr.Markdown(
        "<h1 style='text-align: center; margin-bottom: 1rem'>"
        + title
        + "</h1>"
    )
    gr.Markdown(description)
    with gr.Box():
        gr.Markdown("""
                    Select your model: The ismir2021 model transcribes piano only, with note velocities. 
                    The mt3 model transcribes multiple simultaneous instruments, but without velocities.
                    """)
        model = gr.Radio(
            ["mt3", "ismir2021"], label="What kind of model you want to use?", value="mt3"
        )
        # Reload the inference model whenever a different radio option is picked.
        model.change(fn=change_model, inputs=model, outputs=[])

        link = gr.Textbox(label="YouTube Link")
        with gr.Row().style(mobile_collapse=False, equal_height=True):
            # Named video_title so the component does not shadow the
            # module-level `title` string used for the page heading.
            video_title = gr.Label(label="Video Title", placeholder="Title")
            img = gr.Image(label="Thumbnail")
        with gr.Row():
            # inference() opens its argument as a file, so the component must
            # hand over a path rather than the default (rate, ndarray) tuple.
            yt_audio = gr.Audio(type="filepath")

        # Fill thumbnail, title and audio preview whenever the link changes.
        link.change(
            fn=populate_metadata,
            inputs=link,
            outputs=[img, video_title, yt_audio],
        )

        with gr.Row():
            btn = gr.Button("Transcribe music")

        # Download slot for the generated MIDI file.
        audio_file = gr.File()

        btn.click(inference, inputs=[yt_audio], outputs=audio_file)


demo.launch()

# Disabled legacy UI: the string literal below is a bare expression statement,
# so it is never executed. It sketches a gr.Interface version of the demo and
# refers to an `examples` name that is not defined in the visible code.
""" gr.Interface(
    inference, 
    gr.inputs.Audio(type="filepath", label="Input"), 
    [gr.outputs.File(label="Output")],
    title=title,
    description=description,
    article=article,
    examples=examples,
    ).launch().queue() """