CODE / yt2mid /app.py
shethjenil's picture
Upload folder using huggingface_hub
f3b54ac verified
import os
import tempfile
from io import BytesIO

import gradio as gr
from librosa import load as librosa_load
from pydub import AudioSegment
from pytubefix import Search
from transformers import Pop2PianoForConditionalGeneration, Pop2PianoProcessor
# Load the Pop2Piano processor and model once at import time so every request
# reuses them. Both are read from "model" (presumably a local directory that
# ships with the app -- confirm against the deployment layout).
processor = Pop2PianoProcessor.from_pretrained("model")
model = Pop2PianoForConditionalGeneration.from_pretrained("model")
def get_audio_from_youtube(search_string):
    """Fetch the audio of the first YouTube search hit and decode it.

    The first audio-only stream of the top search result is downloaded into
    memory, decoded with pydub, re-encoded as MP3 and finally loaded with
    librosa at its native sampling rate (``sr=None``).

    Args:
        search_string: Free-text YouTube search query.

    Returns:
        The value returned by ``librosa.load``: a ``(samples, sampling_rate)``
        pair.

    Raises:
        ValueError: If ``search_string`` is empty or only whitespace.
        LookupError: If the search yields no results, or the top result has
            no audio-only stream.
    """
    # Fail fast with a clear message instead of sending a blank query.
    if not search_string or not str(search_string).strip():
        raise ValueError("search_string must not be empty")

    results = Search(search_string).results
    if not results:
        raise LookupError(f"No YouTube results found for {search_string!r}")

    # StreamQuery.first() returns None (it does not raise) when nothing matches.
    stream = results[0].streams.filter(only_audio=True).first()
    if stream is None:
        raise LookupError(
            f"Top YouTube result for {search_string!r} has no audio-only stream"
        )

    # Download straight into memory; nothing is written to disk.
    audio_buffer = BytesIO()
    stream.stream_to_buffer(audio_buffer)
    audio_buffer.seek(0)  # rewind so the decoder reads from the start

    # Decode whatever container YouTube served, then re-encode as MP3 before
    # handing the bytes to librosa (presumably because librosa cannot read the
    # original container from an in-memory buffer -- confirm before changing).
    audio = AudioSegment.from_file(audio_buffer)
    mp3_buffer = BytesIO()
    audio.export(mp3_buffer, format="mp3")
    mp3_buffer.seek(0)  # rewind again for librosa

    # sr=None keeps the file's native sampling rate instead of resampling.
    return librosa_load(mp3_buffer, sr=None)
def _composer_token(composer) -> str:
    """Turn the UI's composer number into a ``"composer<N>"`` style token."""
    try:
        index = int(composer)
    except (TypeError, ValueError, OverflowError):
        # None (empty number box), NaN, infinity, or a non-numeric value.
        index = None
    # ``index != composer`` rejects fractional values such as 1.5 while still
    # accepting 1.0; the 1..21 range mirrors the limits of the number input.
    if index is None or index != composer or not 1 <= index <= 21:
        raise ValueError(
            f"composer must be a whole number from 1 to 21, got {composer!r}"
        )
    return f"composer{index}"


def search2midi(query: str, composer: int) -> tuple[tuple[int, object], str]:
    """Search YouTube for a song and transcribe it to a piano MIDI file.

    Args:
        query: Free-text YouTube search query.
        composer: Composer style number (1-21). Whole-valued floats such as
            ``1.0`` are accepted, since a number input may deliver a float.

    Returns:
        A pair ``((sampling_rate, samples), midi_path)``: the source audio in
        the ``(rate, data)`` form fed to the audio output, and the path of
        the generated ``output.mid`` file.

    Raises:
        ValueError: If ``composer`` is missing, fractional, or outside 1-21.
    """
    # Validate before the slow download so bad input fails immediately.
    composer_token = _composer_token(composer)

    data, sr = get_audio_from_youtube(query)
    inputs = processor(audio=data, sampling_rate=sr, return_tensors="pt").to("cpu")
    model_output = model.generate(
        input_features=inputs["input_features"],
        composer=composer_token,
    )
    decoded = processor.batch_decode(
        token_ids=model_output.to("cpu"),
        feature_extractor_output=inputs.to("cpu"),
    )
    midi = decoded["pretty_midi_objects"][0]

    # Write into a fresh directory per call: a fixed path in the working
    # directory would let concurrent requests overwrite each other's result.
    # The file name stays "output.mid" so the download keeps its name.
    # NOTE: the directory is not cleaned up here.
    midi_path = os.path.join(tempfile.mkdtemp(prefix="yt2mid_"), "output.mid")
    midi.write(midi_path)
    return (sr, data), midi_path
# Build the web UI and start serving it: a search box plus a composer number
# (1-21) feed search2midi; the outputs are an audio player and a MIDI
# download button.
gr.Interface(
    fn=search2midi,
    inputs=[
        gr.Text(
            label="Search and Select song in YT",
            placeholder="Mere dholna sun arijit song",
            autofocus=True,
        ),
        gr.Number(label="Composer Number", minimum=1, maximum=21, value=1),
    ],
    outputs=[
        gr.Audio(format="mp3"),
        gr.DownloadButton(label="Download Midi"),
    ],
).launch()