"""Gradio app: search YouTube for a song and convert its audio to MIDI.

The first search hit's audio-only stream is downloaded into memory, decoded,
run through a Pop2Piano model, and the resulting MIDI file is offered for
download next to a playable copy of the source audio.
"""

from io import BytesIO

import gradio as gr
import numpy as np
from librosa import load as librosa_load
from pydub import AudioSegment
from pytubefix import Search
from transformers import Pop2PianoForConditionalGeneration, Pop2PianoProcessor

# Loaded once at import time so every request reuses the same instances.
model = Pop2PianoForConditionalGeneration.from_pretrained("model")
processor = Pop2PianoProcessor.from_pretrained("model")

# Path the generated MIDI is written to and handed to the download button.
MIDI_OUTPUT_PATH = "output.mid"


class AudioNotFoundError(LookupError):
    """Raised when a search yields no result or no audio-only stream."""


def get_audio_from_youtube(search_string):
    """Download and decode the audio of the first YouTube hit for a query.

    Args:
        search_string: Free-text query passed to ``pytubefix.Search``.

    Returns:
        The ``(samples, sample_rate)`` pair produced by ``librosa.load`` with
        ``sr=None`` (i.e. no resampling is requested).

    Raises:
        AudioNotFoundError: If the search returns nothing, or the first
            result exposes no audio-only stream.
    """
    results = Search(search_string).results
    if not results:
        raise AudioNotFoundError(
            f"No YouTube results found for {search_string!r}."
        )

    stream = results[0].streams.filter(only_audio=True).first()
    if stream is None:
        raise AudioNotFoundError(
            f"No audio-only stream available for {search_string!r}."
        )

    # Stream straight into memory; nothing is written to disk here.
    audio_buffer = BytesIO()
    stream.stream_to_buffer(audio_buffer)
    audio_buffer.seek(0)  # rewind so the decoder reads from the start

    audio = AudioSegment.from_file(audio_buffer)

    # Re-encode to MP3 in a second in-memory buffer before handing it to
    # librosa. NOTE(review): presumably because librosa cannot decode the
    # original container from a buffer -- confirm before changing the format.
    mp3_buffer = BytesIO()
    audio.export(mp3_buffer, format="mp3")
    mp3_buffer.seek(0)

    return librosa_load(mp3_buffer, sr=None)


def search2midi(
    query: str, composer: int
) -> tuple[tuple[float, np.ndarray], str]:
    """Turn a YouTube search query into a MIDI file.

    Args:
        query: Search text used to pick the song (first hit wins).
        composer: Composer number; it is appended to ``"composer"`` to form
            the ``composer`` argument of ``model.generate``.

    Returns:
        ``((sample_rate, samples), midi_path)``: the source audio in the
        tuple form accepted by ``gr.Audio`` and the path of the written MIDI.

    Raises:
        gr.Error: If the query is blank, no composer is given, or no audio
            could be found for the query.
    """
    if not query or not query.strip():
        raise gr.Error("Please enter a song to search for.")
    if composer is None:
        raise gr.Error("Please choose a composer number.")

    # The number input may deliver a float; "composer1.0" is not a valid
    # token, so coerce to int before building the name.
    composer_token = f"composer{int(composer)}"

    try:
        data, sr = get_audio_from_youtube(query)
    except AudioNotFoundError as err:
        # Surface lookup failures as a readable message in the UI.
        raise gr.Error(str(err)) from err

    inputs = processor(
        audio=data, sampling_rate=sr, return_tensors="pt"
    ).to("cpu")
    model_output = model.generate(
        input_features=inputs["input_features"],
        composer=composer_token,
    )
    decoded = processor.batch_decode(
        token_ids=model_output.to("cpu"),
        feature_extractor_output=inputs.to("cpu"),
    )
    midi = decoded["pretty_midi_objects"][0]
    midi.write(MIDI_OUTPUT_PATH)

    return (sr, data), MIDI_OUTPUT_PATH


gr.Interface(
    fn=search2midi,
    inputs=[
        gr.Text(
            label="Search and Select song in YT",
            placeholder="Mere dholna sun arijit song",
            autofocus=True,
        ),
        # precision=0 makes the component round to and return an int.
        gr.Number(
            label="Composer Number",
            minimum=1,
            maximum=21,
            value=1,
            precision=0,
        ),
    ],
    outputs=[
        gr.Audio(format="mp3"),
        gr.DownloadButton(label="Download Midi"),
    ],
).launch()