keshavbhandari committed
Commit bdf45ad · 1 Parent(s): 626a7e2

Refactor generate_midi function and remove commented code

Files changed (1)
  app.py  +45 -45
app.py CHANGED
@@ -68,52 +68,52 @@ def save_wav(filepath):
 # modified_midi.dump_midi(Path(output_midi_path))
 
 
-# def generate_midi(caption, temperature=0.9, max_len=500):
-#     device = 'cuda' if torch.cuda.is_available() else 'cpu'
-#     artifact_folder = 'artifacts'
-
-#     tokenizer_filepath = os.path.join(artifact_folder, "vocab_remi.pkl")
-#     # Load the tokenizer dictionary
-#     with open(tokenizer_filepath, "rb") as f:
-#         r_tokenizer = pickle.load(f)
-
-#     # Get the vocab size
-#     vocab_size = len(r_tokenizer)
-#     print("Vocab size: ", vocab_size)
-#     model = Transformer(vocab_size, 768, 8, 5000, 18, 1024, False, 8, device=device)
-#     model_path = os.path.join(artifact_folder, "pytorch_model_140.bin")
-#     model.load_state_dict(torch.load(model_path, map_location=device))
-#     model.eval()
-#     tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-base")
-
-#     inputs = tokenizer(caption, return_tensors='pt', padding=True, truncation=True)
-#     input_ids = nn.utils.rnn.pad_sequence(inputs.input_ids, batch_first=True, padding_value=0)
-#     input_ids = input_ids.to(device)
-#     attention_mask = nn.utils.rnn.pad_sequence(inputs.attention_mask, batch_first=True, padding_value=0)
-#     attention_mask = attention_mask.to(device)
-#     output = model.generate(input_ids, attention_mask, max_len=max_len, temperature=temperature)
-#     output_list = output[0].tolist()
-#     generated_midi = r_tokenizer.decode(output_list)
-#     generated_midi.dump_midi("output.mid")
-#     post_processing("output.mid", "output.mid")
-
-
-# @spaces.GPU(duration=120)
-# def gradio_generate(prompt, temperature, max_length):
-#     # Generate midi
-#     generate_midi(prompt, temperature, max_length)
-
-#     # Convert midi to wav
-#     midi_filename = "output.mid"
-#     save_wav(midi_filename)
-#     wav_filename = midi_filename.replace(".mid", ".wav")
-
-#     # Read the generated WAV file
-#     output_wave, samplerate = sf.read(wav_filename, dtype='float32')
-#     temp_wav_filename = "temp.wav"
-#     wavio.write(temp_wav_filename, output_wave, rate=16000, sampwidth=2)
+def generate_midi(caption, temperature=0.9, max_len=500):
+    device = 'cuda' if torch.cuda.is_available() else 'cpu'
+    artifact_folder = 'artifacts'
+
+    tokenizer_filepath = os.path.join(artifact_folder, "vocab_remi.pkl")
+    # Load the tokenizer dictionary
+    with open(tokenizer_filepath, "rb") as f:
+        r_tokenizer = pickle.load(f)
+
+    # Get the vocab size
+    vocab_size = len(r_tokenizer)
+    print("Vocab size: ", vocab_size)
+    model = Transformer(vocab_size, 768, 8, 2048, 18, 1024, False, 8, device=device)
+    model_path = os.path.join("amaai-lab/text2midi", "pytorch_model.bin")
+    model.load_state_dict(torch.load(model_path, map_location=device))
+    model.eval()
+    tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-base")
+
+    inputs = tokenizer(caption, return_tensors='pt', padding=True, truncation=True)
+    input_ids = nn.utils.rnn.pad_sequence(inputs.input_ids, batch_first=True, padding_value=0)
+    input_ids = input_ids.to(device)
+    attention_mask = nn.utils.rnn.pad_sequence(inputs.attention_mask, batch_first=True, padding_value=0)
+    attention_mask = attention_mask.to(device)
+    output = model.generate(input_ids, attention_mask, max_len=max_len, temperature=temperature)
+    output_list = output[0].tolist()
+    generated_midi = r_tokenizer.decode(output_list)
+    generated_midi.dump_midi("output.mid")
+    # post_processing("output.mid", "output.mid")
+
+
+@spaces.GPU(duration=120)
+def gradio_generate(prompt, temperature, max_length):
+    # Generate midi
+    generate_midi(prompt, temperature, max_length)
+
+    # Convert midi to wav
+    midi_filename = "output.mid"
+    save_wav(midi_filename)
+    wav_filename = midi_filename.replace(".mid", ".wav")
+
+    # Read the generated WAV file
+    output_wave, samplerate = sf.read(wav_filename, dtype='float32')
+    temp_wav_filename = "temp.wav"
+    wavio.write(temp_wav_filename, output_wave, rate=16000, sampwidth=2)
 
-#     return temp_wav_filename, midi_filename  # Return both WAV and MIDI file paths
+    return temp_wav_filename, midi_filename  # Return both WAV and MIDI file paths
 
 @spaces.GPU(duration=120)
 def gradio_generate(prompt, temperature, max_length):
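
For context, a minimal sketch of how the refactored gradio_generate could be exposed through a Gradio Interface. This wiring is not part of the commit; the component labels, slider ranges, and default values below are assumptions rather than values taken from app.py.

    # Hypothetical UI wiring (assumed, not from this commit); gradio_generate
    # is the function added above and returns (wav_path, midi_path).
    import gradio as gr

    demo = gr.Interface(
        fn=gradio_generate,
        inputs=[
            gr.Textbox(label="Caption", placeholder="Describe the music to generate"),
            gr.Slider(0.5, 1.5, value=0.9, label="Temperature"),        # assumed range
            gr.Slider(100, 2000, value=500, step=100, label="Max length"),  # assumed range
        ],
        outputs=[
            gr.Audio(type="filepath", label="Generated audio"),
            gr.File(label="Generated MIDI"),
        ],
    )

    if __name__ == "__main__":
        demo.launch()

Because gradio_generate returns both file paths, a single generation pass can feed the audio preview and the downloadable MIDI output.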