""" # Melody Generation Model Development # Project: Opentunes.ai This notebook implements a Transformer-based melody generation model. The model takes text prompts and generates musical melodies in MIDI format. Key Features: - Text-to-melody generation - MIDI file handling - Transformer architecture - Training pipeline integration with HuggingFace Note: This is a starting point and might need adjustments based on: - Specific musical requirements - Available training data - Computational resources - Desired output format """ import torch import torch.nn as nn from transformers import ( AutoModelForAudio, AutoTokenizer, Trainer, TrainingArguments ) import librosa import numpy as np import pandas as pd import music21 from pathlib import Path import json import wandb # for experiment tracking # ===================================== # 1. Data Loading and Preprocessing # ===================================== class MelodyDataset(torch.utils.data.Dataset): """ Custom Dataset class for handling melody data. This class: - Loads MIDI files from a directory - Converts MIDI files to sequences of notes and durations - Provides data in format suitable for model training Args: data_dir (str): Directory containing MIDI files max_length (int): Maximum sequence length (default: 512) Features: - Handles variable-length MIDI files - Converts complex MIDI structures to simple note sequences - Implements efficient data loading and preprocessing """ def __init__(self, data_dir, max_length=512): self.data_dir = Path(data_dir) self.max_length = max_length self.midi_files = list(self.data_dir.glob("*.mid")) # Initialize tokenizer for text prompts self.tokenizer = AutoTokenizer.from_pretrained("t5-small") print(f"Found {len(self.midi_files)} MIDI files in {data_dir}") def midi_to_sequence(self, midi_path): """ Convert MIDI file to sequence of notes. Args: midi_path (Path): Path to MIDI file Returns: list: List of dictionaries containing note information Each dict has 'pitch', 'duration', and 'offset' Example output: [ {'pitch': 60, 'duration': 1.0, 'offset': 0.0}, # Middle C, quarter note {'pitch': 64, 'duration': 0.5, 'offset': 1.0}, # E, eighth note ... ] """ score = music21.converter.parse(str(midi_path)) notes = [] # Extract notes and their properties for n in score.flat.notesAndRests: if isinstance(n, music21.note.Note): notes.append({ 'pitch': n.pitch.midi, # MIDI pitch number (0-127) 'duration': n.duration.quarterLength, # Duration in quarter notes 'offset': n.offset # Start time in quarter notes }) return notes def __getitem__(self, idx): """ Get a single item from the dataset. Args: idx (int): Index of the item Returns: dict: Dictionary containing: - 'notes': Tensor of note pitches - 'durations': Tensor of note durations Note: Both tensors are padded/truncated to max_length """ midi_file = self.midi_files[idx] melody_sequence = self.midi_to_sequence(midi_file) # Convert to tensors with padding/truncation notes = torch.tensor([n['pitch'] for n in melody_sequence]) durations = torch.tensor([n['duration'] for n in melody_sequence]) # Pad or truncate sequences if len(notes) < self.max_length: # Pad with rest values pad_length = self.max_length - len(notes) notes = torch.cat([notes, torch.zeros(pad_length)]) durations = torch.cat([durations, torch.zeros(pad_length)]) else: # Truncate to max_length notes = notes[:self.max_length] durations = durations[:self.max_length] return { 'notes': notes, 'durations': durations, } def __len__(self): return len(self.midi_files)