bixoryai
/

opentunes-ai

+"""
+# Melody Generation Model Development
+# Project: Opentunes.ai
+This notebook implements a Transformer-based melody generation model.
+The model takes text prompts and generates musical melodies in MIDI format.
+Key Features:
+- Text-to-melody generation
+- MIDI file handling
+- Transformer architecture
+- Training pipeline integration with HuggingFace
+Note: This is a starting point and might need adjustments based on:
+- Specific musical requirements
+- Available training data
+- Computational resources
+- Desired output format
+"""
+import torch
+import torch.nn as nn
+from transformers import (
+    AutoModelForAudio,
+    AutoTokenizer,
+    Trainer,
+    TrainingArguments
+)
+import librosa
+import numpy as np
+import pandas as pd
+import music21
+from pathlib import Path
+import json
+import wandb  # for experiment tracking
+# =====================================
+# 1. Data Loading and Preprocessing
+# =====================================
class MelodyDataset(torch.utils.data.Dataset):
    """
    Dataset of melodies read from the MIDI files in a directory.

    Each item is one MIDI file converted to two fixed-length tensors:
    the pitch of every note and the duration of every note, in file order.

    Args:
        data_dir (str): Directory containing ``*.mid`` files (not searched
            recursively).
        max_length (int): Length every returned sequence is padded or
            truncated to (default: 512).

    Attributes:
        data_dir (Path): ``data_dir`` as a ``Path``.
        max_length (int): Fixed output sequence length.
        midi_files (list[Path]): MIDI files found, sorted by path.
        tokenizer: ``t5-small`` tokenizer loaded for text prompts. It is not
            used by any method of this class.

    Note:
        Padded positions hold pitch 0 and duration 0.0. Pitch 0 is also a
        valid MIDI pitch, so use ``duration == 0`` (or the original sequence
        length) rather than ``pitch == 0`` to detect padding.
    """

    def __init__(self, data_dir, max_length=512):
        self.data_dir = Path(data_dir)
        self.max_length = max_length
        # glob() yields files in filesystem order, which varies between
        # machines; sorting keeps the index -> file mapping reproducible.
        self.midi_files = sorted(self.data_dir.glob("*.mid"))
        # Initialize tokenizer for text prompts
        self.tokenizer = AutoTokenizer.from_pretrained("t5-small")
        print(f"Found {len(self.midi_files)} MIDI files in {data_dir}")

    def midi_to_sequence(self, midi_path):
        """
        Convert a MIDI file to a sequence of notes.

        Only single notes are kept; rests and any other elements yielded by
        ``notesAndRests`` that are not ``music21.note.Note`` are skipped.

        Args:
            midi_path (Path): Path to MIDI file

        Returns:
            list: One dict per note with keys
                - 'pitch' (int): MIDI pitch number
                - 'duration' (float): length in quarter notes
                - 'offset' (float): start time in quarter notes

        Example output:
        [
            {'pitch': 60, 'duration': 1.0, 'offset': 0.0},  # Middle C, quarter note
            {'pitch': 64, 'duration': 0.5, 'offset': 1.0},  # E, eighth note
            ...
        ]
        """
        score = music21.converter.parse(str(midi_path))
        # Stream.flat is deprecated in newer music21 releases in favour of
        # flatten(); fall back to .flat for releases that predate it.
        flat_score = score.flatten() if hasattr(score, "flatten") else score.flat
        return [
            {
                'pitch': element.pitch.midi,
                # music21 may report tuplets as fractions.Fraction; convert so
                # callers always receive plain floats, as documented above.
                'duration': float(element.duration.quarterLength),
                'offset': float(element.offset),
            }
            for element in flat_score.notesAndRests
            if isinstance(element, music21.note.Note)
        ]

    def __getitem__(self, idx):
        """
        Get a single item from the dataset.

        Args:
            idx (int): Index of the item

        Returns:
            dict: Dictionary containing:
                - 'notes': int64 tensor of note pitches, shape (max_length,)
                - 'durations': float32 tensor of note durations,
                  shape (max_length,)

        Note: Both tensors are zero-padded or truncated to max_length, and
        their dtypes are the same for every item regardless of which of the
        two happened.
        """
        melody_sequence = self.midi_to_sequence(self.midi_files[idx])
        # Truncate first so at most max_length notes are converted.
        kept = melody_sequence[:self.max_length]

        # Start from all-padding buffers with explicit dtypes, then copy the
        # real notes in. This keeps dtypes fixed for short, long and empty
        # sequences alike.
        notes = torch.zeros(self.max_length, dtype=torch.long)
        durations = torch.zeros(self.max_length, dtype=torch.float32)
        if kept:
            notes[:len(kept)] = torch.tensor(
                [int(n['pitch']) for n in kept], dtype=torch.long
            )
            # float() guards against Fraction durations from an overriding
            # midi_to_sequence implementation.
            durations[:len(kept)] = torch.tensor(
                [float(n['duration']) for n in kept], dtype=torch.float32
            )
        return {
            'notes': notes,
            'durations': durations,
        }

    def __len__(self):
        """Return the number of MIDI files found in the data directory."""
        return len(self.midi_files)