sander-wood committed
Commit 3c428bc · verified · 1 Parent(s): 5c1bee3

Upload 32 files
.gitattributes CHANGED
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
  overview.jpg filter=lfs diff=lfs merge=lfs -text
+ benchmarks.z01 filter=lfs diff=lfs merge=lfs -text
LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2024 Sander Wood
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
benchmarks.z01 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d88d8f54b46b5ea5ce2ba318eaf7393ef6aa63a1a7fa5247b4c4cdb9b917f6b0
+ size 20971520
benchmarks.zip ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6497fc7f335e8f0a5bbddd474d040a218ef13488e280f5ad0b32b51ca036e89f
+ size 14043301
code/README.md ADDED
@@ -0,0 +1,90 @@
+ # CLaMP 2 Codebase
+
+ ## Overview
+ CLaMP 2 is a state-of-the-art multimodal music information retrieval system that supports 101 languages. This codebase includes scripts for training models, extracting features, and utility functions for processing music and text data. Below is a description of the scripts contained in the `code/` folder.
+
+ ## Repository Structure
+ The `code/` folder contains the following scripts:
+
+ ### 1. `config.py`
+ This script contains the training hyperparameters and file paths used in the `train_clamp2.py` and `train_m3.py` scripts. You can modify parameters such as learning rates, batch sizes, and file locations for training data.
+
+ ### 2. `extract_clamp2.py`
+ This script utilizes the pre-trained CLaMP 2 model to extract representations of text (.txt) or music (.abc or .mtf) files from a specified input folder and save the features to a target output folder in `.npy` format. The extracted features can either be normalized for semantic search or retain temporal information for classification tasks.
+
+ **Usage:**
+ ```bash
+ python extract_clamp2.py <input_dir> <output_dir> [--normalize]
+ ```
+ - `input_dir`: Directory containing input data files.
+ - `output_dir`: Directory to save the output features.
+ - `--normalize`: (Optional) Normalize the extracted features. Normalization is not required for music classification tasks, but it is required for semantic search tasks (see the sketch below).
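+
+ For example, assuming `--normalize` yields unit-length feature vectors, cosine similarity for semantic search reduces to a plain dot product. A minimal sketch (file names are placeholders):
+
+ ```python
+ import numpy as np
+
+ # Each saved .npy file holds one feature vector with a leading batch axis: shape (1, 768)
+ query = np.load("query_features/query.npy")[0]
+ keys = [np.load(f"music_features/piece_{i}.npy")[0] for i in range(3)]
+
+ # With normalized features, the dot product is the cosine similarity
+ scores = [float(np.dot(query, k)) for k in keys]
+ best = int(np.argmax(scores))
+ print(f"Best match: piece_{best} (score {scores[best]:.3f})")
+ ```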
+
+ ### 3. `extract_m3.py`
+ This script employs the pre-trained M3 model to extract representations of interleaved ABC notation or MIDI Text Format (MTF) files from the specified input folder, saving the features to the target folder as `.npy` files.
+
+ **Usage:**
+ ```bash
+ python extract_m3.py <input_dir> <output_dir>
+ ```
+ - `input_dir`: Directory with input files (in .abc or .mtf format).
+ - `output_dir`: Directory to save extracted features.
+
+ ### 4. `train_clamp2.py`
+ This script manages the training process for the CLaMP 2 model. It prepares training data from the path specified in the `TRAIN_JSONL` variable, which is defined in `config.py`. If `EVAL_JSONL` is provided in the configuration, it is used for validation; otherwise, 1% of the training data is reserved for validation by default.
+
+ CLaMP 2 uses the multilingual text encoder `FacebookAI/xlm-roberta-base` to process text data. Additionally, it employs the M3 model, pre-trained on both ABC and MIDI data, as the multimodal music encoder. If the pre-trained M3 weights are available and the configuration variable `CLAMP2_LOAD_M3` is set to `True`, the training script automatically loads them.
+
+ **Training Command:**
+ To start the training process, use the following command:
+
+ ```bash
+ python -m torch.distributed.launch --nproc_per_node=<number_of_GPUs> --use_env train_clamp2.py
+ ```
+
+ Replace `<number_of_GPUs>` with the number of GPUs you want to use for training.
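+
+ For instance, on a machine with 4 GPUs:
+
+ ```bash
+ python -m torch.distributed.launch --nproc_per_node=4 --use_env train_clamp2.py
+ ```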
+
+ **Input Data Format:**
+ The input training data should be in JSONL format, where each line contains a single JSON object with the following structure. Fields that do not apply should be set to `null`:
+
+ ```json
+ {
+   "title": "Song Title",
+   "composer": "Composer Name",
+   "genres": ["Genre1", "Genre2"],
+   "description": "Song description.",
+   "lyrics": "Song lyrics.",
+   "tags": ["tag1", "tag2"],
+   "ensembles": ["Ensemble Name"],
+   "instruments": ["Instrument1", "Instrument2"],
+   "summary_en": "English summary.",
+   "summary_nen": {
+     "language": "Language Name",
+     "summary": "Summary in specified language."
+   },
+   "filepaths": [
+     "path/to/abc/file.abc",
+     "path/to/mtf/file.mtf"
+   ]
+ }
+ ```
+
+ For obtaining the English and non-English summaries generated by GPT-4, refer to the `process_data/gpt4_summarize.py` script.
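+
+ A minimal sketch of writing one training record in this format (all values are placeholders; unused fields become JSON `null` via Python's `None`):
+
+ ```python
+ import json
+
+ record = {
+     "title": "Song Title", "composer": "Composer Name",
+     "genres": ["Genre1"], "description": None, "lyrics": None,
+     "tags": None, "ensembles": None, "instruments": ["Instrument1"],
+     "summary_en": "English summary.",
+     "summary_nen": {"language": "Language Name", "summary": "Summary."},
+     "filepaths": ["path/to/abc/file.abc", "path/to/mtf/file.mtf"],
+ }
+
+ # One JSON object per line, as expected by train_clamp2.py
+ with open("train.jsonl", "a", encoding="utf-8") as f:
+     f.write(json.dumps(record, ensure_ascii=False) + "\n")
+ ```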
+
+ ### 5. `train_m3.py`
+ This script is dedicated to training the M3 model on interleaved ABC and MTF files. The directories for training and optional evaluation data should be specified in the `TRAIN_FOLDERS` and `EVAL_FOLDERS` variables, respectively.
+
+ **Training Command:**
+ To start the training process for the M3 model, use the following command:
+
+ ```bash
+ python -m torch.distributed.launch --nproc_per_node=<number_of_GPUs> --use_env train_m3.py
+ ```
+
+ Replace `<number_of_GPUs>` with the number of GPUs you want to use for training.
+
+ **Data Preparation:**
+ The data should be structured in interleaved ABC (.abc) and MTF (.mtf) formats; one possible layout is sketched below. Please refer to the `process_data/` folder for instructions on how to prepare these formats.
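+
+ A hypothetical `TRAIN_FOLDERS` layout (directory and file names are placeholders; the extraction scripts walk input folders recursively for `.abc` and `.mtf` files, and a similar layout is presumably expected here):
+
+ ```
+ training_data/
+ ├── abc/
+ │   ├── piece_0001.abc
+ │   └── piece_0002.abc
+ └── mtf/
+     ├── piece_0001.mtf
+     └── piece_0002.mtf
+ ```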
+
+ ### 6. `utils.py`
+ This utility script contains the model class definitions and helper functions used for training.
code/config.py ADDED
@@ -0,0 +1,67 @@
+ EVAL_SPLIT = 0.01  # Fraction of training data used for evaluation
+ WANDB_KEY = "<your_wandb_key>"  # Set M3_WANDB_LOG/CLAMP2_WANDB_LOG = False if you have no API key for Weights and Biases logging
+
+ # -------------------- Configuration for M3 Training --------------------
+ TRAIN_FOLDERS = [
+     "<path_to_training_data>"  # Directory containing training data
+ ]
+
+ EVAL_FOLDERS = [
+     ""  # (Optional) Directory containing evaluation data
+ ]
+
+ PATCH_SIZE = 64  # Size of each patch
+ PATCH_LENGTH = 512  # Length of the patches
+ PATCH_NUM_LAYERS = 12  # Number of layers in the encoder
+ TOKEN_NUM_LAYERS = 3  # Number of layers in the decoder
+ M3_HIDDEN_SIZE = 768  # Size of the hidden layer
+
+ M3_NUM_EPOCH = 100  # Maximum number of epochs for training
+ M3_LEARNING_RATE = 1e-4  # Learning rate for the optimizer
+ M3_BATCH_SIZE = 16  # Batch size per GPU (single card) during training
+ M3_MASK_RATIO = 0.45  # Ratio of masked elements during training
+ M3_DETERMINISTIC = True  # Ensures deterministic results with random seeds
+ M3_WANDB_LOG = True  # Enable logging to Weights and Biases
+ M3_LOAD_CKPT = True  # Load model weights from a checkpoint if available
+
+ M3_WEIGHTS_PATH = (
+     "weights_m3_p_size_" + str(PATCH_SIZE) +
+     "_p_length_" + str(PATCH_LENGTH) +
+     "_t_layers_" + str(TOKEN_NUM_LAYERS) +
+     "_p_layers_" + str(PATCH_NUM_LAYERS) +
+     "_h_size_" + str(M3_HIDDEN_SIZE) +
+     "_lr_" + str(M3_LEARNING_RATE) +
+     "_batch_" + str(M3_BATCH_SIZE) +
+     "_mask_" + str(M3_MASK_RATIO) + ".pth"
+ )  # Path to store the model weights
+ M3_LOGS_PATH = M3_WEIGHTS_PATH.replace("weights", "logs").replace("pth", "txt")  # Path to save training logs
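+
+ # With the defaults above, M3_WEIGHTS_PATH resolves to
+ # "weights_m3_p_size_64_p_length_512_t_layers_3_p_layers_12_h_size_768_lr_0.0001_batch_16_mask_0.45.pth",
+ # and M3_LOGS_PATH to the corresponding "logs_...txt" file shipped in this commit.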
+
+ # -------------------- Configuration for CLaMP2 Training ----------------
+ TRAIN_JSONL = "<path_to_training_jsonl>"  # Path to the JSONL file with training data
+ EVAL_JSONL = ""  # (Optional) Path to the JSONL file with evaluation data
+
+ CLAMP2_HIDDEN_SIZE = 768  # Size of the hidden layer
+ TEXT_MODEL_NAME = "FacebookAI/xlm-roberta-base"  # Name of the pre-trained text model
+
+ CLAMP2_NUM_EPOCH = 100  # Maximum number of epochs for training
+ CLAMP2_LEARNING_RATE = 5e-5  # Learning rate for the optimizer
+ CLAMP2_BATCH_SIZE = 128  # Batch size per GPU (single card) during training
+ LOGIT_SCALE = 1  # Scaling factor for contrastive loss
+ MAX_TEXT_LENGTH = 128  # Maximum allowed length for text input
+ TEXT_DROPOUT = True  # Whether to apply dropout during text processing
+ CLAMP2_DETERMINISTIC = True  # Ensures deterministic results with random seeds
+ CLAMP2_LOAD_M3 = True  # Load weights from the M3 model
+ CLAMP2_WANDB_LOG = True  # Enable logging to Weights and Biases
+ CLAMP2_LOAD_CKPT = True  # Load weights from a checkpoint if available
+
+ CLAMP2_WEIGHTS_PATH = (
+     "weights_clamp2_h_size_" + str(CLAMP2_HIDDEN_SIZE) +
+     "_lr_" + str(CLAMP2_LEARNING_RATE) +
+     "_batch_" + str(CLAMP2_BATCH_SIZE) +
+     "_scale_" + str(LOGIT_SCALE) +
+     "_t_length_" + str(MAX_TEXT_LENGTH) +
+     "_t_model_" + TEXT_MODEL_NAME.replace("/", "_") +
+     "_t_dropout_" + str(TEXT_DROPOUT) +
+     "_m3_" + str(CLAMP2_LOAD_M3) + ".pth"
+ )  # Path to store CLaMP2 model weights
+ CLAMP2_LOGS_PATH = CLAMP2_WEIGHTS_PATH.replace("weights", "logs").replace("pth", "txt")  # Path to save training logs
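+
+ # With the defaults above, CLAMP2_WEIGHTS_PATH resolves to
+ # "weights_clamp2_h_size_768_lr_5e-05_batch_128_scale_1_t_length_128_t_model_FacebookAI_xlm-roberta-base_t_dropout_True_m3_True.pth"
+ # (str(5e-5) == "5e-05"), matching the CLaMP 2 log file shipped in this commit.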
code/extract_clamp2.py ADDED
@@ -0,0 +1,192 @@
+ import os
+ import json
+ import random
+ import torch
+ import numpy as np
+ from tqdm import tqdm
+ from config import *
+ from utils import *
+ from samplings import *
+ from accelerate import Accelerator
+ from transformers import BertConfig, AutoTokenizer
+ import argparse
+
+ # Parse command-line arguments
+ parser = argparse.ArgumentParser(description="Feature extraction for CLaMP2.")
+ parser.add_argument("input_dir", type=str, help="Directory containing input data files.")
+ parser.add_argument("output_dir", type=str, help="Directory to save the output features.")
+ parser.add_argument("--normalize", action="store_true", help="Normalize the extracted features.")
+
+ args = parser.parse_args()
+
+ # Retrieve arguments
+ input_dir = args.input_dir
+ output_dir = args.output_dir
+ normalize = args.normalize
+
+ # Remove log files left over from a previous run
+ os.makedirs("logs", exist_ok=True)
+ for file in ["logs/files_extract_clamp2.json",
+              "logs/files_shuffle_extract_clamp2.json",
+              "logs/log_extract_clamp2.txt",
+              "logs/pass_extract_clamp2.txt",
+              "logs/skip_extract_clamp2.txt"]:
+     if os.path.exists(file):
+         os.remove(file)
+
+ # Collect input files and save the (shuffled) file list
+ files = []
+ for root, dirs, fs in os.walk(input_dir):
+     for f in fs:
+         if f.endswith(".txt") or f.endswith(".abc") or f.endswith(".mtf"):
+             files.append(os.path.join(root, f))
+ print(f"Found {len(files)} files in total")
+ with open("logs/files_extract_clamp2.json", "w", encoding="utf-8") as f:
+     json.dump(files, f)
+ random.shuffle(files)
+ with open("logs/files_shuffle_extract_clamp2.json", "w", encoding="utf-8") as f:
+     json.dump(files, f)
+
+ accelerator = Accelerator()
+ device = accelerator.device
+ print("Using device:", device)
+ with open("logs/log_extract_clamp2.txt", "a", encoding="utf-8") as f:
+     f.write("Using device: " + str(device) + "\n")
+
+ m3_config = BertConfig(vocab_size=1,
+                        hidden_size=M3_HIDDEN_SIZE,
+                        num_hidden_layers=PATCH_NUM_LAYERS,
+                        num_attention_heads=M3_HIDDEN_SIZE//64,
+                        intermediate_size=M3_HIDDEN_SIZE*4,
+                        max_position_embeddings=PATCH_LENGTH)
+ model = CLaMP2Model(m3_config,
+                     text_model_name=TEXT_MODEL_NAME,
+                     hidden_size=CLAMP2_HIDDEN_SIZE,
+                     load_m3=CLAMP2_LOAD_M3)
+ model = model.to(device)
+ tokenizer = AutoTokenizer.from_pretrained(TEXT_MODEL_NAME)
+ patchilizer = M3Patchilizer()
+
+ # Print parameter count
+ print("Parameter Number: " + str(sum(p.numel() for p in model.parameters() if p.requires_grad)))
+
+ model.eval()
+ checkpoint = torch.load(CLAMP2_WEIGHTS_PATH, map_location='cpu', weights_only=True)
+ print(f"Successfully Loaded CLaMP 2 Checkpoint from Epoch {checkpoint['epoch']} with loss {checkpoint['min_eval_loss']}")
+ model.load_state_dict(checkpoint['model'])
+
+ def extract_feature(filename, get_normalized=normalize):
+     with open(filename, "r", encoding="utf-8") as f:
+         item = f.read()
+
+     if filename.endswith(".txt"):
+         # Deduplicate non-empty lines and re-join them with the tokenizer's SEP token
+         item = list(set(item.split("\n")))
+         item = "\n".join(item)
+         item = item.split("\n")
+         item = [c for c in item if len(c) > 0]
+         item = tokenizer.sep_token.join(item)
+         input_data = tokenizer(item, return_tensors="pt")
+         input_data = input_data['input_ids'].squeeze(0)
+         max_input_length = MAX_TEXT_LENGTH
+     else:
+         input_data = patchilizer.encode(item, add_special_patches=True)
+         input_data = torch.tensor(input_data)
+         max_input_length = PATCH_LENGTH
+
+     # Split the input into max_input_length segments; the last segment is
+     # re-anchored to the tail so that it is always full-length
+     segment_list = []
+     for i in range(0, len(input_data), max_input_length):
+         segment_list.append(input_data[i:i+max_input_length])
+     segment_list[-1] = input_data[-max_input_length:]
+
+     last_hidden_states_list = []
+
+     for input_segment in segment_list:
+         input_masks = torch.tensor([1]*input_segment.size(0))
+         if filename.endswith(".txt"):
+             pad_indices = torch.ones(MAX_TEXT_LENGTH - input_segment.size(0)).long() * tokenizer.pad_token_id
+         else:
+             pad_indices = torch.ones((PATCH_LENGTH - input_segment.size(0), PATCH_SIZE)).long() * patchilizer.pad_token_id
+         input_masks = torch.cat((input_masks, torch.zeros(max_input_length - input_segment.size(0))), 0)
+         input_segment = torch.cat((input_segment, pad_indices), 0)
+
+         if filename.endswith(".txt"):
+             last_hidden_states = model.get_text_features(text_inputs=input_segment.unsqueeze(0).to(device),
+                                                          text_masks=input_masks.unsqueeze(0).to(device),
+                                                          get_normalized=get_normalized)
+         else:
+             last_hidden_states = model.get_music_features(music_inputs=input_segment.unsqueeze(0).to(device),
+                                                           music_masks=input_masks.unsqueeze(0).to(device),
+                                                           get_normalized=get_normalized)
+         if not get_normalized:
+             last_hidden_states = last_hidden_states[:, :input_masks.sum().long().item(), :]
+         last_hidden_states_list.append(last_hidden_states)
+
+     if not get_normalized:
+         # Keep per-token states: trim the overlap of the tail segment, then concatenate
+         last_hidden_states_list = [last_hidden_states[0] for last_hidden_states in last_hidden_states_list]
+         last_hidden_states_list[-1] = last_hidden_states_list[-1][-(len(input_data)%max_input_length):]
+         last_hidden_states_list = torch.concat(last_hidden_states_list, 0)
+     else:
+         # Average the per-segment global features, weighted by segment length
+         full_chunk_cnt = len(input_data) // max_input_length
+         remain_chunk_len = len(input_data) % max_input_length
+         if remain_chunk_len == 0:
+             feature_weights = torch.tensor([max_input_length] * full_chunk_cnt, device=device).view(-1, 1)
+         else:
+             feature_weights = torch.tensor([max_input_length] * full_chunk_cnt + [remain_chunk_len], device=device).view(-1, 1)
+
+         last_hidden_states_list = torch.concat(last_hidden_states_list, 0)
+         last_hidden_states_list = last_hidden_states_list * feature_weights
+         last_hidden_states_list = last_hidden_states_list.sum(dim=0) / feature_weights.sum()
+
+     return last_hidden_states_list
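+
+ # Shape note (assuming get_text_features/get_music_features return one global
+ # vector per segment when normalized): with get_normalized=True this returns a
+ # pooled vector of shape (CLAMP2_HIDDEN_SIZE,); with get_normalized=False it
+ # returns per-token states of shape (len(input_data), CLAMP2_HIDDEN_SIZE).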
+
+ def process_directory(input_dir, output_dir, files):
+     print(f"Found {len(files)} files in total")
+     with open("logs/log_extract_clamp2.txt", "a", encoding="utf-8") as f:
+         f.write("Found " + str(len(files)) + " files in total\n")
+
+     # calculate the number of files to process per GPU
+     num_files_per_gpu = len(files) // accelerator.num_processes
+
+     # calculate the start and end index for the current GPU
+     start_idx = accelerator.process_index * num_files_per_gpu
+     end_idx = start_idx + num_files_per_gpu
+     if accelerator.process_index == accelerator.num_processes - 1:
+         end_idx = len(files)
+
+     files_to_process = files[start_idx:end_idx]
+
+     # process the files
+     for file in tqdm(files_to_process):
+         output_subdir = output_dir + os.path.dirname(file)[len(input_dir):]
+         try:
+             os.makedirs(output_subdir, exist_ok=True)
+         except Exception as e:
+             print(output_subdir + " cannot be created\n" + str(e))
+             with open("logs/log_extract_clamp2.txt", "a") as f:
+                 f.write(output_subdir + " cannot be created\n" + str(e) + "\n")
+
+         output_file = os.path.join(output_subdir, os.path.splitext(os.path.basename(file))[0] + ".npy")
+
+         if os.path.exists(output_file):
+             print(f"Skipping {file}, output already exists")
+             with open("logs/skip_extract_clamp2.txt", "a", encoding="utf-8") as f:
+                 f.write(file + "\n")
+             continue
+
+         try:
+             with torch.no_grad():
+                 features = extract_feature(file).unsqueeze(0)
+             np.save(output_file, features.detach().cpu().numpy())
+             with open("logs/pass_extract_clamp2.txt", "a", encoding="utf-8") as f:
+                 f.write(file + "\n")
+         except Exception as e:
+             print(f"Failed to process {file}: {e}")
+             with open("logs/log_extract_clamp2.txt", "a", encoding="utf-8") as f:
+                 f.write("Failed to process " + file + ": " + str(e) + "\n")
+
+ with open("logs/files_shuffle_extract_clamp2.json", "r", encoding="utf-8") as f:
+     files = json.load(f)
+
+ # process the files
+ process_directory(input_dir, output_dir, files)
+
+ with open("logs/log_extract_clamp2.txt", "a", encoding="utf-8") as f:
+     f.write("GPU ID: " + str(device) + "\n")
code/extract_m3.py ADDED
@@ -0,0 +1,162 @@
+ import os
+ import json
+ import random
+ import torch
+ import numpy as np
+ from tqdm import tqdm
+ from config import *
+ from utils import *
+ from samplings import *
+ from accelerate import Accelerator
+ from transformers import BertConfig, GPT2Config
+ import argparse
+
+ # Parse command-line arguments for input_dir and output_dir
+ parser = argparse.ArgumentParser(description="Process files to extract features.")
+ parser.add_argument("input_dir", type=str, help="Directory with input files")
+ parser.add_argument("output_dir", type=str, help="Directory to save extracted features")
+ args = parser.parse_args()
+
+ # Use args for input and output directories
+ input_dir = args.input_dir
+ output_dir = args.output_dir
+
+ # Create logs directory if it doesn't exist
+ os.makedirs("logs", exist_ok=True)
+
+ # Remove existing log files if present
+ for file in [
+     "logs/files_extract_m3.json",
+     "logs/files_shuffle_extract_m3.json",
+     "logs/log_extract_m3.txt",
+     "logs/pass_extract_m3.txt",
+     "logs/skip_extract_m3.txt",
+ ]:
+     if os.path.exists(file):
+         os.remove(file)
+
+ # Collect input files
+ files = []
+ for root, dirs, fs in os.walk(input_dir):
+     for f in fs:
+         if f.endswith(".abc") or f.endswith(".mtf"):
+             files.append(os.path.join(root, f))
+
+ print(f"Found {len(files)} files in total")
+ with open("logs/files_extract_m3.json", "w", encoding="utf-8") as f:
+     json.dump(files, f)
+
+ # Shuffle files and save the shuffled order
+ random.shuffle(files)
+ with open("logs/files_shuffle_extract_m3.json", "w", encoding="utf-8") as f:
+     json.dump(files, f)
+
+ # Initialize accelerator and device
+ accelerator = Accelerator()
+ device = accelerator.device
+ print("Using device:", device)
+ with open("logs/log_extract_m3.txt", "a", encoding="utf-8") as f:
+     f.write("Using device: " + str(device) + "\n")
+
+ # Model and configuration setup
+ patchilizer = M3Patchilizer()
+ encoder_config = BertConfig(
+     vocab_size=1,
+     hidden_size=M3_HIDDEN_SIZE,
+     num_hidden_layers=PATCH_NUM_LAYERS,
+     num_attention_heads=M3_HIDDEN_SIZE // 64,
+     intermediate_size=M3_HIDDEN_SIZE * 4,
+     max_position_embeddings=PATCH_LENGTH,
+ )
+ decoder_config = GPT2Config(
+     vocab_size=128,
+     n_positions=PATCH_SIZE,
+     n_embd=M3_HIDDEN_SIZE,
+     n_layer=TOKEN_NUM_LAYERS,
+     n_head=M3_HIDDEN_SIZE // 64,
+     n_inner=M3_HIDDEN_SIZE * 4,
+ )
+ model = M3Model(encoder_config, decoder_config).to(device)
+
+ # Print parameter count
+ print("Parameter Number: " + str(sum(p.numel() for p in model.parameters() if p.requires_grad)))
+
+ # Load model weights
+ model.eval()
+ checkpoint = torch.load(M3_WEIGHTS_PATH, map_location='cpu', weights_only=True)
+ print(f"Successfully Loaded Checkpoint from Epoch {checkpoint['epoch']} with loss {checkpoint['min_eval_loss']}")
+ model.load_state_dict(checkpoint['model'])
+
+ def extract_feature(item):
+     """Extracts features from input data."""
+     target_patches = patchilizer.encode(item, add_special_patches=True)
+     # Split into PATCH_LENGTH-sized segments; the last segment is re-anchored
+     # to the tail so that it is always full-length
+     target_patches_list = [target_patches[i:i + PATCH_LENGTH] for i in range(0, len(target_patches), PATCH_LENGTH)]
+     target_patches_list[-1] = target_patches[-PATCH_LENGTH:]
+
+     last_hidden_states_list = []
+     for input_patches in target_patches_list:
+         input_masks = torch.tensor([1] * len(input_patches))
+         input_patches = torch.tensor(input_patches)
+         last_hidden_states = model.encoder(
+             input_patches.unsqueeze(0).to(device), input_masks.unsqueeze(0).to(device)
+         )["last_hidden_state"][0]
+         last_hidden_states_list.append(last_hidden_states)
+
+     # Trim the overlap of the tail segment, then concatenate
+     last_hidden_states_list[-1] = last_hidden_states_list[-1][-(len(target_patches) % PATCH_LENGTH):]
+     return torch.concat(last_hidden_states_list, 0)
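+
+ # Shape note: extract_feature returns per-patch encoder states of shape
+ # (len(target_patches), M3_HIDDEN_SIZE); process_directory below saves them
+ # with a leading batch dimension of 1.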
+
+ def process_directory(input_dir, output_dir, files):
+     """Processes files in the input directory and saves features to the output directory."""
+     print(f"Found {len(files)} files in total")
+     with open("logs/log_extract_m3.txt", "a", encoding="utf-8") as f:
+         f.write("Found " + str(len(files)) + " files in total\n")
+
+     # Distribute files across processes for parallel processing
+     num_files_per_gpu = len(files) // accelerator.num_processes
+     start_idx = accelerator.process_index * num_files_per_gpu
+     end_idx = start_idx + num_files_per_gpu if accelerator.process_index < accelerator.num_processes - 1 else len(files)
+     files_to_process = files[start_idx:end_idx]
+
+     # Process each file
+     for file in tqdm(files_to_process):
+         output_subdir = output_dir + os.path.dirname(file)[len(input_dir):]
+         try:
+             os.makedirs(output_subdir, exist_ok=True)
+         except Exception as e:
+             print(f"{output_subdir} cannot be created\n{e}")
+             with open("logs/log_extract_m3.txt", "a") as f:
+                 f.write(f"{output_subdir} cannot be created\n{e}\n")
+
+         output_file = os.path.join(output_subdir, os.path.splitext(os.path.basename(file))[0] + ".npy")
+
+         if os.path.exists(output_file):
+             print(f"Skipping {file}, output already exists")
+             with open("logs/skip_extract_m3.txt", "a", encoding="utf-8") as f:
+                 f.write(file + "\n")
+             continue
+
+         try:
+             with open(file, "r", encoding="utf-8") as f:
+                 item = f.read()
+                 if not item.startswith("ticks_per_beat"):
+                     item = item.replace("L:1/8\n", "")
+             with torch.no_grad():
+                 features = extract_feature(item).unsqueeze(0)
+             np.save(output_file, features.detach().cpu().numpy())
+             with open("logs/pass_extract_m3.txt", "a", encoding="utf-8") as f:
+                 f.write(file + "\n")
+         except Exception as e:
+             print(f"Failed to process {file}: {e}")
+             with open("logs/log_extract_m3.txt", "a", encoding="utf-8") as f:
+                 f.write(f"Failed to process {file}: {e}\n")
+
+ # Load shuffled files list and start processing
+ with open("logs/files_shuffle_extract_m3.json", "r", encoding="utf-8") as f:
+     files = json.load(f)
+
+ # Process the directory
+ process_directory(input_dir, output_dir, files)
+
+ with open("logs/log_extract_m3.txt", "a", encoding="utf-8") as f:
+     f.write("GPU ID: " + str(device) + "\n")
code/logs_clamp2_h_size_768_lr_5e-05_batch_128_scale_1_t_length_128_t_model_FacebookAI_xlm-roberta-base_t_dropout_True_m3_True.txt ADDED
@@ -0,0 +1,500 @@
+ Epoch 1
+ train_loss: 4.135209383230448
+ eval_loss: 1.9609466393788655
+ time: Sun Sep 15 16:53:25 2024
+
+ Epoch 2
+ train_loss: 2.9014642586344244
+ eval_loss: 1.518966309229533
+ time: Sun Sep 15 18:43:11 2024
+
+ Epoch 3
+ train_loss: 2.5554833216573805
+ eval_loss: 1.2929850498835245
+ time: Sun Sep 15 20:35:14 2024
+
+ Epoch 4
+ train_loss: 2.3372960954320985
+ eval_loss: 1.1918509085973104
+ time: Sun Sep 15 22:23:17 2024
+
+ Epoch 5
+ train_loss: 2.183457492896627
+ eval_loss: 1.0725035190582275
+ time: Mon Sep 16 00:11:33 2024
+
+ Epoch 6
+ train_loss: 2.0555087922796216
+ eval_loss: 0.9798878033955892
+ time: Mon Sep 16 01:59:41 2024
+
+ Epoch 7
+ train_loss: 1.9556777615308922
+ eval_loss: 0.9242686867713928
+ time: Mon Sep 16 03:47:45 2024
+
+ Epoch 8
+ train_loss: 1.8689270445336208
+ eval_loss: 0.8476833502451578
+ time: Mon Sep 16 05:35:55 2024
+
+ Epoch 9
+ train_loss: 1.7932923043361795
+ eval_loss: 0.8043208519617716
+ time: Mon Sep 16 07:24:40 2024
+
+ Epoch 10
+ train_loss: 1.726651672090251
+ eval_loss: 0.7419429302215577
+ time: Mon Sep 16 09:12:24 2024
+
+ Epoch 11
+ train_loss: 1.6657210920084013
+ eval_loss: 0.7209376732508341
+ time: Mon Sep 16 11:01:02 2024
+
+ Epoch 12
+ train_loss: 1.6131336856822078
+ eval_loss: 0.6950737277666728
+ time: Mon Sep 16 12:50:07 2024
+
+ Epoch 13
+ train_loss: 1.5601606700647368
+ eval_loss: 0.652581254641215
+ time: Mon Sep 16 14:40:19 2024
+
+ Epoch 14
+ train_loss: 1.5198849670231784
+ eval_loss: 0.5868058403333029
+ time: Mon Sep 16 16:30:09 2024
+
+ Epoch 15
+ train_loss: 1.4755114693644882
+ eval_loss: 0.5867449243863424
+ time: Mon Sep 16 18:20:11 2024
+
+ Epoch 16
+ train_loss: 1.4336211351749464
+ eval_loss: 0.5479268968105316
+ time: Mon Sep 16 20:10:42 2024
+
+ Epoch 17
+ train_loss: 1.4039166571722088
+ eval_loss: 0.5280438164869944
+ time: Mon Sep 16 22:01:01 2024
+
+ Epoch 18
+ train_loss: 1.365380759842085
+ eval_loss: 0.5008598109086354
+ time: Mon Sep 16 23:51:36 2024
+
+ Epoch 19
+ train_loss: 1.332988672848894
+ eval_loss: 0.46479900081952413
+ time: Tue Sep 17 01:41:49 2024
+
+ Epoch 20
+ train_loss: 1.3014001791981087
+ eval_loss: 0.45230263272921245
+ time: Tue Sep 17 03:33:12 2024
+
+ Epoch 21
+ train_loss: 1.2688752540577755
+ eval_loss: 0.4297992348670959
+ time: Tue Sep 17 05:23:40 2024
+
+ Epoch 22
+ train_loss: 1.2425967695381415
+ eval_loss: 0.4219102164109548
+ time: Tue Sep 17 07:14:26 2024
+
+ Epoch 23
+ train_loss: 1.216824040410488
+ eval_loss: 0.40282649795214337
+ time: Tue Sep 17 09:05:53 2024
+
+ Epoch 24
+ train_loss: 1.1875996505747286
+ eval_loss: 0.36659018794695536
+ time: Tue Sep 17 10:56:46 2024
+
+ Epoch 25
+ train_loss: 1.1670776548255222
+ eval_loss: 0.36906688412030536
+ time: Tue Sep 17 12:47:39 2024
+
+ Epoch 26
+ train_loss: 1.1426405137974536
+ eval_loss: 0.3478178918361664
+ time: Tue Sep 17 14:38:34 2024
+
+ Epoch 27
+ train_loss: 1.1208335824466733
+ eval_loss: 0.33407697081565857
+ time: Tue Sep 17 16:28:45 2024
+
+ Epoch 28
+ train_loss: 1.0998876758880667
+ eval_loss: 0.33792892495791116
+ time: Tue Sep 17 18:19:59 2024
+
+ Epoch 29
+ train_loss: 1.0769698478377083
+ eval_loss: 0.3026650925477346
+ time: Tue Sep 17 20:11:16 2024
+
+ Epoch 30
+ train_loss: 1.0587592209657248
+ eval_loss: 0.2914476583401362
+ time: Tue Sep 17 22:03:30 2024
+
+ Epoch 31
+ train_loss: 1.0384011404245468
+ eval_loss: 0.27578969597816466
+ time: Tue Sep 17 23:55:15 2024
+
+ Epoch 32
+ train_loss: 1.0233595809527622
+ eval_loss: 0.2651842157046
+ time: Wed Sep 18 01:46:45 2024
+
+ Epoch 33
+ train_loss: 1.001824217418977
+ eval_loss: 0.2630385269721349
+ time: Wed Sep 18 03:39:08 2024
+
+ Epoch 34
+ train_loss: 0.9853754720520442
+ eval_loss: 0.25253995358943937
+ time: Wed Sep 18 05:30:33 2024
+
+ Epoch 35
+ train_loss: 0.9676362536067821
+ eval_loss: 0.24096360007921855
+ time: Wed Sep 18 07:22:09 2024
+
+ Epoch 36
+ train_loss: 0.9507065269691086
+ eval_loss: 0.2413844664891561
+ time: Wed Sep 18 09:12:59 2024
+
+ Epoch 37
+ train_loss: 0.9362979678186832
+ eval_loss: 0.23412639300028484
+ time: Wed Sep 18 11:04:09 2024
+
+ Epoch 38
+ train_loss: 0.9174621180856977
+ eval_loss: 0.21386308073997498
+ time: Wed Sep 18 12:54:52 2024
+
+ Epoch 39
+ train_loss: 0.9090870427650668
+ eval_loss: 0.19962686796983084
+ time: Wed Sep 18 14:45:52 2024
+
+ Epoch 40
+ train_loss: 0.8918763521271409
+ eval_loss: 0.20026112000147503
+ time: Wed Sep 18 16:36:37 2024
+
+ Epoch 41
+ train_loss: 0.8786202421428222
+ eval_loss: 0.18366556564966838
+ time: Wed Sep 18 18:27:31 2024
+
+ Epoch 42
+ train_loss: 0.8670675420604148
+ eval_loss: 0.17908457616964976
+ time: Wed Sep 18 20:18:16 2024
+
+ Epoch 43
+ train_loss: 0.8505593872931582
+ eval_loss: 0.17053016225496928
+ time: Wed Sep 18 22:10:39 2024
+
+ Epoch 44
+ train_loss: 0.8421949260766888
+ eval_loss: 0.17344878117243448
+ time: Thu Sep 19 00:02:24 2024
+
+ Epoch 45
+ train_loss: 0.8267569324702205
+ eval_loss: 0.1591893643140793
+ time: Thu Sep 19 01:53:48 2024
+
+ Epoch 46
+ train_loss: 0.8144617894466949
+ eval_loss: 0.15313500861326854
+ time: Thu Sep 19 03:44:58 2024
+
+ Epoch 47
+ train_loss: 0.8041844731303666
+ eval_loss: 0.14998503575722377
+ time: Thu Sep 19 05:36:50 2024
+
+ Epoch 48
+ train_loss: 0.7938160687423412
+ eval_loss: 0.1401842971642812
+ time: Thu Sep 19 07:28:21 2024
+
+ Epoch 49
+ train_loss: 0.7808867423096515
+ eval_loss: 0.1368137091398239
+ time: Thu Sep 19 09:20:09 2024
+
+ Epoch 50
+ train_loss: 0.7702171771933628
+ eval_loss: 0.13333487262328467
+ time: Thu Sep 19 11:12:37 2024
+
+ Epoch 51
+ train_loss: 0.7604444062967384
+ eval_loss: 0.13119754443566004
+ time: Thu Sep 19 13:04:26 2024
+
+ Epoch 52
+ train_loss: 0.7496546459894258
+ eval_loss: 0.1236343190073967
+ time: Thu Sep 19 14:55:53 2024
+
+ Epoch 53
+ train_loss: 0.7406523988345118
+ eval_loss: 0.12237562835216523
+ time: Thu Sep 19 16:47:51 2024
+
+ Epoch 54
+ train_loss: 0.7331518270251398
+ eval_loss: 0.11441469887892405
+ time: Thu Sep 19 18:38:48 2024
+
+ Epoch 55
+ train_loss: 0.7238280263746373
+ eval_loss: 0.10651812156041464
+ time: Thu Sep 19 20:29:18 2024
+
+ Epoch 56
+ train_loss: 0.7141688125488486
+ eval_loss: 0.10959143290917078
+ time: Thu Sep 19 22:19:28 2024
+
+ Epoch 57
+ train_loss: 0.7053173944645842
+ eval_loss: 0.10957898745934168
+ time: Fri Sep 20 00:10:06 2024
+
+ Epoch 58
+ train_loss: 0.6992166797548109
+ eval_loss: 0.09759224901596705
+ time: Fri Sep 20 02:01:02 2024
+
+ Epoch 59
+ train_loss: 0.6855367768623795
+ eval_loss: 0.10631066560745239
+ time: Fri Sep 20 03:51:25 2024
+
+ Epoch 60
+ train_loss: 0.6812366953699432
+ eval_loss: 0.08681503732999166
+ time: Fri Sep 20 05:41:32 2024
+
+ Epoch 61
+ train_loss: 0.6744320154854127
+ eval_loss: 0.08995070978999138
+ time: Fri Sep 20 07:32:33 2024
+
+ Epoch 62
+ train_loss: 0.6627048003782218
+ eval_loss: 0.08492780551314354
+ time: Fri Sep 20 09:22:52 2024
+
+ Epoch 63
+ train_loss: 0.6554694614403961
+ eval_loss: 0.09110054125388463
+ time: Fri Sep 20 11:15:14 2024
+
+ Epoch 64
+ train_loss: 0.6519363358224428
+ eval_loss: 0.08603844990332922
+ time: Fri Sep 20 13:05:45 2024
+
+ Epoch 65
+ train_loss: 0.6432196787488694
+ eval_loss: 0.07920929342508316
+ time: Fri Sep 20 14:56:27 2024
+
+ Epoch 66
+ train_loss: 0.6355774498505016
+ eval_loss: 0.08108622878789902
+ time: Fri Sep 20 16:47:00 2024
+
+ Epoch 67
+ train_loss: 0.628098195042665
+ eval_loss: 0.0835166151324908
+ time: Fri Sep 20 18:37:19 2024
+
+ Epoch 68
+ train_loss: 0.6229319736150211
+ eval_loss: 0.08126899500687917
+ time: Fri Sep 20 20:27:49 2024
+
+ Epoch 69
+ train_loss: 0.6162204064685376
+ eval_loss: 0.07405624414483707
+ time: Fri Sep 20 22:18:28 2024
+
+ Epoch 70
+ train_loss: 0.6093617768645045
+ eval_loss: 0.07916868552565574
+ time: Sat Sep 21 00:10:02 2024
+
+ Epoch 71
+ train_loss: 0.603765148576412
+ eval_loss: 0.07368899683157602
+ time: Sat Sep 21 02:00:29 2024
+
+ Epoch 72
+ train_loss: 0.5988557130088281
+ eval_loss: 0.06763924509286881
+ time: Sat Sep 21 03:51:46 2024
+
+ Epoch 73
+ train_loss: 0.590835969827209
+ eval_loss: 0.07139033873875936
+ time: Sat Sep 21 05:43:51 2024
+
+ Epoch 74
+ train_loss: 0.5864904869113879
+ eval_loss: 0.06859012718002001
+ time: Sat Sep 21 07:34:23 2024
+
+ Epoch 75
+ train_loss: 0.5819329118342274
+ eval_loss: 0.07611284777522087
+ time: Sat Sep 21 09:25:24 2024
+
+ Epoch 76
+ train_loss: 0.5750655913014898
+ eval_loss: 0.06813529431819916
+ time: Sat Sep 21 11:16:26 2024
+
+ Epoch 77
+ train_loss: 0.5703848759963817
+ eval_loss: 0.07192744488517443
+ time: Sat Sep 21 13:07:32 2024
+
+ Epoch 78
+ train_loss: 0.5666614368024667
+ eval_loss: 0.06931692684690158
+ time: Sat Sep 21 14:59:16 2024
+
+ Epoch 79
+ train_loss: 0.5610024514409998
+ eval_loss: 0.06487631574273109
+ time: Sat Sep 21 16:50:56 2024
+
+ Epoch 80
+ train_loss: 0.5552226794301296
+ eval_loss: 0.06034566586216291
+ time: Sat Sep 21 18:43:49 2024
+
+ Epoch 81
+ train_loss: 0.5512203840912394
+ eval_loss: 0.05962909683585167
+ time: Sat Sep 21 20:36:01 2024
+
+ Epoch 82
+ train_loss: 0.5477618443893468
+ eval_loss: 0.05546447386344274
+ time: Sat Sep 21 22:28:13 2024
+
+ Epoch 83
+ train_loss: 0.5428704522615506
+ eval_loss: 0.05013169844945272
+ time: Sun Sep 22 00:21:20 2024
+
+ Epoch 84
+ train_loss: 0.5396500316264258
+ eval_loss: 0.062498694161574046
+ time: Sun Sep 22 02:13:07 2024
+
+ Epoch 85
+ train_loss: 0.5349479554715307
+ eval_loss: 0.06073434228698413
+ time: Sun Sep 22 04:05:17 2024
+
+ Epoch 86
+ train_loss: 0.5292192482811466
+ eval_loss: 0.05734321524699529
+ time: Sun Sep 22 05:57:05 2024
+
+ Epoch 87
+ train_loss: 0.5249555090607058
+ eval_loss: 0.05274935985604922
+ time: Sun Sep 22 07:48:52 2024
+
+ Epoch 88
+ train_loss: 0.523276918144503
+ eval_loss: 0.05601314604282379
+ time: Sun Sep 22 09:41:05 2024
+
+ Epoch 89
+ train_loss: 0.5179934711230115
+ eval_loss: 0.057493301729361214
+ time: Sun Sep 22 11:33:47 2024
+
+ Epoch 90
+ train_loss: 0.5129834874146376
+ eval_loss: 0.05289425750573476
+ time: Sun Sep 22 13:25:54 2024
+
+ Epoch 91
+ train_loss: 0.5104886514866054
+ eval_loss: 0.0586332509915034
+ time: Sun Sep 22 15:18:13 2024
+
+ Epoch 92
+ train_loss: 0.5067275374282622
+ eval_loss: 0.0489634457975626
+ time: Sun Sep 22 17:10:39 2024
+
+ Epoch 93
+ train_loss: 0.5038576471461468
+ eval_loss: 0.05257208868861198
+ time: Sun Sep 22 19:04:46 2024
+
+ Epoch 94
+ train_loss: 0.5013840998762528
+ eval_loss: 0.05249967947602272
+ time: Sun Sep 22 20:57:55 2024
+
+ Epoch 95
+ train_loss: 0.4949465335763684
+ eval_loss: 0.048154672731955846
+ time: Sun Sep 22 22:50:30 2024
+
+ Epoch 96
+ train_loss: 0.4925781255166608
+ eval_loss: 0.052830965568621956
+ time: Mon Sep 23 00:43:13 2024
+
+ Epoch 97
+ train_loss: 0.4875780233282
+ eval_loss: 0.04684837857882182
+ time: Mon Sep 23 02:35:38 2024
+
+ Epoch 98
+ train_loss: 0.4858591078021573
+ eval_loss: 0.04507673804958661
+ time: Mon Sep 23 04:28:25 2024
+
+ Epoch 99
+ train_loss: 0.4804891498405977
+ eval_loss: 0.048148307204246524
+ time: Mon Sep 23 06:21:11 2024
+
+ Epoch 100
+ train_loss: 0.4782898508661265
+ eval_loss: 0.044317328557372096
+ time: Mon Sep 23 08:13:38 2024
+
code/logs_m3_p_size_64_p_length_512_t_layers_3_p_layers_12_h_size_768_lr_0.0001_batch_16_mask_0.45.txt ADDED
@@ -0,0 +1,500 @@
+ Epoch 1
+ train_loss: 0.3055062843047765
+ eval_loss: 0.03727418192925584
+ time: Wed Aug 7 08:54:27 2024
+
+ Epoch 2
+ train_loss: 0.1718286834194018
+ eval_loss: 0.020085958587123625
+ time: Wed Aug 7 14:38:40 2024
+
+ Epoch 3
+ train_loss: 0.1476379437283353
+ eval_loss: 0.013794219702922342
+ time: Wed Aug 7 20:24:50 2024
+
+ Epoch 4
+ train_loss: 0.13554848474242498
+ eval_loss: 0.011902455668817844
+ time: Thu Aug 8 02:13:14 2024
+
+ Epoch 5
+ train_loss: 0.12781724531702496
+ eval_loss: 0.008929020740909163
+ time: Thu Aug 8 08:00:32 2024
+
+ Epoch 6
+ train_loss: 0.12264176163121285
+ eval_loss: 0.008453744098166877
+ time: Thu Aug 8 13:47:12 2024
+
+ Epoch 7
+ train_loss: 0.11872020949762974
+ eval_loss: 0.007165850172573819
+ time: Thu Aug 8 19:34:09 2024
+
+ Epoch 8
+ train_loss: 0.1153576639058103
+ eval_loss: 0.006601243383027142
+ time: Fri Aug 9 01:22:19 2024
+
+ Epoch 9
+ train_loss: 0.11312788720856465
+ eval_loss: 0.005973297544609645
+ time: Fri Aug 9 07:11:25 2024
+
+ Epoch 10
+ train_loss: 0.11096722687304313
+ eval_loss: 0.005796642405204946
+ time: Fri Aug 9 12:59:13 2024
+
+ Epoch 11
+ train_loss: 0.10913465011501206
+ eval_loss: 0.005249736892483845
+ time: Fri Aug 9 18:46:14 2024
+
+ Epoch 12
+ train_loss: 0.10780615577682347
+ eval_loss: 0.005115668955435858
+ time: Sat Aug 10 00:34:28 2024
+
+ Epoch 13
+ train_loss: 0.10650418949283817
+ eval_loss: 0.00475350690255028
+ time: Sat Aug 10 06:22:51 2024
+
+ Epoch 14
+ train_loss: 0.10524643352798381
+ eval_loss: 0.004583307054632575
+ time: Sat Aug 10 12:11:18 2024
+
+ Epoch 15
+ train_loss: 0.1041887047117438
+ eval_loss: 0.004289783886142609
+ time: Sat Aug 10 17:59:48 2024
+
+ Epoch 16
+ train_loss: 0.10343191375801945
+ eval_loss: 0.004421581262192111
+ time: Sat Aug 10 23:47:37 2024
+
+ Epoch 17
+ train_loss: 0.10256196519161385
+ eval_loss: 0.004104017818401634
+ time: Sun Aug 11 05:35:49 2024
+
+ Epoch 18
+ train_loss: 0.10170993055767087
+ eval_loss: 0.0039769458375234585
+ time: Sun Aug 11 11:23:39 2024
+
+ Epoch 19
+ train_loss: 0.1011880517951369
+ eval_loss: 0.0039005329324529833
+ time: Sun Aug 11 17:11:19 2024
+
+ Epoch 20
+ train_loss: 0.10030771156829077
+ eval_loss: 0.0036845325137673237
+ time: Sun Aug 11 22:59:49 2024
+
+ Epoch 21
+ train_loss: 0.09972109616302548
+ eval_loss: 0.0038503893043940205
+ time: Mon Aug 12 04:48:03 2024
+
+ Epoch 22
+ train_loss: 0.09932596696744844
+ eval_loss: 0.00370702411211194
+ time: Mon Aug 12 10:36:32 2024
+
+ Epoch 23
+ train_loss: 0.09888291950362459
+ eval_loss: 0.0034573812171313834
+ time: Mon Aug 12 16:24:55 2024
+
+ Epoch 24
+ train_loss: 0.09852503939581284
+ eval_loss: 0.003370235667697582
+ time: Mon Aug 12 22:12:14 2024
+
+ Epoch 25
+ train_loss: 0.09825884147004627
+ eval_loss: 0.00346387299475209
+ time: Tue Aug 13 04:00:42 2024
+
+ Epoch 26
+ train_loss: 0.09756856258879791
+ eval_loss: 0.0033276399650575615
+ time: Tue Aug 13 09:49:22 2024
+
+ Epoch 27
+ train_loss: 0.09730380131801182
+ eval_loss: 0.003326884365762399
+ time: Tue Aug 13 15:36:05 2024
+
+ Epoch 28
+ train_loss: 0.09687296288584166
+ eval_loss: 0.0034621171255395573
+ time: Tue Aug 13 21:23:24 2024
+
+ Epoch 29
+ train_loss: 0.09668537175198876
+ eval_loss: 0.003284947640647648
+ time: Wed Aug 14 03:10:21 2024
+
+ Epoch 30
+ train_loss: 0.09628572566572022
+ eval_loss: 0.003119471057549999
+ time: Wed Aug 14 08:56:45 2024
+
+ Epoch 31
+ train_loss: 0.09617123452549026
+ eval_loss: 0.003124797866062776
+ time: Wed Aug 14 14:43:12 2024
+
+ Epoch 32
+ train_loss: 0.09578377932399237
+ eval_loss: 0.0030736677601092537
+ time: Wed Aug 14 20:31:01 2024
+
+ Epoch 33
+ train_loss: 0.09558304869954821
+ eval_loss: 0.003178201471396451
+ time: Thu Aug 15 02:19:14 2024
+
+ Epoch 34
+ train_loss: 0.0952804450174092
+ eval_loss: 0.0030847328114775225
+ time: Thu Aug 15 08:06:29 2024
+
+ Epoch 35
+ train_loss: 0.09513826066486042
+ eval_loss: 0.00303873973446682
+ time: Thu Aug 15 13:52:17 2024
+
+ Epoch 36
+ train_loss: 0.09466769916316405
+ eval_loss: 0.0030122215467611258
+ time: Thu Aug 15 19:38:29 2024
+
+ Epoch 37
+ train_loss: 0.09465687754501316
+ eval_loss: 0.00289094522015785
+ time: Fri Aug 16 01:25:14 2024
+
+ Epoch 38
+ train_loss: 0.09435585222324992
+ eval_loss: 0.0030173959307773393
+ time: Fri Aug 16 07:11:56 2024
+
+ Epoch 39
+ train_loss: 0.09413478592045794
+ eval_loss: 0.002968058454507435
+ time: Fri Aug 16 12:59:16 2024
+
+ Epoch 40
+ train_loss: 0.09393180562734375
+ eval_loss: 0.0030673167865746948
+ time: Fri Aug 16 18:45:23 2024
+
+ Epoch 41
+ train_loss: 0.09365266143982799
+ eval_loss: 0.00287582161187937
+ time: Sat Aug 17 00:31:47 2024
+
+ Epoch 42
+ train_loss: 0.09359205519747489
+ eval_loss: 0.0027280030162997134
+ time: Sat Aug 17 06:18:32 2024
+
+ Epoch 43
+ train_loss: 0.09349238520961266
+ eval_loss: 0.0029261269570300787
+ time: Sat Aug 17 12:05:26 2024
+
+ Epoch 44
+ train_loss: 0.09324607778116949
+ eval_loss: 0.002691730654519444
+ time: Sat Aug 17 17:52:20 2024
+
+ Epoch 45
+ train_loss: 0.09310021795996155
+ eval_loss: 0.0028863806760858132
+ time: Sat Aug 17 23:38:57 2024
+
+ Epoch 46
+ train_loss: 0.09307358593283441
+ eval_loss: 0.002793597210717352
+ time: Sun Aug 18 05:25:42 2024
+
+ Epoch 47
+ train_loss: 0.09299390690766882
+ eval_loss: 0.0027052024821456098
+ time: Sun Aug 18 11:12:32 2024
+
+ Epoch 48
+ train_loss: 0.09253486422624911
+ eval_loss: 0.0027312307396534247
+ time: Sun Aug 18 16:59:16 2024
+
+ Epoch 49
+ train_loss: 0.09243107154309635
+ eval_loss: 0.002648197562936772
+ time: Sun Aug 18 22:46:41 2024
+
+ Epoch 50
+ train_loss: 0.09237845186490301
+ eval_loss: 0.0026844193827840284
+ time: Mon Aug 19 04:35:10 2024
+
+ Epoch 51
+ train_loss: 0.09231985249015236
+ eval_loss: 0.002708845011956738
+ time: Mon Aug 19 10:24:17 2024
+
+ Epoch 52
+ train_loss: 0.0922615721153286
+ eval_loss: 0.0035362059711223225
+ time: Mon Aug 19 16:11:39 2024
+
+ Epoch 53
+ train_loss: 0.09200190843071623
+ eval_loss: 0.0025848455890180064
+ time: Mon Aug 19 21:58:31 2024
+
+ Epoch 54
+ train_loss: 0.09200848002425245
+ eval_loss: 0.0026311414897881983
+ time: Tue Aug 20 03:45:36 2024
+
+ Epoch 55
+ train_loss: 0.09154813869071807
+ eval_loss: 0.0025586662145983823
+ time: Tue Aug 20 09:34:48 2024
+
+ Epoch 56
+ train_loss: 0.09162745474034129
+ eval_loss: 0.0026280648907143545
+ time: Tue Aug 20 15:23:23 2024
+
+ Epoch 57
+ train_loss: 0.09156280245772795
+ eval_loss: 0.002539119078534093
+ time: Tue Aug 20 21:11:25 2024
+
+ Epoch 58
+ train_loss: 0.09142590950099329
+ eval_loss: 0.0026369429265152866
+ time: Wed Aug 21 02:59:19 2024
+
+ Epoch 59
+ train_loss: 0.09139848643851392
+ eval_loss: 0.0024354966580356916
+ time: Wed Aug 21 08:46:23 2024
+
+ Epoch 60
+ train_loss: 0.09131192888740647
+ eval_loss: 0.0024594995301248277
+ time: Wed Aug 21 14:33:28 2024
+
+ Epoch 61
+ train_loss: 0.09122042933562911
+ eval_loss: 0.002616936316367883
+ time: Wed Aug 21 20:20:57 2024
+
+ Epoch 62
+ train_loss: 0.09109125168796305
+ eval_loss: 0.0025555431279884297
+ time: Thu Aug 22 02:08:45 2024
+
+ Epoch 63
+ train_loss: 0.09106527324403817
+ eval_loss: 0.0025145284593781213
+ time: Thu Aug 22 07:56:26 2024
+
+ Epoch 64
+ train_loss: 0.09095406525682191
+ eval_loss: 0.0025151555842959678
+ time: Thu Aug 22 13:45:57 2024
+
+ Epoch 65
+ train_loss: 0.09102793501718281
+ eval_loss: 0.0024135450126194563
+ time: Thu Aug 22 19:54:28 2024
+
+ Epoch 66
+ train_loss: 0.0908411063853937
+ eval_loss: 0.002460922076728368
+ time: Fri Aug 23 01:59:41 2024
+
+ Epoch 67
+ train_loss: 0.09070221083785855
+ eval_loss: 0.002453409551882543
+ time: Fri Aug 23 07:52:30 2024
+
+ Epoch 68
+ train_loss: 0.0906545008953897
+ eval_loss: 0.0024080786435031784
+ time: Fri Aug 23 13:41:28 2024
+
+ Epoch 69
+ train_loss: 0.0907353380525871
+ eval_loss: 0.0024573436347799147
+ time: Fri Aug 23 19:27:14 2024
+
+ Epoch 70
+ train_loss: 0.09040538104085095
+ eval_loss: 0.0023765437401249566
+ time: Sat Aug 24 01:14:45 2024
+
+ Epoch 71
+ train_loss: 0.09036114065518137
+ eval_loss: 0.0023877528348234226
+ time: Sat Aug 24 07:02:04 2024
+
+ Epoch 72
+ train_loss: 0.09037455027205546
+ eval_loss: 0.002315233082103814
+ time: Sat Aug 24 12:49:24 2024
+
+ Epoch 73
+ train_loss: 0.09026183628343257
+ eval_loss: 0.0024284060419643228
+ time: Sat Aug 24 18:35:36 2024
+
+ Epoch 74
+ train_loss: 0.09019025581511034
+ eval_loss: 0.002393116130206718
+ time: Sun Aug 25 00:21:29 2024
+
+ Epoch 75
+ train_loss: 0.089901714783446
+ eval_loss: 0.002298152916632467
+ time: Sun Aug 25 06:08:01 2024
+
+ Epoch 76
+ train_loss: 0.09018262871273484
+ eval_loss: 0.002273971366672482
+ time: Sun Aug 25 11:54:02 2024
+
+ Epoch 77
+ train_loss: 0.08998425874228
+ eval_loss: 0.002317420323379338
+ time: Sun Aug 25 17:44:05 2024
+
+ Epoch 78
+ train_loss: 0.08983653943919646
+ eval_loss: 0.0024391192159878743
+ time: Sun Aug 25 23:31:34 2024
+
+ Epoch 79
+ train_loss: 0.08981405456901183
+ eval_loss: 0.002319374949895317
+ time: Mon Aug 26 05:24:56 2024
+
+ Epoch 80
+ train_loss: 0.08974534569690559
+ eval_loss: 0.0023008979344151066
+ time: Mon Aug 26 11:28:33 2024
+
+ Epoch 81
+ train_loss: 0.08972110153310983
+ eval_loss: 0.002406696710865237
+ time: Mon Aug 26 17:33:30 2024
+
+ Epoch 82
+ train_loss: 0.0895689915361898
+ eval_loss: 0.002241936448434926
+ time: Mon Aug 26 23:39:15 2024
+
+ Epoch 83
+ train_loss: 0.08950625452328584
+ eval_loss: 0.002408353965493697
+ time: Tue Aug 27 05:37:59 2024
+
+ Epoch 84
+ train_loss: 0.08959725393084628
+ eval_loss: 0.0023435966142665455
+ time: Tue Aug 27 11:34:29 2024
+
+ Epoch 85
+ train_loss: 0.08970333726515986
+ eval_loss: 0.0023965956810233086
+ time: Tue Aug 27 17:27:31 2024
+
+ Epoch 86
+ train_loss: 0.08948115523227308
+ eval_loss: 0.002325803569256709
+ time: Tue Aug 27 23:19:43 2024
+
+ Epoch 87
+ train_loss: 0.08933937688654775
+ eval_loss: 0.0023552257988114647
+ time: Wed Aug 28 05:11:34 2024
+
+ Epoch 88
+ train_loss: 0.08938353908107184
+ eval_loss: 0.0024397599904794043
+ time: Wed Aug 28 11:01:23 2024
+
+ Epoch 89
+ train_loss: 0.08921640703096091
+ eval_loss: 0.002223708766084243
+ time: Wed Aug 28 16:50:21 2024
+
+ Epoch 90
+ train_loss: 0.08929300930090782
+ eval_loss: 0.0022849828316260303
+ time: Wed Aug 28 22:38:53 2024
+
+ Epoch 91
+ train_loss: 0.08910525214309825
+ eval_loss: 0.0022257193633186227
+ time: Thu Aug 29 04:35:16 2024
+
+ Epoch 92
+ train_loss: 0.08905495976636461
+ eval_loss: 0.0022299331251850137
+ time: Thu Aug 29 10:29:46 2024
+
+ Epoch 93
+ train_loss: 0.08890526102100955
+ eval_loss: 0.0022962711695463786
+ time: Thu Aug 29 16:23:49 2024
+
+ Epoch 94
+ train_loss: 0.08908289874104246
+ eval_loss: 0.002243622880820028
+ time: Thu Aug 29 22:15:42 2024
+
+ Epoch 95
+ train_loss: 0.08908785978677156
+ eval_loss: 0.0022457318524397784
+ time: Fri Aug 30 04:06:57 2024
+
+ Epoch 96
+ train_loss: 0.08888098475318565
+ eval_loss: 0.002224675611787346
+ time: Fri Aug 30 09:58:43 2024
+
+ Epoch 97
+ train_loss: 0.08888529259134526
+ eval_loss: 0.0021844924980664493
+ time: Fri Aug 30 15:50:16 2024
+
+ Epoch 98
+ train_loss: 0.08885388837534758
+ eval_loss: 0.0022109088076294288
+ time: Fri Aug 30 21:41:59 2024
+
+ Epoch 99
+ train_loss: 0.08873902663868657
+ eval_loss: 0.0022606451996653202
+ time: Sat Aug 31 03:34:46 2024
+
+ Epoch 100
+ train_loss: 0.08877080098666765
+ eval_loss: 0.002279470525367602
+ time: Sat Aug 31 09:28:38 2024
+
code/train_clamp2.py ADDED
@@ -0,0 +1,356 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
+ import os
+ import json
+ import time
+ import wandb
+ import torch
+ import random
+ import numpy as np
+ from utils import *
+ from config import *
+ from tqdm import tqdm
+ from copy import deepcopy
+ import torch.distributed as dist
+ from torch.amp import autocast, GradScaler
+ from torch.utils.data import Dataset, DataLoader
+ from torch.utils.data.distributed import DistributedSampler
+ from torch.nn.parallel import DistributedDataParallel as DDP
+ from transformers import AutoTokenizer, BertConfig, get_constant_schedule_with_warmup
+
+ def list_files_in_json(json_path):
+     file_list = []
+
+     if os.path.exists(json_path):
+         with open(json_path, 'r', encoding='utf-8') as f:
+             for line in f:
+                 item = json.loads(line)
+                 file_list.append(item)
+
+     return file_list
+
+ def collate_batch(batch):
+     text_inputs, text_masks, music_inputs, music_masks = zip(*batch)
+
+     text_inputs = torch.stack(text_inputs)
+     text_masks = torch.stack(text_masks)
+     music_inputs = torch.stack(music_inputs)
+     music_masks = torch.stack(music_masks)
+
+     return text_inputs, text_masks, music_inputs, music_masks
+
+ class TextMusicDataset(Dataset):
+     def __init__(self, items, mode):
+         print("The number of "+mode+" data: "+str(len(items)))
+         self.items = items
+         self.mode = mode
+         if self.mode == 'train' or not EVAL_JSONL:
+             self.datapath = os.path.dirname(TRAIN_JSONL)
+         elif self.mode == 'eval':
+             self.datapath = os.path.dirname(EVAL_JSONL)
+
+     def text_dropout(self, item):
+         if random.random() < 0.5:
+             candidates = []
+             for key in item.keys():
+                 if key not in ["summary_en", "summary_nen", "filepaths"]:
+                     if item[key] is None:
+                         continue
+                     elif isinstance(item[key], str):
+                         candidates.append(item[key])
+                     elif isinstance(item[key], list):
+                         candidates.extend(item[key])
+             candidates = list(set(candidates))
+             candidates = "\n".join(candidates)
+             candidates = candidates.split("\n")
+             selected_candidates = [c for c in candidates if len(c) > 0 and random.random() < 0.5]
+             if len(selected_candidates) == 0:
+                 selected_candidates = candidates
+             random.shuffle(selected_candidates)
+             text = tokenizer.sep_token.join(selected_candidates)
+         else:
+             if random.random() < 0.5:
+                 text = random.choice(item["summary_en"])
+             else:
+                 text = random.choice(item["summary_nen"])["summary"]
+
+         return text
+
+     def random_truncate(self, input_tensor, max_length):
+         choices = ["head", "tail", "middle"]
+         choice = random.choice(choices)
+         if choice == "head" or self.mode == 'eval':
+             input_tensor = input_tensor[:max_length]
+         elif choice == "tail":
+             input_tensor = input_tensor[-max_length:]
+         elif choice == "middle":
+             start = random.randint(1, input_tensor.size(0)-max_length)
+             input_tensor = input_tensor[start:start+max_length]
+
+         return input_tensor
+
+     def __len__(self):
+         return len(self.items)
+
+     def __getitem__(self, idx):
+         item = self.items[idx]
+
+         # randomly select text from the item
+         if self.mode == 'train' and TEXT_DROPOUT:
+             text = self.text_dropout(item)
+         else:
+             text = item["summary_en"][0]
+
+         # tokenize text and build mask for text tokens
+         text_inputs = tokenizer(text, return_tensors='pt')
+         text_inputs = text_inputs['input_ids'].squeeze(0)
+         if text_inputs.size(0) > MAX_TEXT_LENGTH:
+             text_inputs = self.random_truncate(text_inputs, MAX_TEXT_LENGTH)
+         text_masks = torch.ones(text_inputs.size(0))
+
+         # load music file
+         if self.mode == 'train':
+             filepath = random.choice(item["filepaths"])
+         else:
+             if item["filepaths"][0].endswith(".abc"):
+                 filepath = item["filepaths"][0]
+             else:
+                 filepath = item["filepaths"][1]
+         filepath = self.datapath + '/' + filepath
+
+         with open(filepath, "r", encoding="utf-8") as f:
+             item = f.read().replace("L:1/8\n", "") if filepath.endswith(".abc") else f.read()
+
+         # randomly remove instrument info from the music file
+         if random.random() < 0.9 and self.mode == 'train':
+             item = remove_instrument_info(item)
+
+         # mask music inputs
+         music_inputs = patchilizer.encode(item, add_special_patches=True, truncate=True, random_truncate=(self.mode=="train"))
+         music_inputs = torch.tensor(music_inputs)
+         music_masks = torch.ones(music_inputs.size(0))
+
+         # pad text inputs and masks
+         pad_indices = torch.ones(MAX_TEXT_LENGTH - text_inputs.size(0)).long() * tokenizer.pad_token_id
+         text_inputs = torch.cat((text_inputs, pad_indices), 0)
+         text_masks = torch.cat((text_masks, torch.zeros(MAX_TEXT_LENGTH - text_masks.size(0))), 0)
+
+         # pad music inputs and masks
+         pad_indices = torch.ones((PATCH_LENGTH - music_inputs.size(0), PATCH_SIZE)).long() * patchilizer.pad_token_id
+         music_inputs = torch.cat((music_inputs, pad_indices), 0)
+         music_masks = torch.cat((music_masks, torch.zeros(PATCH_LENGTH - music_masks.size(0))), 0)
+
+         return text_inputs, text_masks, music_inputs, music_masks
+
+ # call model with a batch of input
+ def process_one_batch(batch):
+     text_inputs, text_masks, music_inputs, music_masks = batch
+
+     loss = model(text_inputs,
+                  text_masks,
+                  music_inputs,
+                  music_masks)
+
+     # Reduce the loss on GPU 0
+     if world_size > 1:
+         loss = loss.unsqueeze(0)
+         dist.reduce(loss, dst=0)
+         loss = loss / world_size
+         dist.broadcast(loss, src=0)
+
+     return loss.mean()
+
+ # do one epoch for training
+ def train_epoch(epoch):
+     tqdm_train_set = tqdm(train_set)
+     total_train_loss = 0
+     iter_idx = 1
+     model.train()
+     train_steps = (epoch-1)*len(train_set)
+
+     for batch in tqdm_train_set:
+         with autocast(device_type='cuda'):
+             loss = process_one_batch(batch)
+         scaler.scale(loss).backward()
+         total_train_loss += loss.item()
+         scaler.step(optimizer)
+         scaler.update()
+
+         lr_scheduler.step()
+         model.zero_grad(set_to_none=True)
+         tqdm_train_set.set_postfix({str(global_rank)+'_train_loss': total_train_loss / iter_idx})
+         train_steps += 1
+
+         # Log the training loss to wandb
+         if global_rank==0 and CLAMP2_WANDB_LOG:
+             wandb.log({"train_loss": total_train_loss / iter_idx}, step=train_steps)
+
+         iter_idx += 1
+
+     return total_train_loss / (iter_idx-1)
+
+ # do one epoch for eval
+ def eval_epoch():
+     tqdm_eval_set = tqdm(eval_set)
+     total_eval_loss = 0
+     iter_idx = 1
+     model.eval()
+
+     # Evaluate data for one epoch
+     for batch in tqdm_eval_set:
+         with torch.no_grad():
+             loss = process_one_batch(batch)
+
+         total_eval_loss += loss.item()
+         tqdm_eval_set.set_postfix({str(global_rank)+'_eval_loss': total_eval_loss / iter_idx})
+         iter_idx += 1
+
+     return total_eval_loss / (iter_idx-1)
+
+ # train and eval
+ if __name__ == "__main__":
+
+     # Set up distributed training
+     world_size = int(os.environ['WORLD_SIZE']) if 'WORLD_SIZE' in os.environ else 1
+     global_rank = int(os.environ['RANK']) if 'RANK' in os.environ else 0
+     local_rank = int(os.environ['LOCAL_RANK']) if 'LOCAL_RANK' in os.environ else 0
+
+     if world_size > 1:
+         torch.cuda.set_device(local_rank)
+         device = torch.device("cuda", local_rank)
+         dist.init_process_group(backend='nccl')
+     else:
+         device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
+
+     if CLAMP2_DETERMINISTIC:
+         seed = 42 + global_rank
+         random.seed(seed)
+         np.random.seed(seed)
+         torch.manual_seed(seed)
+         torch.cuda.manual_seed_all(seed)
+         torch.backends.cudnn.deterministic = True
+         torch.backends.cudnn.benchmark = False
+
+     m3_config = BertConfig(vocab_size=1,
+                            hidden_size=M3_HIDDEN_SIZE,
+                            num_hidden_layers=PATCH_NUM_LAYERS,
+                            num_attention_heads=M3_HIDDEN_SIZE//64,
+                            intermediate_size=M3_HIDDEN_SIZE*4,
+                            max_position_embeddings=PATCH_LENGTH)
+     model = CLaMP2Model(m3_config,
+                         global_rank,
+                         world_size,
+                         TEXT_MODEL_NAME,
+                         CLAMP2_HIDDEN_SIZE,
+                         CLAMP2_LOAD_M3)
+     model = model.to(device)
+     tokenizer = AutoTokenizer.from_pretrained(TEXT_MODEL_NAME)
+     patchilizer = M3Patchilizer()
+
+     # print parameter number
+     print("Parameter Number: "+str(sum(p.numel() for p in model.parameters() if p.requires_grad)))
+
+     if world_size > 1:
+         model = DDP(model, device_ids=[local_rank], output_device=local_rank, find_unused_parameters=True)
+
+     scaler = GradScaler()
+     optimizer = torch.optim.AdamW(model.parameters(), lr=CLAMP2_LEARNING_RATE)
+
+     if CLAMP2_WANDB_LOG and global_rank==0:
+         # Initialize wandb
+         if WANDB_KEY:
+             wandb.login(key=WANDB_KEY)
+         wandb.init(project="clamp2",
+                    name=CLAMP2_WEIGHTS_PATH.replace("weights_", "").replace(".pth", ""))
+
+     # load filenames under train and eval folder
+     train_files = list_files_in_json(TRAIN_JSONL)
+     eval_files = list_files_in_json(EVAL_JSONL)
+
+     if len(eval_files)==0:
+         train_files, eval_files = split_data(train_files)
+
+     train_batch_nums = int(len(train_files) / CLAMP2_BATCH_SIZE)
+     eval_batch_nums = int(len(eval_files) / CLAMP2_BATCH_SIZE)
+
+     train_files = train_files[:train_batch_nums*CLAMP2_BATCH_SIZE]
+     eval_files = eval_files[:eval_batch_nums*CLAMP2_BATCH_SIZE]
+
+     train_set = TextMusicDataset(train_files, 'train')
+     eval_set = TextMusicDataset(eval_files, 'eval')
+
+     train_sampler = DistributedSampler(train_set, num_replicas=world_size, rank=global_rank)
+     eval_sampler = DistributedSampler(eval_set, num_replicas=world_size, rank=global_rank)
+
+     train_set = DataLoader(train_set, batch_size=CLAMP2_BATCH_SIZE, collate_fn=collate_batch, sampler=train_sampler, shuffle=(train_sampler is None))
+     eval_set = DataLoader(eval_set, batch_size=CLAMP2_BATCH_SIZE, collate_fn=collate_batch, sampler=eval_sampler, shuffle=(eval_sampler is None))
+
+     lr_scheduler = get_constant_schedule_with_warmup(optimizer=optimizer, num_warmup_steps=1000)
+
+     if CLAMP2_LOAD_CKPT and os.path.exists(CLAMP2_WEIGHTS_PATH):
+         # Load checkpoint to CPU
+         checkpoint = torch.load(CLAMP2_WEIGHTS_PATH, map_location='cpu', weights_only=True)
+
+         # Here, model is assumed to be on GPU
+         # Load the state dict into a CPU clone of the model first, then copy it back
+         if hasattr(model, "module"):
+             # For a DDP-wrapped model, load into model.module instead
+             cpu_model = deepcopy(model.module)
+             cpu_model.load_state_dict(checkpoint['model'])
+             model.module.load_state_dict(cpu_model.state_dict())
+         else:
+             cpu_model = deepcopy(model)
+             cpu_model.load_state_dict(checkpoint['model'])
+             model.load_state_dict(cpu_model.state_dict())
+         optimizer.load_state_dict(checkpoint['optimizer'])
+         lr_scheduler.load_state_dict(checkpoint['lr_sched'])
+         pre_epoch = checkpoint['epoch']
+         best_epoch = checkpoint['best_epoch']
+         min_eval_loss = checkpoint['min_eval_loss']
+         print(f"Successfully Loaded Checkpoint from Epoch {checkpoint['epoch']} with loss {checkpoint['min_eval_loss']}")
+         checkpoint = None
+
+     else:
+         pre_epoch = 0
+         best_epoch = 0
+         min_eval_loss = float('inf')
+
+         model = model.to(device)
+         optimizer = torch.optim.AdamW(model.parameters(), lr=CLAMP2_LEARNING_RATE)
+
+     for epoch in range(1+pre_epoch, CLAMP2_NUM_EPOCH+1):
+         train_sampler.set_epoch(epoch)
+         eval_sampler.set_epoch(epoch)
+         print('-' * 21 + "Epoch " + str(epoch) + '-' * 21)
+         train_loss = train_epoch(epoch)
+         eval_loss = eval_epoch()
+         if global_rank==0:
+             with open(CLAMP2_LOGS_PATH, 'a') as f:
+                 f.write("Epoch " + str(epoch) + "\ntrain_loss: " + str(train_loss) + "\neval_loss: " + str(eval_loss) + "\ntime: " + time.asctime(time.localtime(time.time())) + "\n\n")
+             if eval_loss < min_eval_loss:
+                 best_epoch = epoch
+                 min_eval_loss = eval_loss
+                 checkpoint = {
+                     'model': model.module.state_dict() if hasattr(model, "module") else model.state_dict(),
+                     'optimizer': optimizer.state_dict(),
+                     'lr_sched': lr_scheduler.state_dict(),
+                     'epoch': epoch,
+                     'best_epoch': best_epoch,
+                     'min_eval_loss': min_eval_loss
+                 }
+                 torch.save(checkpoint, CLAMP2_WEIGHTS_PATH)
+             checkpoint = {
+                 'model': model.module.state_dict() if hasattr(model, "module") else model.state_dict(),
+                 'optimizer': optimizer.state_dict(),
+                 'lr_sched': lr_scheduler.state_dict(),
+                 'epoch': epoch,
+                 'best_epoch': best_epoch,
+                 'min_eval_loss': min_eval_loss
+             }
+             torch.save(checkpoint, "latest_"+CLAMP2_WEIGHTS_PATH)
+
+         if world_size > 1:
+             dist.barrier()
+
+     if global_rank==0:
+         print("Best Eval Epoch : "+str(best_epoch))
+         print("Min Eval Loss : "+str(min_eval_loss))
code/train_m3.py ADDED
@@ -0,0 +1,321 @@
+ import os
+ import gc
+ import time
+ import wandb
+ import torch
+ import random
+ import weakref
+ import numpy as np
+ from utils import *
+ from config import *
+ from tqdm import tqdm
+ from copy import deepcopy
+ import torch.distributed as dist
+ from torch.amp import autocast, GradScaler
+ from torch.utils.data import Dataset, DataLoader
+ from torch.utils.data.distributed import DistributedSampler
+ from torch.nn.parallel import DistributedDataParallel as DDP
+ from transformers import BertConfig, GPT2Config, get_constant_schedule_with_warmup
+
+ patchilizer = M3Patchilizer()
+
+ def clear_unused_tensors():
+     gc.disable()  # Temporarily disable garbage collection
+     try:
+         # Get the set of tensor ids used by the model
+         if hasattr(model, "module"):
+             model_tensors = {id(p) for p in model.module.parameters()}
+         else:
+             model_tensors = {id(p) for p in model.parameters()}
+
+         # Get the set of tensor ids used by the optimizer
+         optimizer_tensors = {
+             id(state)
+             for state_dict in optimizer.state.values()
+             for state in state_dict.values()
+             if isinstance(state, torch.Tensor)  # Ensure only tensors are considered
+         }
+
+         # List of all CUDA tensors currently in memory
+         tensors = [obj for obj in gc.get_objects() if isinstance(obj, torch.Tensor) and obj.is_cuda]
+
+         # Create weak references to avoid interfering with garbage collection
+         tensor_refs = [weakref.ref(tensor) for tensor in tensors]
+
+         for tensor_ref in tensor_refs:
+             tensor = tensor_ref()  # Dereference the weak reference
+             if tensor is not None and id(tensor) not in model_tensors and id(tensor) not in optimizer_tensors:
+                 # Mark the tensor for deletion
+                 tensor.detach_()  # Detach from computation graph
+                 del tensor  # Delete the tensor reference
+     except Exception:
+         pass
+
+     finally:
+         gc.enable()  # Re-enable garbage collection
+         gc.collect()  # Force a garbage collection
+         torch.cuda.empty_cache()  # Clear the CUDA cache
+
+ def list_files_in_directory(directories, extensions=["abc", "mtf"]):
+     file_list = []
+
+     for directory in directories:
+         for root, dirs, files in os.walk(directory):
+             for file in files:
+                 if any(file.endswith(ext) for ext in extensions):
+                     file_path = os.path.join(root, file)
+                     file_list.append(file_path)
+
+     return file_list
+
+ def collate_batch(batch):
+     input_patches, input_masks, selected_indices, target_patches = zip(*batch)
+
+     input_patches = torch.nn.utils.rnn.pad_sequence(input_patches, batch_first=True, padding_value=patchilizer.pad_token_id)
+     input_masks = torch.nn.utils.rnn.pad_sequence(input_masks, batch_first=True, padding_value=0)
+     selected_indices = torch.nn.utils.rnn.pad_sequence(selected_indices, batch_first=True, padding_value=0)
+     target_patches = torch.nn.utils.rnn.pad_sequence(target_patches, batch_first=True, padding_value=patchilizer.pad_token_id)
+
+     return input_patches, input_masks, selected_indices, target_patches
+
+ class M3Dataset(Dataset):
+     def __init__(self, filenames, mode):
+         print("The number of "+mode+" data: "+str(len(filenames)))
+         self.filenames = filenames
+         self.mode = mode
+
+     def __len__(self):
+         return len(self.filenames)
+
+     def __getitem__(self, idx):
+         filename = self.filenames[idx]
+         try:
+             with open(filename, "r", encoding="utf-8") as f:
+                 item = f.read().replace("L:1/8\n", "") if filename.endswith(".abc") else f.read()
+         except Exception as e:
+             print(e)
+             print("Failed to load: "+filename)
+             item = ""
+
+         target_patches = patchilizer.encode(item, add_special_patches=True, truncate=True, random_truncate=(self.mode=="train"))
+         input_masks = torch.tensor([1]*len(target_patches))
+         input_patches, selected_indices = mask_patches(target_patches, patchilizer, self.mode)
+         input_patches = input_patches.reshape(-1)
+         target_patches = torch.tensor(target_patches).reshape(-1)
+         return input_patches, input_masks, selected_indices, target_patches
+
+ # call model with a batch of input
+ def process_one_batch(batch):
+     input_patches, input_masks, selected_indices, target_patches = batch
+
+     loss = model(input_patches,
+                  input_masks,
+                  selected_indices,
+                  target_patches).loss
+
+     # Reduce the loss on GPU 0
+     if world_size > 1:
+         loss = loss.unsqueeze(0)
+         dist.reduce(loss, dst=0)
+         loss = loss / world_size
+         dist.broadcast(loss, src=0)
+
+     return loss.mean()
+
+ # do one epoch for training
+ def train_epoch(epoch):
+     tqdm_train_set = tqdm(train_set)
+     total_train_loss = 0
+     iter_idx = 1
+     model.train()
+     train_steps = (epoch-1)*len(train_set)
+
+     for batch in tqdm_train_set:
+         with autocast(device_type='cuda'):
+             loss = process_one_batch(batch)
+         scaler.scale(loss).backward()
+         total_train_loss += loss.item()
+         scaler.step(optimizer)
+         scaler.update()
+
+         lr_scheduler.step()
+         model.zero_grad(set_to_none=True)
+         tqdm_train_set.set_postfix({str(global_rank)+'_train_loss': total_train_loss / iter_idx})
+         train_steps += 1
+
+         # Log the training loss to wandb
+         if global_rank==0 and M3_WANDB_LOG:
+             wandb.log({"train_loss": total_train_loss / iter_idx}, step=train_steps)
+
+         iter_idx += 1
+         if iter_idx % 1000 == 0:
+             clear_unused_tensors()
+
+     return total_train_loss / (iter_idx-1)
+
+ # do one epoch for eval
+ def eval_epoch():
+     tqdm_eval_set = tqdm(eval_set)
+     total_eval_loss = 0
+     iter_idx = 1
+     model.eval()
+
+     # Evaluate data for one epoch
+     for batch in tqdm_eval_set:
+         with torch.no_grad():
+             loss = process_one_batch(batch)
+
+         total_eval_loss += loss.item()
+         tqdm_eval_set.set_postfix({str(global_rank)+'_eval_loss': total_eval_loss / iter_idx})
+         iter_idx += 1
+
+     return total_eval_loss / (iter_idx-1)
+
+ # train and eval
+ if __name__ == "__main__":
+
+     # Set up distributed training
+     world_size = int(os.environ['WORLD_SIZE']) if 'WORLD_SIZE' in os.environ else 1
+     global_rank = int(os.environ['RANK']) if 'RANK' in os.environ else 0
+     local_rank = int(os.environ['LOCAL_RANK']) if 'LOCAL_RANK' in os.environ else 0
+
+     if world_size > 1:
+         torch.cuda.set_device(local_rank)
+         device = torch.device("cuda", local_rank)
+         dist.init_process_group(backend='nccl')
+     else:
+         device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
+
+     if M3_DETERMINISTIC:
+         seed = 42 + global_rank
+         random.seed(seed)
+         np.random.seed(seed)
+         torch.manual_seed(seed)
+         torch.cuda.manual_seed_all(seed)
+         torch.backends.cudnn.deterministic = True
+         torch.backends.cudnn.benchmark = False
+
+     encoder_config = BertConfig(vocab_size=1,
+                                 hidden_size=M3_HIDDEN_SIZE,
+                                 num_hidden_layers=PATCH_NUM_LAYERS,
+                                 num_attention_heads=M3_HIDDEN_SIZE//64,
+                                 intermediate_size=M3_HIDDEN_SIZE*4,
+                                 max_position_embeddings=PATCH_LENGTH)
+     decoder_config = GPT2Config(vocab_size=128,
+                                 n_positions=PATCH_SIZE,
+                                 n_embd=M3_HIDDEN_SIZE,
+                                 n_layer=TOKEN_NUM_LAYERS,
+                                 n_head=M3_HIDDEN_SIZE//64,
+                                 n_inner=M3_HIDDEN_SIZE*4)
+     model = M3Model(encoder_config, decoder_config)
+     model = model.to(device)
+
+     # print parameter number
+     print("Parameter Number: "+str(sum(p.numel() for p in model.parameters() if p.requires_grad)))
+
+     if world_size > 1:
+         model = DDP(model, device_ids=[local_rank], output_device=local_rank, find_unused_parameters=True)
+
+     scaler = GradScaler()
+     optimizer = torch.optim.AdamW(model.parameters(), lr=M3_LEARNING_RATE)
+
+     if M3_WANDB_LOG and global_rank==0:
+         # Initialize wandb
+         if WANDB_KEY:
+             wandb.login(key=WANDB_KEY)
+         wandb.init(project="m3",
+                    name=M3_WEIGHTS_PATH.replace("weights_", "").replace(".pth", ""))
+
+     # load filenames under train and eval folder
+     train_files = list_files_in_directory(TRAIN_FOLDERS)
+     eval_files = list_files_in_directory(EVAL_FOLDERS)
+
+     if len(eval_files)==0:
+         train_files, eval_files = split_data(train_files)
+
+     train_batch_nums = int(len(train_files) / M3_BATCH_SIZE)
+     eval_batch_nums = int(len(eval_files) / M3_BATCH_SIZE)
+
+     train_files = train_files[:train_batch_nums*M3_BATCH_SIZE]
+     eval_files = eval_files[:eval_batch_nums*M3_BATCH_SIZE]
+
+     train_set = M3Dataset(train_files, 'train')
+     eval_set = M3Dataset(eval_files, 'eval')
+
+     train_sampler = DistributedSampler(train_set, num_replicas=world_size, rank=global_rank)
+     eval_sampler = DistributedSampler(eval_set, num_replicas=world_size, rank=global_rank)
+
+     train_set = DataLoader(train_set, batch_size=M3_BATCH_SIZE, collate_fn=collate_batch, sampler=train_sampler, shuffle=(train_sampler is None))
+     eval_set = DataLoader(eval_set, batch_size=M3_BATCH_SIZE, collate_fn=collate_batch, sampler=eval_sampler, shuffle=(eval_sampler is None))
+
+     lr_scheduler = get_constant_schedule_with_warmup(optimizer=optimizer, num_warmup_steps=1000)
+
+     if M3_LOAD_CKPT and os.path.exists(M3_WEIGHTS_PATH):
+         # Load checkpoint to CPU
+         checkpoint = torch.load(M3_WEIGHTS_PATH, map_location='cpu', weights_only=True)
+
+         # Here, model is assumed to be on GPU
+         # Load the state dict into a CPU clone of the model first, then copy it back
+         if hasattr(model, "module"):
+             # For a DDP-wrapped model, load into model.module instead
+             cpu_model = deepcopy(model.module)
+             cpu_model.load_state_dict(checkpoint['model'])
+             model.module.load_state_dict(cpu_model.state_dict())
+         else:
+             cpu_model = deepcopy(model)
+             cpu_model.load_state_dict(checkpoint['model'])
+             model.load_state_dict(cpu_model.state_dict())
+         optimizer.load_state_dict(checkpoint['optimizer'])
+         lr_scheduler.load_state_dict(checkpoint['lr_sched'])
+         pre_epoch = checkpoint['epoch']
+         best_epoch = checkpoint['best_epoch']
+         min_eval_loss = checkpoint['min_eval_loss']
+         print(f"Successfully Loaded Checkpoint from Epoch {checkpoint['epoch']} with loss {checkpoint['min_eval_loss']}")
+         checkpoint = None
+
+     else:
+         pre_epoch = 0
+         best_epoch = 0
+         min_eval_loss = float('inf')
+
+         model = model.to(device)
+         optimizer = torch.optim.AdamW(model.parameters(), lr=M3_LEARNING_RATE)
+
+     for epoch in range(1+pre_epoch, M3_NUM_EPOCH+1):
+         train_sampler.set_epoch(epoch)
+         eval_sampler.set_epoch(epoch)
+         print('-' * 21 + "Epoch " + str(epoch) + '-' * 21)
+         train_loss = train_epoch(epoch)
+         eval_loss = eval_epoch()
+         if global_rank==0:
+             with open(M3_LOGS_PATH, 'a') as f:
+                 f.write("Epoch " + str(epoch) + "\ntrain_loss: " + str(train_loss) + "\neval_loss: " + str(eval_loss) + "\ntime: " + time.asctime(time.localtime(time.time())) + "\n\n")
+             if eval_loss < min_eval_loss:
+                 best_epoch = epoch
+                 min_eval_loss = eval_loss
+                 checkpoint = {
+                     'model': model.module.state_dict() if hasattr(model, "module") else model.state_dict(),
+                     'optimizer': optimizer.state_dict(),
+                     'lr_sched': lr_scheduler.state_dict(),
+                     'epoch': epoch,
+                     'best_epoch': best_epoch,
+                     'min_eval_loss': min_eval_loss
+                 }
+                 torch.save(checkpoint, M3_WEIGHTS_PATH)
+             checkpoint = {
+                 'model': model.module.state_dict() if hasattr(model, "module") else model.state_dict(),
+                 'optimizer': optimizer.state_dict(),
+                 'lr_sched': lr_scheduler.state_dict(),
+                 'epoch': epoch,
+                 'best_epoch': best_epoch,
+                 'min_eval_loss': min_eval_loss
+             }
+             torch.save(checkpoint, "latest_"+M3_WEIGHTS_PATH)
+
+         if world_size > 1:
+             dist.barrier()
+
+     if global_rank==0:
+         print("Best Eval Epoch : "+str(best_epoch))
+         print("Min Eval Loss : "+str(min_eval_loss))
code/utils.py ADDED
@@ -0,0 +1,483 @@
+ import re
+ import os
+ import math
+ import torch
+ import random
+ from config import *
+ from unidecode import unidecode
+ from torch.nn import functional as F
+ from transformers import AutoModel, BertModel, GPT2LMHeadModel, PreTrainedModel, GPT2Config
+
+ try:
+     import torch.distributed.nn
+     from torch import distributed as dist
+
+     has_distributed = True
+ except ImportError:
+     has_distributed = False
+
+ try:
+     import horovod.torch as hvd
+ except ImportError:
+     hvd = None
+
+ class ClipLoss(torch.nn.Module):
+
+     def __init__(
+             self,
+             local_loss=False,
+             gather_with_grad=False,
+             cache_labels=False,
+             rank=0,
+             world_size=1,
+             use_horovod=False,
+     ):
+         super().__init__()
+         self.local_loss = local_loss
+         self.gather_with_grad = gather_with_grad
+         self.cache_labels = cache_labels
+         self.rank = rank
+         self.world_size = world_size
+         self.use_horovod = use_horovod
+
+         # cache state
+         self.prev_num_logits = 0
+         self.labels = {}
+
+     def gather_features(
+             self,
+             image_features,
+             text_features,
+             local_loss=False,
+             gather_with_grad=False,
+             rank=0,
+             world_size=1,
+             use_horovod=False
+     ):
+         assert has_distributed, 'torch.distributed did not import correctly, please use a PyTorch version with support.'
+         if use_horovod:
+             assert hvd is not None, 'Please install horovod'
+             if gather_with_grad:
+                 all_image_features = hvd.allgather(image_features)
+                 all_text_features = hvd.allgather(text_features)
+             else:
+                 with torch.no_grad():
+                     all_image_features = hvd.allgather(image_features)
+                     all_text_features = hvd.allgather(text_features)
+                 if not local_loss:
+                     # ensure grads for local rank when all_* features don't have a gradient
+                     gathered_image_features = list(all_image_features.chunk(world_size, dim=0))
+                     gathered_text_features = list(all_text_features.chunk(world_size, dim=0))
+                     gathered_image_features[rank] = image_features
+                     gathered_text_features[rank] = text_features
+                     all_image_features = torch.cat(gathered_image_features, dim=0)
+                     all_text_features = torch.cat(gathered_text_features, dim=0)
+         else:
+             # We gather tensors from all gpus
+             if gather_with_grad:
+                 all_image_features = torch.cat(torch.distributed.nn.all_gather(image_features), dim=0)
+                 all_text_features = torch.cat(torch.distributed.nn.all_gather(text_features), dim=0)
+             else:
+                 gathered_image_features = [torch.zeros_like(image_features) for _ in range(world_size)]
+                 gathered_text_features = [torch.zeros_like(text_features) for _ in range(world_size)]
+                 dist.all_gather(gathered_image_features, image_features)
+                 dist.all_gather(gathered_text_features, text_features)
+                 if not local_loss:
+                     # ensure grads for local rank when all_* features don't have a gradient
+                     gathered_image_features[rank] = image_features
+                     gathered_text_features[rank] = text_features
+                 all_image_features = torch.cat(gathered_image_features, dim=0)
+                 all_text_features = torch.cat(gathered_text_features, dim=0)
+
+         return all_image_features, all_text_features
+
+     def get_ground_truth(self, device, num_logits) -> torch.Tensor:
+         # calculate ground-truth and cache if enabled
+         if self.prev_num_logits != num_logits or device not in self.labels:
+             labels = torch.arange(num_logits, device=device, dtype=torch.long)
+             if self.world_size > 1 and self.local_loss:
+                 labels = labels + num_logits * self.rank
+             if self.cache_labels:
+                 self.labels[device] = labels
+                 self.prev_num_logits = num_logits
+         else:
+             labels = self.labels[device]
+         return labels
+
+     def get_logits(self, image_features, text_features, logit_scale):
+         if self.world_size > 1:
+             all_image_features, all_text_features = self.gather_features(
+                 image_features, text_features,
+                 self.local_loss, self.gather_with_grad, self.rank, self.world_size, self.use_horovod)
+
+             if self.local_loss:
+                 logits_per_image = logit_scale * image_features @ all_text_features.T
+                 logits_per_text = logit_scale * text_features @ all_image_features.T
+             else:
+                 logits_per_image = logit_scale * all_image_features @ all_text_features.T
+                 logits_per_text = logits_per_image.T
+         else:
+             logits_per_image = logit_scale * image_features @ text_features.T
+             logits_per_text = logit_scale * text_features @ image_features.T
+
+         return logits_per_image, logits_per_text
+
+     def forward(self, image_features, text_features, logit_scale, output_dict=False):
+         device = image_features.device
+         logits_per_image, logits_per_text = self.get_logits(image_features, text_features, logit_scale)
+
+         labels = self.get_ground_truth(device, logits_per_image.shape[0])
+
+         total_loss = (
+             F.cross_entropy(logits_per_image, labels) +
+             F.cross_entropy(logits_per_text, labels)
+         ) / 2
+
+         return {"contrastive_loss": total_loss} if output_dict else total_loss
+
+ class M3Patchilizer:
+     def __init__(self):
+         self.delimiters = ["|:", "::", ":|", "[|", "||", "|]", "|"]
+         self.regexPattern = '(' + '|'.join(map(re.escape, self.delimiters)) + ')'
+         self.pad_token_id = 0
+         self.bos_token_id = 1
+         self.eos_token_id = 2
+         self.mask_token_id = 3
+
+     def split_bars(self, body):
+         bars = re.split(self.regexPattern, ''.join(body))
+         bars = list(filter(None, bars))  # remove empty strings
+         if bars[0] in self.delimiters:
+             bars[1] = bars[0] + bars[1]
+             bars = bars[1:]
+         bars = [bars[i * 2] + bars[i * 2 + 1] for i in range(len(bars) // 2)]
+         return bars
+
+     def bar2patch(self, bar, patch_size=PATCH_SIZE):
+         patch = [self.bos_token_id] + [ord(c) for c in bar] + [self.eos_token_id]
+         patch = patch[:patch_size]
+         patch += [self.pad_token_id] * (patch_size - len(patch))
+         return patch
+
+     def patch2bar(self, patch):
+         return ''.join(chr(idx) if idx > self.mask_token_id else '' for idx in patch)
+
+     def encode(self,
+                item,
+                patch_size=PATCH_SIZE,
+                add_special_patches=False,
+                truncate=False,
+                random_truncate=False):
+
+         item = unidecode(item)
+         lines = re.findall(r'.*?\n|.*$', item)
+         lines = list(filter(None, lines))  # remove empty lines
+
+         patches = []
+
+         if lines[0].split(" ")[0] == "ticks_per_beat":
+             patch = ""
+             for line in lines:
+                 if patch.startswith(line.split(" ")[0]) and (len(patch) + len(" ".join(line.split(" ")[1:])) <= patch_size-2):
+                     patch = patch[:-1] + "\t" + " ".join(line.split(" ")[1:])
+                 else:
+                     if patch:
+                         patches.append(patch)
+                     patch = line
+             if patch != "":
+                 patches.append(patch)
+         else:
+             for line in lines:
+                 if len(line) > 1 and ((line[0].isalpha() and line[1] == ':') or line.startswith('%%')):
+                     patches.append(line)
+                 else:
+                     bars = self.split_bars(line)
+                     if bars:
+                         bars[-1] += '\n'
+                         patches.extend(bars)
+
+         if add_special_patches:
+             bos_patch = chr(self.bos_token_id) * patch_size
+             eos_patch = chr(self.eos_token_id) * patch_size
+             patches = [bos_patch] + patches + [eos_patch]
+
+         if len(patches) > PATCH_LENGTH and truncate:
+             choices = ["head", "tail", "middle"]
+             choice = random.choice(choices)
+             if choice == "head" or not random_truncate:
+                 patches = patches[:PATCH_LENGTH]
+             elif choice == "tail":
+                 patches = patches[-PATCH_LENGTH:]
+             else:
+                 start = random.randint(1, len(patches)-PATCH_LENGTH)
+                 patches = patches[start:start+PATCH_LENGTH]
+
+         patches = [self.bar2patch(patch) for patch in patches]
+
+         return patches
+
+     def decode(self, patches):
+         return ''.join(self.patch2bar(patch) for patch in patches)
+
+ class M3PatchEncoder(PreTrainedModel):
+     def __init__(self, config):
+         super(M3PatchEncoder, self).__init__(config)
+         self.patch_embedding = torch.nn.Linear(PATCH_SIZE*128, M3_HIDDEN_SIZE)
+         torch.nn.init.normal_(self.patch_embedding.weight, std=0.02)
+         self.base = BertModel(config=config)
+         self.pad_token_id = 0
+         self.bos_token_id = 1
+         self.eos_token_id = 2
+         self.mask_token_id = 3
+
+     def forward(self,
+                 input_patches,  # [batch_size, seq_length, hidden_size]
+                 input_masks):   # [batch_size, seq_length]
+         # Transform input_patches into embeddings
+         input_patches = torch.nn.functional.one_hot(input_patches, num_classes=128)
+         input_patches = input_patches.reshape(len(input_patches), -1, PATCH_SIZE*128).type(torch.FloatTensor)
+         input_patches = self.patch_embedding(input_patches.to(self.device))
+
+         # Apply BERT model to input_patches and input_masks
+         return self.base(inputs_embeds=input_patches, attention_mask=input_masks)
+
+ class M3TokenDecoder(PreTrainedModel):
+     def __init__(self, config):
+         super(M3TokenDecoder, self).__init__(config)
+         self.base = GPT2LMHeadModel(config=config)
+         self.pad_token_id = 0
+         self.bos_token_id = 1
+         self.eos_token_id = 2
+         self.mask_token_id = 3
+
+     def forward(self,
+                 patch_features,   # [batch_size, hidden_size]
+                 target_patches):  # [batch_size, seq_length]
+         # get input embeddings
+         inputs_embeds = torch.nn.functional.embedding(target_patches, self.base.transformer.wte.weight)
+
+         # concatenate the encoded patches with the input embeddings
+         inputs_embeds = torch.cat((patch_features.unsqueeze(1), inputs_embeds[:, 1:, :]), dim=1)
+
+         # preparing the labels for model training
+         target_masks = target_patches == self.pad_token_id
+         target_patches = target_patches.clone().masked_fill_(target_masks, -100)
+
+         # get the attention mask
+         target_masks = ~target_masks
+         target_masks = target_masks.type(torch.int)
+
+         return self.base(inputs_embeds=inputs_embeds,
+                          attention_mask=target_masks,
+                          labels=target_patches)
+
+     def generate(self,
+                  patch_feature,
+                  tokens):
+         # reshape the patch_feature and tokens
+         patch_feature = patch_feature.reshape(1, 1, -1)
+         tokens = tokens.reshape(1, -1)
+
+         # get input embeddings
+         tokens = torch.nn.functional.embedding(tokens, self.base.transformer.wte.weight)
+
+         # concatenate the encoded patches with the input embeddings
+         tokens = torch.cat((patch_feature, tokens[:, 1:, :]), dim=1)
+
+         # get the outputs from the model
+         outputs = self.base(inputs_embeds=tokens)
+
+         # get the probabilities of the next token
+         probs = torch.nn.functional.softmax(outputs.logits.squeeze(0)[-1], dim=-1)
+
+         return probs.detach().cpu().numpy()
+
+ class M3Model(PreTrainedModel):
+     def __init__(self, encoder_config, decoder_config):
+         super(M3Model, self).__init__(encoder_config)
+         self.encoder = M3PatchEncoder(encoder_config)
+         self.decoder = M3TokenDecoder(decoder_config)
+         self.pad_token_id = 0
+         self.bos_token_id = 1
+         self.eos_token_id = 2
+         self.mask_token_id = 3
+
+     def forward(self,
+                 input_patches,     # [batch_size, seq_length, hidden_size]
+                 input_masks,       # [batch_size, seq_length]
+                 selected_indices,  # [batch_size, seq_length]
+                 target_patches):   # [batch_size, seq_length, hidden_size]
+         input_patches = input_patches.reshape(len(input_patches), -1, PATCH_SIZE).to(self.device)
+         input_masks = input_masks.to(self.device)
+         selected_indices = selected_indices.to(self.device)
+         target_patches = target_patches.reshape(len(target_patches), -1, PATCH_SIZE).to(self.device)
+
+         # Pass the input_patches and input_masks through the encoder
+         outputs = self.encoder(input_patches, input_masks)["last_hidden_state"]
+
+         # Use selected_indices to form target_patches
+         target_patches = target_patches[selected_indices.bool()]
+         patch_features = outputs[selected_indices.bool()]
+
+         # Pass patch_features and target_patches through the decoder
+         return self.decoder(patch_features, target_patches)
+
+ class CLaMP2Model(PreTrainedModel):
+     def __init__(self,
+                  music_config,
+                  global_rank=None,
+                  world_size=None,
+                  text_model_name=TEXT_MODEL_NAME,
+                  hidden_size=CLAMP2_HIDDEN_SIZE,
+                  load_m3=CLAMP2_LOAD_M3):
+         super(CLaMP2Model, self).__init__(music_config)
+
+         self.text_model = AutoModel.from_pretrained(text_model_name)  # Load the text model
+         self.text_proj = torch.nn.Linear(self.text_model.config.hidden_size, hidden_size)  # Linear layer for text projections
+         torch.nn.init.normal_(self.text_proj.weight, std=0.02)  # Initialize weights with normal distribution
+
+         self.music_model = M3PatchEncoder(music_config)  # Initialize the music model
+         self.music_proj = torch.nn.Linear(M3_HIDDEN_SIZE, hidden_size)  # Linear layer for music projections
+         torch.nn.init.normal_(self.music_proj.weight, std=0.02)  # Initialize weights with normal distribution
+
+         if global_rank is None or world_size is None:
+             global_rank = 0
+             world_size = 1
+
+         self.loss_fn = ClipLoss(local_loss=False,
+                                 gather_with_grad=True,
+                                 cache_labels=False,
+                                 rank=global_rank,
+                                 world_size=world_size,
+                                 use_horovod=False)
+
+         if load_m3 and os.path.exists(M3_WEIGHTS_PATH):
+             checkpoint = torch.load(M3_WEIGHTS_PATH, map_location='cpu', weights_only=True)
+             decoder_config = GPT2Config(vocab_size=128,
+                                         n_positions=PATCH_SIZE,
+                                         n_embd=M3_HIDDEN_SIZE,
+                                         n_layer=TOKEN_NUM_LAYERS,
+                                         n_head=M3_HIDDEN_SIZE//64,
+                                         n_inner=M3_HIDDEN_SIZE*4)
+             model = M3Model(music_config, decoder_config)
+             model.load_state_dict(checkpoint['model'])
+             self.music_model = model.encoder
+             model = None
+             print(f"Successfully Loaded M3 Checkpoint from Epoch {checkpoint['epoch']} with loss {checkpoint['min_eval_loss']}")
+
+     def avg_pooling(self, input_features, input_masks):
+         input_masks = input_masks.unsqueeze(-1).to(self.device)  # add a dimension to match the feature dimension
+         input_features = input_features * input_masks  # apply mask to input_features
+         avg_pool = input_features.sum(dim=1) / input_masks.sum(dim=1)  # calculate average pooling
+
+         return avg_pool
+
+     def get_text_features(self,
+                           text_inputs,
+                           text_masks,
+                           get_normalized=False):
+         text_features = self.text_model(text_inputs.to(self.device),
+                                         attention_mask=text_masks.to(self.device))['last_hidden_state']
+
+         if get_normalized:
+             text_features = self.avg_pooling(text_features, text_masks)
+             text_features = self.text_proj(text_features)
+
+         return text_features
+
+     def get_music_features(self,
+                            music_inputs,
+                            music_masks,
+                            get_normalized=False):
+         music_features = self.music_model(music_inputs.to(self.device),
+                                           music_masks.to(self.device))['last_hidden_state']
+
+         if get_normalized:
+             music_features = self.avg_pooling(music_features, music_masks)
+             music_features = self.music_proj(music_features)
+
+         return music_features
+
+     def forward(self,
+                 text_inputs,    # [batch_size, seq_length]
+                 text_masks,     # [batch_size, seq_length]
+                 music_inputs,   # [batch_size, seq_length, hidden_size]
+                 music_masks):   # [batch_size, seq_length]
+         # Compute the text features
+         text_features = self.get_text_features(text_inputs, text_masks, get_normalized=True)
+
+         # Compute the music features
+         music_features = self.get_music_features(music_inputs, music_masks, get_normalized=True)
+
+         return self.loss_fn(text_features,
+                             music_features,
+                             LOGIT_SCALE,
+                             output_dict=False)
+
+ def split_data(data, eval_ratio=EVAL_SPLIT):
+     random.shuffle(data)
+     split_idx = int(len(data)*eval_ratio)
+     eval_set = data[:split_idx]
+     train_set = data[split_idx:]
+     return train_set, eval_set
+
+ def mask_patches(target_patches, patchilizer, mode):
+     indices = list(range(len(target_patches)))
+     random.shuffle(indices)
+     selected_indices = indices[:math.ceil(M3_MASK_RATIO*len(indices))]
+     sorted_indices = sorted(selected_indices)
+     input_patches = torch.tensor(target_patches)
+
+     if mode == "eval":
+         choice = "original"
+     else:
+         choice = random.choices(["mask", "shuffle", "original"], weights=[0.8, 0.1, 0.1])[0]
+
+     if choice == "mask":
+         input_patches[sorted_indices] = torch.tensor([patchilizer.mask_token_id]*PATCH_SIZE)
+     elif choice == "shuffle":
+         for idx in sorted_indices:
+             patch = input_patches[idx]
+             try:
+                 index_eos = (patch == patchilizer.eos_token_id).nonzero().item()
+             except Exception:
+                 index_eos = len(patch)
+
+             indices = list(range(1, index_eos))
+             random.shuffle(indices)
+             indices = [0] + indices + list(range(index_eos, len(patch)))
+             input_patches[idx] = patch[indices]
+
+     selected_indices = torch.zeros(len(target_patches))
+     selected_indices[sorted_indices] = 1.
+
+     return input_patches, selected_indices
+
+ def remove_instrument_info(item):
+     # remove instrument information from symbolic music
+     lines = re.findall(r'.*?\n|.*$', item)
+     lines = list(filter(None, lines))
+     if lines[0].split(" ")[0] == "ticks_per_beat":
+         type = "mtf"
+     else:
+         type = "abc"
+
+     cleaned_lines = []
+     for line in lines:
+         if type == "abc" and line.startswith("V:"):
+             # find the position of " nm=" or " snm="
+             nm_pos = line.find(" nm=")
+             snm_pos = line.find(" snm=")
+             # keep the part before " nm=" or " snm="
+             if nm_pos != -1:
+                 line = line[:nm_pos]
+             elif snm_pos != -1:
+                 line = line[:snm_pos]
+             if nm_pos != -1 or snm_pos != -1:
+                 line += "\n"
+         elif type == "mtf" and line.startswith("program_change"):
+             line = " ".join(line.split(" ")[:-1]) + " 0\n"
+
+         cleaned_lines.append(line)
+
+     return ''.join(cleaned_lines)
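A minimal round-trip sketch for `M3Patchilizer`, assuming it is run from the `code/` folder so that `utils.py` and `config.py` are importable (the ABC snippet is illustrative):

```python
from utils import M3Patchilizer

patchilizer = M3Patchilizer()
abc = "X:1\nM:4/4\nK:C\nCDEF GABc | cBAG FEDC |]\n"

patches = patchilizer.encode(abc, add_special_patches=True)
print(len(patches))                 # 7 here: BOS + 3 header lines + 2 bars + EOS
print(patchilizer.decode(patches))  # special patches decode to '', so the text round-trips
```

Note that each bar is stored as raw character codes (`ord`/`chr`), so any bar longer than `PATCH_SIZE - 2` characters is truncated and would not round-trip exactly.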
environment.yml ADDED
@@ -0,0 +1,204 @@
+ name: clamp2
+ channels:
+   - pytorch
+   - nvidia
+   - defaults
+ dependencies:
+   - blas=1.0=mkl
+   - brotli-python=1.0.9=py310hd77b12b_8
+   - bzip2=1.0.8=h2bbff1b_6
+   - ca-certificates=2024.7.2=haa95532_0
+   - cuda-cccl=12.6.37=0
+   - cuda-cccl_win-64=12.6.37=0
+   - cuda-cudart=11.8.89=0
+   - cuda-cudart-dev=11.8.89=0
+   - cuda-cupti=11.8.87=0
+   - cuda-libraries=11.8.0=0
+   - cuda-libraries-dev=11.8.0=0
+   - cuda-nvrtc=11.8.89=0
+   - cuda-nvrtc-dev=11.8.89=0
+   - cuda-nvtx=11.8.86=0
+   - cuda-profiler-api=12.6.68=0
+   - cuda-runtime=11.8.0=0
+   - cuda-version=12.6=3
+   - freetype=2.12.1=ha860e81_0
+   - gmpy2=2.1.2=py310h7f96b67_0
+   - intel-openmp=2023.1.0=h59b6b97_46320
+   - jinja2=3.1.4=py310haa95532_0
+   - jpeg=9e=h827c3e9_3
+   - lcms2=2.12=h83e58a3_0
+   - lerc=3.0=hd77b12b_0
+   - libcublas=11.11.3.6=0
+   - libcublas-dev=11.11.3.6=0
+   - libcufft=10.9.0.58=0
+   - libcufft-dev=10.9.0.58=0
+   - libcurand=10.3.7.68=0
+   - libcurand-dev=10.3.7.68=0
+   - libcusolver=11.4.1.48=0
+   - libcusolver-dev=11.4.1.48=0
+   - libcusparse=11.7.5.86=0
+   - libcusparse-dev=11.7.5.86=0
+   - libdeflate=1.17=h2bbff1b_1
+   - libffi=3.4.4=hd77b12b_1
+   - libjpeg-turbo=2.0.0=h196d8e1_0
+   - libnpp=11.8.0.86=0
+   - libnpp-dev=11.8.0.86=0
+   - libnvjpeg=11.9.0.86=0
+   - libnvjpeg-dev=11.9.0.86=0
+   - libpng=1.6.39=h8cc25b3_0
+   - libtiff=4.5.1=hd77b12b_0
+   - libuv=1.48.0=h827c3e9_0
+   - libwebp-base=1.3.2=h2bbff1b_0
+   - lz4-c=1.9.4=h2bbff1b_1
+   - mkl=2023.1.0=h6b88ed4_46358
+   - mkl-service=2.4.0=py310h2bbff1b_1
+   - mkl_fft=1.3.8=py310h2bbff1b_0
+   - mkl_random=1.2.4=py310h59b6b97_0
+   - mpc=1.1.0=h7edee0f_1
+   - mpfr=4.0.2=h62dcd97_1
+   - mpir=3.0.0=hec2e145_1
+   - mpmath=1.3.0=py310haa95532_0
+   - networkx=3.3=py310haa95532_0
+   - numpy=1.26.4=py310h055cbcc_0
+   - numpy-base=1.26.4=py310h65a83cf_0
+   - openjpeg=2.5.2=hae555c5_0
+   - openssl=3.0.14=h827c3e9_0
+   - pip=24.2=py310haa95532_0
+   - pysocks=1.7.1=py310haa95532_0
+   - python=3.10.14=he1021f5_1
+   - pytorch=2.4.0=py3.10_cuda11.8_cudnn9_0
+   - pytorch-cuda=11.8=h24eeafa_5
+   - pytorch-mutex=1.0=cuda
+   - pyyaml=6.0.1=py310h2bbff1b_0
+   - requests=2.32.3=py310haa95532_0
+   - setuptools=72.1.0=py310haa95532_0
+   - sqlite=3.45.3=h2bbff1b_0
+   - sympy=1.13.2=py310haa95532_0
+   - tbb=2021.8.0=h59b6b97_0
+   - tk=8.6.14=h0416ee5_0
+   - typing_extensions=4.11.0=py310haa95532_0
+   - tzdata=2024a=h04d1e81_0
+   - vc=14.40=h2eaa2aa_0
+   - vs2015_runtime=14.40.33807=h98bb1dd_0
+   - wheel=0.43.0=py310haa95532_0
+   - win_inet_pton=1.1.0=py310haa95532_0
+   - xz=5.4.6=h8cc25b3_1
+   - yaml=0.2.5=he774522_0
+   - zlib=1.2.13=h8cc25b3_1
+   - zstd=1.5.5=hd43e919_2
+   - pip:
+     - abctoolkit==0.0.4
+     - accelerate==0.34.0
+     - aiohappyeyeballs==2.4.0
+     - aiohttp==3.10.5
+     - aiosignal==1.3.1
+     - annotated-types==0.7.0
+     - anyio==4.6.2.post1
+     - async-timeout==4.0.3
+     - attrs==24.2.0
+     - audioread==3.0.1
+     - certifi==2023.7.22
+     - cffi==1.17.0
+     - chardet==5.2.0
+     - charset-normalizer==3.2.0
+     - click==8.1.7
+     - colorama==0.4.6
+     - coloredlogs==15.0.1
+     - cycler==0.11.0
+     - datasets==2.21.0
+     - decorator==5.1.1
+     - dill==0.3.8
+     - distro==1.9.0
+     - docker-pycreds==0.4.0
+     - exceptiongroup==1.2.2
+     - filelock==3.12.2
+     - fonttools==4.38.0
+     - frozenlist==1.4.1
+     - fsspec==2024.6.1
+     - gitdb==4.0.11
+     - gitpython==3.1.43
+     - h11==0.14.0
+     - httpcore==1.0.6
+     - httpx==0.27.2
+     - huggingface-hub==0.24.6
+     - humanfriendly==10.0
+     - idna==3.4
+     - importlib-metadata==6.7.0
+     - jellyfish==1.0.0
+     - jiter==0.6.1
+     - joblib==1.3.2
+     - jsonpickle==3.0.2
+     - kiwisolver==1.4.4
+     - langcodes==3.4.0
+     - langdetect==1.0.9
+     - langid==1.1.6
+     - language-data==1.2.0
+     - lazy-loader==0.4
+     - levenshtein==0.25.1
+     - librosa==0.10.1
+     - llvmlite==0.43.0
+     - lxml==5.3.0
+     - marisa-trie==1.2.0
+     - markupsafe==2.1.5
+     - matplotlib==3.5.3
+     - mido==1.3.0
+     - more-itertools==9.1.0
+     - msgpack==1.0.8
+     - multidict==6.0.5
+     - multiprocess==0.70.16
+     - music21==7.3.3
+     - nltk==3.8.1
+     - numba==0.60.0
+     - openai==1.51.2
+     - optimum==1.21.4
+     - packaging==23.1
+     - pandas==1.3.5
+     - pillow==9.5.0
+     - platformdirs==4.2.2
+     - pooch==1.8.2
+     - portalocker==2.10.1
+     - protobuf==5.28.0
+     - psutil==6.0.0
+     - pyarrow==17.0.0
+     - pycparser==2.22
+     - pydantic==2.9.2
+     - pydantic-core==2.23.4
+     - pydub==0.25.1
+     - pyparsing==3.1.1
+     - pyreadline3==3.4.1
+     - python-dateutil==2.8.2
+     - pytz==2023.3
+     - pywin32==306
+     - rapidfuzz==3.9.7
+     - rarfile==4.1
+     - regex==2023.8.8
+     - sacrebleu==2.4.3
+     - sacremoses==0.0.53
+     - safetensors==0.4.4
+     - samplings==0.1.7
+     - scikit-learn==1.5.1
+     - scipy==1.14.1
+     - sentencepiece==0.2.0
+     - sentry-sdk==2.13.0
+     - setproctitle==1.3.3
+     - six==1.16.0
+     - smmap==5.0.1
+     - sniffio==1.3.1
+     - soundfile==0.12.1
+     - soxr==0.5.0.post1
+     - tabulate==0.9.0
+     - threadpoolctl==3.5.0
+     - tokenizers==0.19.1
+     - torch==2.4.0
+     - torchaudio==2.4.0
+     - torchvision==0.19.0
+     - tqdm==4.66.5
+     - transformers==4.40.0
+     - typing-extensions==4.12.2
+     - unidecode==1.3.6
+     - urllib3==2.0.4
+     - wandb==0.17.8
+     - webcolors==1.13
+     - xxhash==3.5.0
+     - yarl==1.9.7
+     - zipp==3.15.0
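Note: the pinned conda builds above (e.g. `vs2015_runtime`, `pywin32`, `win_inet_pton`) target Windows (win-64) with CUDA 11.8, so `conda env create -f environment.yml` is expected to work as-is only on a Windows machine. On other platforms, installing the `pip:` section into a Python 3.10 environment with PyTorch 2.4.0 is a reasonable starting point.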
music_classification/README.md ADDED
@@ -0,0 +1,46 @@
+ # Music Classification Codebase
+
+ ## Overview
+ A linear probe is a lightweight classifier trained on top of frozen feature representations for supervised learning tasks. This codebase includes scripts for training a linear classification model and for classifying new feature data. The features can be extracted from the M3 or CLaMP 2 models; they must preserve the time dimension and must **not** be normalized. Below is a description of the scripts contained in the `music_classification/` folder.
+
+ ## Repository Structure
+ The `music_classification/` folder contains the following scripts:
+
+ ### 1. `config.py`
+ This script defines configurations for linear probe training and inference, specifying training data paths and parameters like learning rate, number of epochs, and hidden size.
+
+ ### 2. `inference_cls.py`
+ This script enables the classification of feature vectors using a pre-trained linear probe model.
+
+ #### JSON Output Format
+ The resulting JSON file contains a dictionary with the following structure:
+ ```json
+ {
+     "path/to/feature1.npy": "class_A",
+     "path/to/feature2.npy": "class_B",
+     "path/to/feature3.npy": "class_A"
+ }
+ ```
+ - **Key**: The path to the input feature file (e.g., `feature1.npy`).
+ - **Value**: The predicted class label assigned by the linear probe model (e.g., `class_A`).
+
+ #### Usage
+ ```bash
+ python inference_cls.py <feature_folder> <output_file>
+ ```
+ - `feature_folder`: Directory containing input feature files (in `.npy` format).
+ - `output_file`: File path to save the classification results (in JSON format).
+
+ ### 3. `train_cls.py`
+ This script is designed for training the linear classification model.
+
+ #### Usage
+ ```bash
+ python train_cls.py
+ ```
+
+ ### 4. `utils.py`
+ The utility script defines the architecture of the linear classification model.
+
+ ## Naming Convention
+ All `.npy` files used in this codebase must follow the naming convention `label_filename.npy`, where the filename should not contain any underscores (`_`); everything before the first underscore is read back as the class label, as shown in the sketch below.
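A minimal sketch of the parsing rule, mirroring the label extraction in `train_cls.py` (the file path here is hypothetical):

```python
import os

path = "features/jazz_track001.npy"           # hypothetical feature file
label = os.path.basename(path).split('_')[0]  # everything before the first "_"
print(label)                                  # -> "jazz"
```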
music_classification/config.py ADDED
@@ -0,0 +1,26 @@
+ # Configuration for generative modelling and classification
+ TRAIN_FOLDERS = [
+     "<path_to_training_data>"  # Directory containing training data
+ ]
+
+ EVAL_FOLDERS = [
+     ""  # (Optional) Directory containing evaluation data
+ ]
+
+ EVAL_SPLIT = 0.2  # Fraction of training data to use for evaluation
+
+ # Weights and Biases configuration
+ WANDB_KEY = "<your_wandb_key>"  # Set WANDB_LOG=False below if you have no API key for Weights and Biases logging
+
+ # Model Configuration
+ INPUT_HIDDEN_SIZE = 768  # Input hidden size
+ HIDDEN_SIZE = 768  # Model hidden size
+ NUM_EPOCHS = 1000  # Max number of epochs to train (early stopping can terminate earlier)
+ LEARNING_RATE = 1e-5  # Optimizer learning rate
+ BALANCED_TRAINING = False  # Set to True to balance labels in training data
+ WANDB_LOG = False  # Set to True to log training metrics to WANDB
+
+ # Paths Configuration
+ last_folder_name = TRAIN_FOLDERS[-1].split('/')[-1]
+ WEIGHTS_PATH = f"weights-{last_folder_name}.pth"  # Weights file path
+ LOGS_PATH = f"logs-{last_folder_name}.txt"  # Log file path
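To illustrate how the derived paths behave, a quick check (the folder name is hypothetical):

```python
TRAIN_FOLDERS = ["features/genre"]                   # hypothetical training folder
last_folder_name = TRAIN_FOLDERS[-1].split('/')[-1]  # -> "genre"
print(f"weights-{last_folder_name}.pth")             # -> weights-genre.pth
print(f"logs-{last_folder_name}.txt")                # -> logs-genre.txt
```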
music_classification/inference_cls.py ADDED
@@ -0,0 +1,71 @@
+ import os
+ import json
+ import torch
+ import random
+ import argparse
+ import numpy as np
+ from utils import *
+ from tqdm import tqdm
+ from samplings import *
+
+ def list_files_in_directory(directories, extensions=["npy"]):
+     file_list = []
+
+     for directory in directories:
+         for root, dirs, files in os.walk(directory):
+             for file in files:
+                 if any(file.endswith(ext) for ext in extensions):
+                     file_path = os.path.join(root, file)
+                     file_list.append(file_path)
+
+     return file_list
+
+ if __name__ == "__main__":
+     # Setup argument parser
+     parser = argparse.ArgumentParser(description="Feature extraction and classification with CLaMP2.")
+     parser.add_argument("feature_folder", type=str, help="Directory containing input feature files.")
+     parser.add_argument("output_file", type=str, help="File to save the classification results. (format: json)")
+
+     # Parse arguments
+     args = parser.parse_args()
+     feature_folder = args.feature_folder
+     output_file = args.output_file
+
+     device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
+     seed = 42
+     random.seed(seed)
+     np.random.seed(seed)
+     torch.manual_seed(seed)
+     torch.cuda.manual_seed_all(seed)
+     torch.backends.cudnn.deterministic = True
+     torch.backends.cudnn.benchmark = False
+
+     checkpoint = torch.load(WEIGHTS_PATH, map_location='cpu')
+     print(f"Successfully Loaded Checkpoint from Epoch {checkpoint['epoch']} with acc {checkpoint['max_eval_acc']}")
+     label2idx = checkpoint['labels']
+     idx2label = {idx: label for label, idx in label2idx.items()}  # Create reverse mapping
+     model = LinearClassification(num_classes=len(label2idx))
+     model = model.to(device)
+
+     # print parameter number
+     print("Parameter Number: "+str(sum(p.numel() for p in model.parameters() if p.requires_grad)))
+
+     model.eval()
+     model.load_state_dict(checkpoint['model'])
+
+     # load filenames under the feature folder
+     feature_files = list_files_in_directory([feature_folder])
+     cls_results = {}
+
+     for filepath in tqdm(feature_files):
+         outputs = np.load(filepath)[0]
+         outputs = torch.from_numpy(outputs).to(device)
+         outputs = outputs.unsqueeze(0)
+         cls_list = model(outputs)[0].tolist()
+         max_prob = max(cls_list)
+         cls_idx = cls_list.index(max_prob)
+         cls_label = idx2label[cls_idx]
+         cls_results[filepath] = cls_label
+
+     with open(output_file, "w", encoding="utf-8") as f:
+         json.dump(cls_results, f)
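Since the loop above indexes `np.load(filepath)[0]`, each `.npy` file is expected to carry a leading batch-like axis. A hypothetical stand-in feature file for a smoke test; the shape `(1, time_steps, 768)` is an assumption based on `INPUT_HIDDEN_SIZE = 768` in `config.py` and the un-normalized, time-preserving features described in the README:

```python
import numpy as np

# Hypothetical feature: (batch, time_steps, hidden); real shapes come from
# the extraction scripts and may differ.
feat = np.random.randn(1, 12, 768).astype(np.float32)
np.save("classA_demo001.npy", feat)  # name follows the label_filename.npy convention
```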
music_classification/train_cls.py ADDED
@@ -0,0 +1,293 @@
+ import os
+ import time
+ import math
+ import wandb
+ import torch
+ import random
+ import numpy as np
+ from utils import *
+ from config import *
+ from tqdm import tqdm
+ from sklearn.metrics import f1_score
+ from torch.amp import autocast, GradScaler
+ from torch.utils.data import Dataset, DataLoader
+ from transformers import get_constant_schedule_with_warmup
+ import torch.distributed as dist
+ from torch.nn.parallel import DistributedDataParallel as DDP
+ from torch.utils.data.distributed import DistributedSampler
+
+ # Set up distributed training
+ world_size = int(os.environ['WORLD_SIZE']) if 'WORLD_SIZE' in os.environ else 1
+ global_rank = int(os.environ['RANK']) if 'RANK' in os.environ else 0
+ local_rank = int(os.environ['LOCAL_RANK']) if 'LOCAL_RANK' in os.environ else 0
+
+ if world_size > 1:
+     torch.cuda.set_device(local_rank)
+     device = torch.device("cuda", local_rank)
+     dist.init_process_group(backend='nccl')
+ else:
+     device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
+
+ # Set random seed (offset by rank so each process shuffles differently)
+ seed = 42 + global_rank
+ random.seed(seed)
+ np.random.seed(seed)
+ torch.manual_seed(seed)
+ torch.cuda.manual_seed_all(seed)
+ torch.backends.cudnn.deterministic = True
+ torch.backends.cudnn.benchmark = False
+
+ batch_size = 1
+
+ def collate_batch(input_tensors):
+     input_tensors, labels = zip(*input_tensors)
+     input_tensors = torch.stack(input_tensors, dim=0)
+     labels = torch.stack(labels, dim=0)
+
+     return input_tensors.to(device), labels.to(device)
+
+ def list_files_in_directory(directories):
+     file_list = []
+
+     for directory in directories:
+         for root, dirs, files in os.walk(directory):
+             for file in files:
+                 if file.endswith(".npy"):
+                     file_path = os.path.join(root, file)
+                     file_list.append(file_path)
+     return file_list
+
+ class TensorDataset(Dataset):
+     def __init__(self, filenames):
+         print(f"Loading {len(filenames)} files for classification")
+         self.filenames = []
+         self.label2idx = {}
+
+         for filename in tqdm(filenames):
+             label = os.path.basename(filename).split('_')[0]
+
+             self.filenames.append(filename)
+             if label not in self.label2idx:
+                 self.label2idx[label] = len(self.label2idx)
+         print(f"Found {len(self.label2idx)} classes")
+
+     def __len__(self):
+         return len(self.filenames)
+
+     def __getitem__(self, idx):
+         filename = self.filenames[idx]
+         label = os.path.basename(filename).split('_')[0]
+         label = self.label2idx[label]
+
+         # Load the numpy feature file
+         data = np.load(filename)
+         data = torch.from_numpy(data)[0]
+         label = torch.tensor(label)
+
+         return data, label
+
+ class BalancedTensorDataset(Dataset):
+     def __init__(self, filenames):
+         print(f"Loading {len(filenames)} files for classification")
+         self.filenames = filenames
+         self.label2idx = {}
+         self.label2files = {}
+
+         for filename in tqdm(filenames):
+             label = os.path.basename(filename).split('_')[0]
+             if label not in self.label2idx:
+                 self.label2idx[label] = len(self.label2idx)
+             if label not in self.label2files:
+                 self.label2files[label] = []
+             self.label2files[label].append(filename)
+         print(f"Found {len(self.label2idx)} classes")
+
+         self.min_samples = min(len(files) for files in self.label2files.values())
+
+         self._update_epoch_filenames()
+
+     def _update_epoch_filenames(self):
+         # Resample an equal number of files per label for the coming epoch
+         self.epoch_filenames = []
+         for label, files in self.label2files.items():
+             sampled_files = random.sample(files, self.min_samples)
+             self.epoch_filenames.extend(sampled_files)
+
+         random.shuffle(self.epoch_filenames)
+
+     def __len__(self):
+         return len(self.epoch_filenames)
+
+     def __getitem__(self, idx):
+         filename = self.epoch_filenames[idx]
+         label = os.path.basename(filename).split('_')[0]
+         label = self.label2idx[label]
+
+         data = np.load(filename)
+         data = torch.from_numpy(data)[0]
+         label = torch.tensor(label)
+
+         return data, label
+
+     def on_epoch_end(self):
+         self._update_epoch_filenames()
+
+ # Load filenames under the train and eval folders
+ train_files = list_files_in_directory(TRAIN_FOLDERS)
+ eval_files = list_files_in_directory(EVAL_FOLDERS)
+
+ if len(eval_files) == 0:
+     random.shuffle(train_files)
+     eval_files = train_files[:math.ceil(len(train_files)*EVAL_SPLIT)]
+     train_files = train_files[math.ceil(len(train_files)*EVAL_SPLIT):]
+ if BALANCED_TRAINING:
+     train_set = BalancedTensorDataset(train_files)
+ else:
+     train_set = TensorDataset(train_files)
+ eval_set = TensorDataset(eval_files)
+ eval_set.label2idx = train_set.label2idx
+
+ model = LinearClassification(num_classes=len(train_set.label2idx))
+ model = model.to(device)
+
+ # Print the number of trainable parameters
+ print("Parameter Number: " + str(sum(p.numel() for p in model.parameters() if p.requires_grad)))
+
+ if world_size > 1:
+     model = DDP(model, device_ids=[local_rank], output_device=local_rank, find_unused_parameters=True)
+
+ scaler = GradScaler()
+ is_autocast = True
+ optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE)
+ loss_fn = torch.nn.CrossEntropyLoss()
+
+ # Call the model with a batch of input
+ def process_one_batch(batch):
+     input_tensors, labels = batch
+     logits = model(input_tensors)
+     loss = loss_fn(logits, labels)
+     prediction = torch.argmax(logits, dim=1)
+     acc_num = torch.sum(prediction == labels)
+
+     return loss, acc_num, prediction, labels
+
+ # Do one epoch of training
+ def train_epoch():
+     tqdm_train_set = tqdm(train_set)
+     total_train_loss = 0
+     total_acc_num = 0
+     iter_idx = 1
+     model.train()
+
+     for batch in tqdm_train_set:
+         if is_autocast:
+             with autocast(device_type='cuda'):
+                 loss, acc_num, prediction, labels = process_one_batch(batch)
+             scaler.scale(loss).backward()
+             scaler.step(optimizer)
+             scaler.update()
+         else:
+             loss, acc_num, prediction, labels = process_one_batch(batch)
+             loss.backward()
+             optimizer.step()
+
+         lr_scheduler.step()
+         model.zero_grad(set_to_none=True)
+         total_train_loss += loss.item()
+         total_acc_num += acc_num.item()
+         tqdm_train_set.set_postfix({str(global_rank)+'_train_acc': total_acc_num / (iter_idx*batch_size)})
+         # Log the training accuracy to wandb
+         if global_rank == 0 and WANDB_LOG:
+             wandb.log({"acc": total_acc_num / (iter_idx*batch_size)})
+
+         iter_idx += 1
+
+     if BALANCED_TRAINING:
+         train_set.dataset.on_epoch_end()
+
+     return total_acc_num / ((iter_idx-1)*batch_size)
+
+ # Do one epoch of evaluation
+ def eval_epoch():
+     tqdm_eval_set = tqdm(eval_set)
+     total_eval_loss = 0
+     total_acc_num = 0
+     iter_idx = 1
+     model.eval()
+
+     all_predictions = []
+     all_labels = []
+
+     # Evaluate data for one epoch
+     for batch in tqdm_eval_set:
+         with torch.no_grad():
+             loss, acc_num, prediction, labels = process_one_batch(batch)
+             total_eval_loss += loss.item()
+             total_acc_num += acc_num.item()
+
+             # Accumulate predictions and labels
+             all_predictions.extend(prediction.cpu().numpy())
+             all_labels.extend(labels.cpu().numpy())
+
+         tqdm_eval_set.set_postfix({str(global_rank)+'_eval_acc': total_acc_num / (iter_idx*batch_size)})
+         iter_idx += 1
+
+     # Compute macro F1
+     f1_macro = f1_score(all_labels, all_predictions, average='macro')
+     return total_acc_num / ((iter_idx - 1) * batch_size), f1_macro
+
+ # Train and eval
+ if __name__ == "__main__":
+
+     label2idx = train_set.label2idx
+     max_eval_acc = 0
+     best_epoch = 0
+     train_sampler = DistributedSampler(train_set, num_replicas=world_size, rank=global_rank)
+     eval_sampler = DistributedSampler(eval_set, num_replicas=world_size, rank=global_rank)
+
+     train_set = DataLoader(train_set, batch_size=batch_size, collate_fn=collate_batch, sampler=train_sampler, shuffle=(train_sampler is None))
+     eval_set = DataLoader(eval_set, batch_size=batch_size, collate_fn=collate_batch, sampler=eval_sampler, shuffle=(eval_sampler is None))
+
+     # The scheduler must wrap the same optimizer instance used in the training loop
+     lr_scheduler = get_constant_schedule_with_warmup(optimizer=optimizer, num_warmup_steps=len(train_set))
+
+     if WANDB_LOG and global_rank == 0:
+         # Initialize wandb
+         if WANDB_KEY:
+             wandb.login(key=WANDB_KEY)
+         wandb.init(project="linear",
+                    name=WEIGHTS_PATH.replace("weights-", "").replace(".pth", ""))
+
+     for epoch in range(1, NUM_EPOCHS+1):
+         train_sampler.set_epoch(epoch)
+         eval_sampler.set_epoch(epoch)
+         print('-' * 21 + "Epoch " + str(epoch) + '-' * 21)
+         train_acc = train_epoch()
+         eval_acc, eval_f1_macro = eval_epoch()
+         if global_rank == 0:
+             with open(LOGS_PATH, 'a') as f:
+                 f.write("Epoch " + str(epoch) + "\ntrain_acc: " + str(train_acc) + "\neval_acc: " + str(eval_acc) + "\neval_f1_macro: " + str(eval_f1_macro) + "\ntime: " + time.asctime(time.localtime(time.time())) + "\n\n")
+             if eval_acc > max_eval_acc:
+                 best_epoch = epoch
+                 max_eval_acc = eval_acc
+                 checkpoint = {
+                     'model': model.module.state_dict() if hasattr(model, "module") else model.state_dict(),
+                     'optimizer': optimizer.state_dict(),
+                     'lr_sched': lr_scheduler.state_dict(),
+                     'epoch': epoch,
+                     'best_epoch': best_epoch,
+                     'max_eval_acc': max_eval_acc,
+                     "labels": label2idx
+                 }
+                 torch.save(checkpoint, WEIGHTS_PATH)
+                 with open(LOGS_PATH, 'a') as f:
+                     f.write("Best Epoch so far!\n\n\n")
+
+         if world_size > 1:
+             dist.barrier()
+
+     if global_rank == 0:
+         print("Best Eval Epoch : " + str(best_epoch))
+         print("Max Eval Accuracy : " + str(max_eval_acc))
music_classification/utils.py ADDED
@@ -0,0 +1,22 @@
+ import torch
+ from config import *
+
+ class LinearClassification(torch.nn.Module):
+     def __init__(self, num_classes):
+         super(LinearClassification, self).__init__()
+         self.fc1 = torch.nn.Linear(INPUT_HIDDEN_SIZE, HIDDEN_SIZE)
+         self.relu = torch.nn.ReLU()
+         self.fc2 = torch.nn.Linear(HIDDEN_SIZE, num_classes)
+
+     def forward(self, x):
+         # Apply the linear layer and ReLU to each time step
+         x = self.fc1(x)  # x shape (B, L, H) -> (B, L, hidden_size)
+         x = self.relu(x)
+
+         # Average over the time steps (L dimension)
+         x = x.mean(dim=1)  # Now x has shape (B, hidden_size)
+
+         # Return raw logits: CrossEntropyLoss in train_cls.py expects
+         # unnormalized scores, and inference_cls.py takes the argmax.
+         x = self.fc2(x)  # (B, hidden_size) -> (B, num_classes)
+         return x
process_data/README.md ADDED
@@ -0,0 +1,307 @@
+ # Data Processing Codebase
+
+ ## Overview
+ This codebase contains scripts and utilities for converting between various musical data formats, including ABC notation, MusicXML, MIDI, and MTF (MIDI Text Format). It also includes a script that summarizes music metadata, represented as JSON files of textual information, using the OpenAI GPT-4 API: the model turns this metadata into concise summaries in multiple languages to boost multilingual MIR. Together, these tools facilitate the transformation and manipulation of musical files and provide concise multilingual summaries of music metadata for use with CLaMP 2.
+
+ ## About ABC notation
+ ### Standard ABC Notation
+ ABC notation is a text-based representation of sheet music. Like stave notation, it is theory-oriented, which makes it ideal for presenting complex musical concepts to musicians for study and analysis. Standard ABC notation encodes each voice separately, which often places corresponding bars far apart. This separation makes it difficult for models to accurately understand the interactions between voices that are meant to align musically.
+
+ Example standard ABC notation representation:
+ ```
+ %%score { 1 | 2 }
+ L:1/8
+ Q:1/4=120
+ M:3/4
+ K:G
+ V:1 treble nm="Piano" snm="Pno."
+ V:2 bass
+ V:1
+ !mf!"^Allegro" d2 (GA Bc | d2) .G2 .G2 |]
+ V:2
+ [G,B,D]4 A,2 | B,6 |]
+ ```
+
+ ### Interleaved ABC Notation
+ In contrast, interleaved ABC notation effectively aligns multi-track music by integrating multiple voices of the same bar into a single line, ensuring that all parts remain synchronized. This format combines voices in-line and tags each bar with its corresponding voice (e.g., `[V:1]` for treble and `[V:2]` for bass). By directly aligning related bars, interleaved ABC notation enhances the model’s understanding of how different voices interact within the same bar.
+
+ Below is the same piece in interleaved ABC notation, as used for M3 encoding, where each header or bar corresponds to a patch:
+ ```
+ %%score { 1 | 2 }
+ L:1/8
+ Q:1/4=120
+ M:3/4
+ K:G
+ V:1 treble nm="Piano" snm="Pno."
+ V:2 bass
+ [V:1]!mf!"^Allegro" d2 (GA Bc|[V:2][G,B,D]4 A,2|
+ [V:1]d2) .G2 .G2|][V:2]B,6|]
+ ```
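+
+ To make the core idea concrete, here is a minimal, hypothetical sketch of bar-level interleaving. It is not the conversion used in this repository (`batch_interleaved_abc.py` below relies on the `abctoolkit` package, which also handles headers, repeats, and alignment checks); it only illustrates how per-voice bars are tagged and merged onto shared lines:
+
+ ```python
+ # Rough sketch: assumes each voice body is one line of bars separated by '|'.
+ # Final barlines such as '|]' and inline fields are ignored here; the real
+ # pipeline (batch_interleaved_abc.py, via abctoolkit) handles those cases.
+ def interleave_voices(voice_bodies):
+     bars_per_voice = {
+         voice: [bar for bar in body.split("|") if bar.strip()]
+         for voice, body in voice_bodies.items()
+     }
+     num_bars = min(len(bars) for bars in bars_per_voice.values())
+     lines = []
+     for i in range(num_bars):
+         # Tag each voice's i-th bar and join all voices of that bar on one line.
+         lines.append("".join(f"[V:{v}]{bars_per_voice[v][i]}|" for v in voice_bodies))
+     return lines
+
+ print("\n".join(interleave_voices({
+     "1": 'd2 (GA Bc|d2) .G2 .G2|',
+     "2": '[G,B,D]4 A,2|B,6|',
+ })))
+ # [V:1]d2 (GA Bc|[V:2][G,B,D]4 A,2|
+ # [V:1]d2) .G2 .G2|[V:2]B,6|
+ ```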
+
+ ## About MTF
+ ### Raw MIDI Messages
+ MIDI (performance data) precisely encodes performance information related to timing and dynamics, making it well suited to music production and live performance. Raw MIDI messages contain essential musical instructions and metadata, extracted directly from a MIDI file. These include events like note on/off, tempo changes, key signatures, and control changes, which define how the music is performed. The [mido library](https://mido.readthedocs.io/) allows for reading these messages in their native format, as seen below. Each message can include multiple parameters, making the output comprehensive but sometimes redundant.
+
+ ```
+ MetaMessage('time_signature', numerator=3, denominator=4, clocks_per_click=24, notated_32nd_notes_per_beat=8, time=0)
+ MetaMessage('key_signature', key='G', time=0)
+ MetaMessage('set_tempo', tempo=500000, time=0)
+ control_change channel=0 control=121 value=0 time=0
+ program_change channel=0 program=0 time=0
+ control_change channel=0 control=7 value=100 time=0
+ control_change channel=0 control=10 value=64 time=0
+ control_change channel=0 control=91 value=0 time=0
+ control_change channel=0 control=93 value=0 time=0
+ MetaMessage('midi_port', port=0, time=0)
+ note_on channel=0 note=74 velocity=80 time=0
+ MetaMessage('key_signature', key='G', time=0)
+ MetaMessage('midi_port', port=0, time=0)
+ note_on channel=0 note=55 velocity=80 time=0
+ note_on channel=0 note=59 velocity=80 time=0
+ note_on channel=0 note=62 velocity=80 time=0
+ note_on channel=0 note=74 velocity=0 time=455
+ note_on channel=0 note=67 velocity=80 time=25
+ note_on channel=0 note=67 velocity=0 time=239
+ note_on channel=0 note=69 velocity=80 time=1
+ note_on channel=0 note=55 velocity=0 time=191
+ note_on channel=0 note=59 velocity=0 time=0
+ note_on channel=0 note=62 velocity=0 time=0
+ note_on channel=0 note=69 velocity=0 time=48
+ note_on channel=0 note=71 velocity=80 time=1
+ note_on channel=0 note=57 velocity=80 time=0
+ note_on channel=0 note=71 velocity=0 time=239
+ note_on channel=0 note=72 velocity=80 time=1
+ note_on channel=0 note=57 velocity=0 time=215
+ note_on channel=0 note=72 velocity=0 time=24
+ note_on channel=0 note=74 velocity=80 time=1
+ note_on channel=0 note=59 velocity=80 time=0
+ note_on channel=0 note=74 velocity=0 time=455
+ note_on channel=0 note=67 velocity=80 time=25
+ note_on channel=0 note=67 velocity=0 time=239
+ note_on channel=0 note=67 velocity=80 time=241
+ note_on channel=0 note=67 velocity=0 time=239
+ note_on channel=0 note=59 velocity=0 time=168
+ MetaMessage('end_of_track', time=1)
+ ```
+ ### MIDI Text Format (MTF)
+ The MIDI Text Format (MTF) provides a structured, textual representation of MIDI data that preserves all original information without loss. Each MIDI message is represented exactly, allowing full reconstruction, so no musical nuances are lost during conversion.
+
+ To generate MTF, the mido library reads raw MIDI messages from MIDI files. The raw output retains all essential information but can be lengthy and redundant. To simplify the representation, parameter values are read in a fixed order and separated by spaces. For example, the raw time signature message, which contains several parameters (numerator, denominator, clocks per click, notated 32nd notes per beat, and time), is represented in MTF as:
+
+ ```
+ time_signature 3 4 24 8 0
+ ```
+
+ Other messages, such as control changes and note events, follow a similar compact format while preserving all relevant musical details. This structured simplification improves computational performance while maintaining precise control over musical elements, including timing and dynamics.
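+
+ This mapping is exactly what the `msg_to_str` helper in `batch_midi2mtf.py` (included later in this repository) implements. A minimal sketch of the idea, omitting the escaping of non-ASCII text that the full script also performs:
+
+ ```python
+ import mido
+
+ def msg_to_str(msg):
+     # msg.dict() yields the message type followed by its parameter values
+     # in a fixed order; joining them with spaces gives one MTF line.
+     return " ".join(str(value) for value in msg.dict().values())
+
+ msg = mido.MetaMessage('time_signature', numerator=3, denominator=4,
+                        clocks_per_click=24, notated_32nd_notes_per_beat=8,
+                        time=0)
+ print(msg_to_str(msg))  # time_signature 3 4 24 8 0
+ ```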
+
+ Example MTF representation:
+ ```
+ ticks_per_beat 480
+ time_signature 3 4 24 8 0
+ key_signature G 0
+ set_tempo 500000 0
+ control_change 0 0 121 0
+ program_change 0 0 0
+ control_change 0 0 7 100
+ control_change 0 0 10 64
+ control_change 0 0 91 0
+ control_change 0 0 93 0
+ midi_port 0 0
+ note_on 0 0 74 80
+ key_signature G 0
+ midi_port 0 0
+ note_on 0 0 55 80
+ note_on 0 0 59 80
+ note_on 0 0 62 80
+ note_on 455 0 74 0
+ note_on 25 0 67 80
+ note_on 239 0 67 0
+ note_on 1 0 69 80
+ note_on 191 0 55 0
+ note_on 0 0 59 0
+ note_on 0 0 62 0
+ note_on 48 0 69 0
+ note_on 1 0 71 80
+ note_on 0 0 57 80
+ note_on 239 0 71 0
+ note_on 1 0 72 80
+ note_on 215 0 57 0
+ note_on 24 0 72 0
+ note_on 1 0 74 80
+ note_on 0 0 59 80
+ note_on 455 0 74 0
+ note_on 25 0 67 80
+ note_on 239 0 67 0
+ note_on 241 0 67 80
+ note_on 239 0 67 0
+ note_on 168 0 59 0
+ end_of_track 1
+ ```
+ For simplicity, `ticks_per_beat`, though originally an attribute of the MIDI object in mido rather than a message, is included as the first line of the MTF representation.
+
+ ### M3-Encoded MTF
+ When processed using M3 encoding, consecutive messages of the same type that fit within a 64-character limit (the patch size of M3) are combined into a single line. Only the first message in each group specifies the type, with subsequent messages listing only their parameter values, separated by tabs. This further simplifies the representation and improves processing efficiency.
+
+ Below is the same data optimized with M3 encoding, where each line corresponds to a patch:
+ ```
+ ticks_per_beat 480
+ time_signature 3 4 24 8 0
+ key_signature G 0
+ set_tempo 500000 0
+ control_change 0 0 121 0
+ program_change 0 0 0
+ control_change 0 0 7 100\t0 0 10 64\t0 0 91 0\t0 0 93 0
+ midi_port 0 0
+ note_on 0 0 74 80
+ key_signature G 0
+ midi_port 0 0
+ note_on 0 0 55 80\t0 0 59 80\t0 0 62 80\t455 0 74 0\t25 0 67 80
+ note_on 239 0 67 0\t1 0 69 80\t191 0 55 0\t0 0 59 0\t0 0 62 0
+ note_on 48 0 69 0\t1 0 71 80\t0 0 57 80\t239 0 71 0\t1 0 72 80
+ note_on 215 0 57 0\t24 0 72 0\t1 0 74 80\t0 0 59 80\t455 0 74 0
+ note_on 25 0 67 80\t239 0 67 0\t241 0 67 80\t239 0 67 0\t168 0 59 0
+ end_of_track 1
+ ```
+
+ By reducing redundancy, M3 encoding improves computational performance while maintaining precise timing and musical control, making it a good fit for efficient MIDI processing.
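+
+ A minimal sketch of this grouping rule, operating on plain MTF lines; exactly how the 64-character limit is counted (type prefix, separating tabs) is an assumption made for illustration:
+
+ ```python
+ PATCH_SIZE = 64  # M3 patch size, per the text above
+
+ def m3_group(mtf_lines):
+     # Merge consecutive messages of the same type into one line while the
+     # merged line still fits in a patch; messages after the first keep only
+     # their parameter values, joined with tab characters.
+     grouped = []
+     for line in mtf_lines:
+         msg_type, _, params = line.partition(" ")
+         if (grouped
+                 and grouped[-1].split(" ", 1)[0] == msg_type
+                 and len(grouped[-1]) + 1 + len(params) <= PATCH_SIZE):
+             grouped[-1] += "\t" + params
+         else:
+             grouped.append(line)
+     return grouped
+
+ print(m3_group(["control_change 0 0 7 100", "control_change 0 0 10 64",
+                 "control_change 0 0 91 0", "control_change 0 0 93 0"]))
+ # ['control_change 0 0 7 100\t0 0 10 64\t0 0 91 0\t0 0 93 0']
+ ```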
+
+ ## Repository Structure
+ The `process_data/` folder includes the following scripts and utility files:
+
+ ### 1. **Conversion Scripts**
+
+ #### `batch_abc2xml.py`
+ - **Purpose**: Converts ABC notation files into MusicXML format.
+ - **Input**: Directory of interleaved ABC files (modify the `input_dir` variable in the code).
+ - **Output**: MusicXML files saved in a newly created `_xml` directory.
+ - **Logging**: Errors are logged to `logs/abc2xml_error_log.txt`.
+
+ #### `batch_xml2abc.py`
+ - **Purpose**: Converts MusicXML files into standard ABC notation.
+ - **Input**: Directory of MusicXML files (e.g., `.xml`, `.mxl`, `.musicxml`) (modify the `input_dir` variable in the code).
+ - **Output**: Standard ABC files saved in a newly created `_abc` directory.
+ - **Logging**: Errors are logged to `logs/xml2abc_error_log.txt`.
+
+ #### `batch_interleaved_abc.py`
+ - **Purpose**: Processes standard ABC notation files into interleaved ABC notation.
+ - **Input**: Directory of ABC files (modify the `input_dir` variable in the code).
+ - **Output**: Interleaved ABC files saved in a newly created `_interleaved` directory.
+ - **Logging**: Any processing errors are printed to the console.
+
+ #### `batch_midi2mtf.py`
+ - **Purpose**: Converts MIDI files into MIDI Text Format (MTF).
+ - **Input**: Directory of MIDI files (e.g., `.mid`, `.midi`) (modify the `input_dir` variable in the code).
+ - **Output**: MTF files saved in a newly created `_mtf` directory.
+ - **Logging**: Errors are logged to `logs/midi2mtf_error_log.txt`.
+ - **Note**: The script includes an `m3_compatible` variable, which is set to `True` by default. When `True`, the conversion omits messages whose parameters are strings or lists, eliminating potential natural language information. This ensures that the converted MTF files align with the data format used for training the M3 and CLaMP 2 pretrained weights.
+
+ #### `batch_mtf2midi.py`
+ - **Purpose**: Converts MTF files into MIDI format.
+ - **Input**: Directory of MTF files (modify the `input_dir` variable in the code).
+ - **Output**: MIDI files saved in a newly created `_midi` directory.
+ - **Logging**: Errors are logged to `logs/mtf2midi_error_log.txt`.
+
+ ### 2. **Summarization Script**
+
+ #### `gpt4_summarize.py`
+ - **Purpose**: Uses the OpenAI GPT-4 API to generate concise summaries of music metadata in multiple languages. The script filters out entries that lack sufficient musical information, so that only meaningful summaries are produced.
+ - **Input**: Directory of JSON files containing music metadata (modify the `input_dir` variable in the code). Any missing metadata field can have its key set to `None`. Each JSON file corresponds to a single musical composition and can be linked to both ABC notation and MTF formats. Here is an example of the required metadata format:
+
+ ```json
+ {
+     "title": "Hard Times Come Again No More",
+     "composer": "Stephen Foster",
+     "genres": ["Children's Music", "Folk"],
+     "description": "\"Hard Times Come Again No More\" (sometimes referred to as \"Hard Times\") is an American parlor song written by Stephen Foster, reflecting themes of sorrow and hope.",
+     "lyrics": "Let us pause in life's pleasures and count its many tears,\nWhile we all sup sorrow with the poor;\nThere's a song that will linger forever in our ears;\nOh! Hard times come again no more.\n\nChorus:\n'Tis the song, the sigh of the weary,\nHard Times, hard times, come again no more.\nMany days you have lingered around my cabin door;\nOh! Hard times come again no more.\n\nWhile we seek mirth and beauty and music light and gay,\nThere are frail forms fainting at the door;\nThough their voices are silent, their pleading looks will say\nOh! Hard times come again no more.\nChorus\n\nThere's a pale weeping maiden who toils her life away,\nWith a worn heart whose better days are o'er:\nThough her voice would be merry, 'tis sighing all the day,\nOh! Hard times come again no more.\nChorus\n\n'Tis a sigh that is wafted across the troubled wave,\n'Tis a wail that is heard upon the shore\n'Tis a dirge that is murmured around the lowly grave\nOh! Hard times come again no more.\nChorus",
+     "tags": ["folk", "traditional", "bluegrass", "nostalgic", "heartfelt", "acoustic", "melancholic", "storytelling", "American roots", "resilience"],
+     "ensembles": ["Folk Ensemble"],
+     "instruments": ["Vocal", "Violin", "Tin whistle", "Guitar", "Banjo", "Tambourine"],
+     "filepaths": [
+         "abc/American_Music/Folk_Traditions/19th_Century/Stephen_Foster/Hard_Times_Come_Again_No_More.abc",
+         "mtf/American_Music/Folk_Traditions/19th_Century/Stephen_Foster/Hard_Times_Come_Again_No_More.mtf"
+     ]
+ }
+ ```
+
+ - **Output**: JSON files containing structured summaries in both English and a randomly selected non-English language, chosen from a list of 100 non-English languages (in this case, Simplified Chinese). Here is an example of the expected output format:
+
+ ```json
+ {
+     "title": "Hard Times Come Again No More",
+     "composer": "Stephen Foster",
+     "genres": ["Children's Music", "Folk"],
+     "description": "\"Hard Times Come Again No More\" (sometimes referred to as \"Hard Times\") is an American parlor song written by Stephen Foster, reflecting themes of sorrow and hope.",
+     "lyrics": "Let us pause in life's pleasures and count its many tears,\nWhile we all sup sorrow with the poor;\nThere's a song that will linger forever in our ears;\nOh! Hard times come again no more.\n\nChorus:\n'Tis the song, the sigh of the weary,\nHard Times, hard times, come again no more.\nMany days you have lingered around my cabin door;\nOh! Hard times come again no more.\n\nWhile we seek mirth and beauty and music light and gay,\nThere are frail forms fainting at the door;\nThough their voices are silent, their pleading looks will say\nOh! Hard times come again no more.\nChorus\n\nThere's a pale weeping maiden who toils her life away,\nWith a worn heart whose better days are o'er:\nThough her voice would be merry, 'tis sighing all the day,\nOh! Hard times come again no more.\nChorus\n\n'Tis a sigh that is wafted across the troubled wave,\n'Tis a wail that is heard upon the shore\n'Tis a dirge that is murmured around the lowly grave\nOh! Hard times come again no more.\nChorus",
+     "tags": ["folk", "traditional", "bluegrass", "nostalgic", "heartfelt", "acoustic", "melancholic", "storytelling", "American roots", "resilience"],
+     "ensembles": ["Folk Ensemble"],
+     "instruments": ["Vocal", "Violin", "Tin whistle", "Guitar", "Banjo", "Tambourine"],
+     "summary_en": "\"Hard Times Come Again No More,\" composed by Stephen Foster, is a poignant American parlor song that explores themes of sorrow and hope. The lyrics reflect on the contrast between life's pleasures and its hardships, inviting listeners to acknowledge both joy and suffering. With a heartfelt chorus that repeats the line \"Hard times come again no more,\" the song resonates with nostalgia and resilience. It is often performed by folk ensembles and features a variety of instruments, including vocals, violin, guitar, and banjo, encapsulating the spirit of American roots music.",
+     "summary_nen": {
+         "language": "Chinese (Simplified)",
+         "summary": "《艰难时光再无来临》是斯蒂芬·福斯特创作的一首感人至深的美国小歌厅歌曲,探讨了悲伤与希望的主题。歌词展现了生活的乐趣与艰辛之间的对比,邀请听众去感受快乐与痛苦的交织。歌曲中那句反复吟唱的“艰难时光再无来临”深情地表达了怀旧与坚韧。它常常由民谣乐队演奏,伴随着人声、小提琴、吉他和班卓琴等多种乐器,生动地展现了美国根源音乐的独特魅力。"
+     },
+     "filepaths": [
+         "abc/American_Music/Folk_Traditions/19th_Century/Stephen_Foster/Hard_Times_Come_Again_No_More.abc",
+         "mtf/American_Music/Folk_Traditions/19th_Century/Stephen_Foster/Hard_Times_Come_Again_No_More.mtf"
+     ]
+ }
+ ```
+
+ - **Logging**: Errors are logged to `logs/gpt4_summarize_error_log.txt`.
+
+ ### 3. **Utilities**
+ - **`utils/`**: Contains utility files required by the conversion scripts.
+
+ ## Usage
+ To use the scripts, modify the `input_dir` variable in each script to point to the directory containing your input files, then run the script from the command line. Below are example commands for each script:
+
+ ### Example Commands
+ ```bash
+ # Modify the input_dir variable in the script before running
+ python batch_abc2xml.py
+ python batch_xml2abc.py
+ python batch_interleaved_abc.py
+ python batch_midi2mtf.py
+ python batch_mtf2midi.py
+ python gpt4_summarize.py
+ ```
+
+ ### Execution Order
+ To achieve specific conversions, follow the order below:
+
+ 1. **To obtain interleaved ABC notation**:
+    - First, run `batch_xml2abc.py` to convert MusicXML files to ABC notation.
+    - Then, run `batch_interleaved_abc.py` to process the ABC files into interleaved ABC notation.
+
+ 2. **To obtain MTF**:
+    - Run `batch_midi2mtf.py` to convert MIDI files into MTF.
+
+ 3. **To convert interleaved ABC back to XML**:
+    - Run `batch_abc2xml.py` on the interleaved ABC files to convert them back to MusicXML format.
+
+ 4. **To convert MTF back to MIDI**:
+    - Run `batch_mtf2midi.py` to convert MTF files back to MIDI format.
+
+ 5. **To summarize music metadata**:
+    - Run `gpt4_summarize.py` to generate summaries for the music metadata files in JSON format. This assumes you have a directory of JSON files that include a `filepaths` key, which links each entry to the corresponding interleaved ABC and MTF files.
+
+ ### Parameters
+ To run the scripts, you need to configure the following parameters:
+
+ - **`input_dir`**: The directory containing the input files to be processed (ABC, MusicXML, MIDI, MTF, or JSON files); it is used by all scripts.
+
+ In addition to **`input_dir`**, the following parameters are specific to certain scripts:
+
+ - **`m3_compatible`** (specific to `batch_midi2mtf.py`):
+   - The default is `True`, which omits messages with string or list parameters to avoid including potential natural language information.
+   - Setting this to `False` retains all MIDI messages, which is crucial if you plan to retrain the models on custom datasets or need precise MIDI reproduction.
+
+ For **`gpt4_summarize.py`**, you also need to configure these parameters:
+
+ 1. **`base_url`**: The base URL for the OpenAI API, used to initialize the client.
+ 2. **`api_key`**: Your API key for authenticating requests, required for client initialization.
+ 3. **`model`**: The GPT-4 model to use when generating summaries.
+
+ **Important**: When `m3_compatible` is set to `True`, converting back from MTF to MIDI with `batch_mtf2midi.py` may produce MIDI files that do not exactly match the originals. This discrepancy was unexpected; however, retraining both M3 and CLaMP 2 to address it would require approximately 6,000 H800 GPU hours. Since M3 and CLaMP 2 have already achieved state-of-the-art results on MIDI tasks, we have opted not to retrain them. If exact consistency with the original MIDI files is critical for your application, set `m3_compatible` to `False`.
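+
+ If exact reproduction matters, one quick way to spot-check a round trip is to compare the merged message streams of the source file and the reconstructed file with mido; a minimal sketch (the file paths are placeholders):
+
+ ```python
+ import mido
+
+ def midi_messages_equal(path_a, path_b):
+     # Compare two MIDI files message by message over their merged tracks.
+     mid_a, mid_b = mido.MidiFile(path_a), mido.MidiFile(path_b)
+     if mid_a.ticks_per_beat != mid_b.ticks_per_beat:
+         return False
+     msgs_a, msgs_b = list(mid_a.merged_track), list(mid_b.merged_track)
+     return len(msgs_a) == len(msgs_b) and all(
+         a == b for a, b in zip(msgs_a, msgs_b))
+
+ print(midi_messages_equal("original.mid", "roundtrip.mid"))
+ ```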
process_data/batch_abc2xml.py ADDED
@@ -0,0 +1,58 @@
+ input_dir = "<path_to_your_interleaved_abc_files>"  # Replace with the path to your folder containing interleaved ABC (.abc) files
+
+ import os
+ import math
+ import random
+ import subprocess
+ from tqdm import tqdm
+ from multiprocessing import Pool
+
+ def convert_abc2xml(file_list):
+     cmd = 'cmd /u /c python utils/abc2xml.py '  # Windows shell invocation; on Unix-like systems, drop the 'cmd /u /c' prefix
+     for file in tqdm(file_list):
+         filename = file.split('/')[-1]  # Extract file name
+         output_dir = file.split('/')[:-1]  # Extract directory path
+         output_dir[0] = output_dir[0] + '_xml'  # Create new output folder
+         output_dir = '/'.join(output_dir)
+         os.makedirs(output_dir, exist_ok=True)
+
+         try:
+             p = subprocess.Popen(cmd + '"' + file + '"', stdout=subprocess.PIPE, shell=True)
+             result = p.communicate()
+             output = result[0].decode('utf-8')
+
+             if output == '':
+                 with open("logs/abc2xml_error_log.txt", "a", encoding="utf-8") as f:
+                     f.write(file + '\n')
+                 continue
+             else:
+                 output_path = f"{output_dir}/" + ".".join(filename.split(".")[:-1]) + ".xml"
+                 with open(output_path, 'w', encoding='utf-8') as f:
+                     f.write(output)
+         except Exception as e:
+             with open("logs/abc2xml_error_log.txt", "a", encoding="utf-8") as f:
+                 f.write(file + ' ' + str(e) + '\n')
+
+ if __name__ == '__main__':
+     file_list = []
+     os.makedirs("logs", exist_ok=True)
+
+     # Traverse the specified folder for ABC files
+     for root, dirs, files in os.walk(input_dir):
+         for file in files:
+             if not file.endswith(".abc"):
+                 continue
+             filename = os.path.join(root, file).replace("\\", "/")
+             file_list.append(filename)
+
+     # Prepare for multiprocessing: one contiguous slice of files per CPU core
+     file_lists = []
+     random.shuffle(file_list)
+     for i in range(os.cpu_count()):
+         start_idx = int(math.floor(i * len(file_list) / os.cpu_count()))
+         end_idx = int(math.floor((i + 1) * len(file_list) / os.cpu_count()))
+         file_lists.append(file_list[start_idx:end_idx])
+
+     pool = Pool(processes=os.cpu_count())
+     pool.map(convert_abc2xml, file_lists)
process_data/batch_interleaved_abc.py ADDED
@@ -0,0 +1,117 @@
+ input_dir = "<path_to_your_abc_files>"  # Replace with the path to your folder containing standard ABC (.abc) files
+
+ import os
+ import re
+ import random
+ from multiprocessing import Pool
+ from tqdm import tqdm
+ from abctoolkit.utils import (
+     find_all_abc,
+     remove_information_field,
+     remove_bar_no_annotations,
+     Quote_re,
+     Barlines,
+     strip_empty_bars
+ )
+ from abctoolkit.rotate import rotate_abc
+ from abctoolkit.check import check_alignment_unrotated
+
+ def abc_pipeline(abc_path, input_dir, output_dir):
+     """
+     Converts standard ABC notation to interleaved ABC notation.
+     """
+     with open(abc_path, 'r', encoding='utf-8') as f:
+         abc_lines = f.readlines()
+
+     abc_lines = [line for line in abc_lines if line.strip() != '']
+     abc_lines = remove_information_field(
+         abc_lines=abc_lines,
+         info_fields=['X:', 'T:', 'C:', 'W:', 'w:', 'Z:', '%%MIDI']
+     )
+     abc_lines = remove_bar_no_annotations(abc_lines)
+
+     # Remove escaped quotes and clean up barlines inside quotes
+     for i, line in enumerate(abc_lines):
+         if not (re.search(r'^[A-Za-z]:', line) or line.startswith('%')):
+             abc_lines[i] = line.replace(r'\"', '')
+             quote_contents = re.findall(Quote_re, line)
+             for quote_content in quote_contents:
+                 for barline in Barlines:
+                     if barline in quote_content:
+                         line = line.replace(quote_content, '')
+                         abc_lines[i] = line
+
+     try:
+         stripped_abc_lines, bar_counts = strip_empty_bars(abc_lines)
+     except Exception as e:
+         print(abc_path, 'Error in stripping empty bars:', e)
+         return
+
+     if stripped_abc_lines is None:
+         print(abc_path, 'Failed to strip')
+         return
+
+     # Check alignment
+     _, bar_no_equal_flag, bar_dur_equal_flag = check_alignment_unrotated(stripped_abc_lines)
+     if not bar_no_equal_flag:
+         print(abc_path, 'Unequal bar number')
+     if not bar_dur_equal_flag:
+         print(abc_path, 'Unequal bar duration (unaligned)')
+
+     # Construct the output path, maintaining the input folder structure
+     relative_path = os.path.relpath(abc_path, input_dir)  # Get relative path from input dir
+     output_file_path = os.path.join(output_dir, relative_path)  # Recreate output path
+     os.makedirs(os.path.dirname(output_file_path), exist_ok=True)  # Ensure output folder exists
+
+     try:
+         rotated_abc_lines = rotate_abc(stripped_abc_lines)
+     except Exception as e:
+         print(abc_path, 'Error in rotating:', e)
+         return
+
+     if rotated_abc_lines is None:
+         print(abc_path, 'Failed to rotate')
+         return
+
+     with open(output_file_path, 'w', encoding='utf-8') as w:
+         w.writelines(rotated_abc_lines)
+
+ def abc_pipeline_list(abc_path_list, input_dir, output_dir):
+     for abc_path in tqdm(abc_path_list):
+         try:
+             abc_pipeline(abc_path, input_dir, output_dir)
+         except Exception as e:
+             print(abc_path, e)
+
+ def batch_abc_pipeline(input_dir):
+     """
+     Batch process all ABC files from `input_dir`, converting them to interleaved notation.
+     """
+     output_dir = input_dir + "_interleaved"
+     os.makedirs(output_dir, exist_ok=True)
+
+     abc_path_list = []
+     for abc_path in find_all_abc(input_dir):
+         if os.path.getsize(abc_path) > 0:
+             abc_path_list.append(abc_path)
+     random.shuffle(abc_path_list)
+     print(f"Found {len(abc_path_list)} ABC files.")
+
+     # Deal the files round-robin across one list per CPU core
+     num_cpus = os.cpu_count()
+     split_lists = [[] for _ in range(num_cpus)]
+     index = 0
+
+     for abc_path in abc_path_list:
+         split_lists[index].append(abc_path)
+         index = (index + 1) % num_cpus
+
+     pool = Pool(processes=num_cpus)
+     pool.starmap(
+         abc_pipeline_list,
+         [(split, input_dir, output_dir) for split in split_lists]
+     )
+
+ if __name__ == '__main__':
+     batch_abc_pipeline(input_dir)
process_data/batch_midi2mtf.py ADDED
@@ -0,0 +1,80 @@
+ input_dir = "<path_to_your_midi_files>"  # Replace with the path to your folder containing MIDI (.midi, .mid) files
+ m3_compatible = True  # Set to True for M3 compatibility; set to False to retain all MIDI information during conversion.
+
+ import os
+ import math
+ import mido
+ import random
+ from tqdm import tqdm
+ from multiprocessing import Pool
+
+ def msg_to_str(msg):
+     # Join the message's parameter values (type first) with spaces;
+     # escape non-ASCII characters so each message stays on one line
+     str_msg = ""
+     for key, value in msg.dict().items():
+         str_msg += " " + str(value)
+     return str_msg.strip().encode('unicode_escape').decode('utf-8')
+
+ def load_midi(filename):
+     # Load a MIDI file
+     mid = mido.MidiFile(filename)
+     msg_list = ["ticks_per_beat " + str(mid.ticks_per_beat)]
+
+     # Traverse the MIDI file
+     for msg in mid.merged_track:
+         if m3_compatible:
+             # Skip messages whose parameters are free text or raw data,
+             # so no natural language leaks into the MTF representation
+             if msg.is_meta:
+                 if msg.type in ["text", "copyright", "track_name", "instrument_name",
+                                 "lyrics", "marker", "cue_marker", "device_name", "sequencer_specific"]:
+                     continue
+             else:
+                 if msg.type in ["sysex"]:
+                     continue
+         str_msg = msg_to_str(msg)
+         msg_list.append(str_msg)
+
+     return "\n".join(msg_list)
+
+ def convert_midi2mtf(file_list):
+     for file in tqdm(file_list):
+         filename = file.split('/')[-1]
+         output_dir = file.split('/')[:-1]
+         output_dir[0] = output_dir[0] + '_mtf'
+         output_dir = '/'.join(output_dir)
+         os.makedirs(output_dir, exist_ok=True)
+         try:
+             output = load_midi(file)
+
+             if output == '':
+                 with open('logs/midi2mtf_error_log.txt', 'a', encoding='utf-8') as f:
+                     f.write(file + '\n')
+                 continue
+             else:
+                 with open(output_dir + "/" + ".".join(filename.split(".")[:-1]) + '.mtf', 'w', encoding='utf-8') as f:
+                     f.write(output)
+         except Exception as e:
+             with open('logs/midi2mtf_error_log.txt', 'a', encoding='utf-8') as f:
+                 f.write(file + " " + str(e) + '\n')
+
+ if __name__ == '__main__':
+     file_list = []
+     os.makedirs("logs", exist_ok=True)
+
+     # Traverse the specified folder for MIDI files
+     for root, dirs, files in os.walk(input_dir):
+         for file in files:
+             if not file.endswith(".mid") and not file.endswith(".midi"):
+                 continue
+             filename = os.path.join(root, file).replace("\\", "/")
+             file_list.append(filename)
+
+     # Prepare for multiprocessing
+     file_lists = []
+     random.shuffle(file_list)
+     for i in range(os.cpu_count()):
+         start_idx = int(math.floor(i * len(file_list) / os.cpu_count()))
+         end_idx = int(math.floor((i + 1) * len(file_list) / os.cpu_count()))
+         file_lists.append(file_list[start_idx:end_idx])
+
+     pool = Pool(processes=os.cpu_count())
+     pool.map(convert_midi2mtf, file_lists)
process_data/batch_mtf2midi.py ADDED
@@ -0,0 +1,97 @@
+ input_dir = "<path_to_your_mtf_files>"  # Replace with the path to your folder containing MTF (.mtf) files
+
+ import os
+ import math
+ import mido
+ import random
+ from tqdm import tqdm
+ from multiprocessing import Pool
+
+ def str_to_msg(str_msg):
+     msg_type = str_msg.split(" ")[0]
+     try:
+         msg = mido.Message(msg_type)
+     except Exception:
+         msg = mido.MetaMessage(msg_type)
+
+     if msg_type in ["text", "copyright", "track_name", "instrument_name",
+                     "lyrics", "marker", "cue_marker", "device_name"]:
+         # Text payloads may contain spaces: everything between the type and
+         # the trailing time field is the (escaped) text itself
+         values = [msg_type, " ".join(str_msg.split(" ")[1:-1]).encode('utf-8').decode('unicode_escape'), str_msg.split(" ")[-1]]
+     elif "[" in str_msg or "(" in str_msg:
+         # Reconstruct list/tuple parameters (e.g. sysex data) from their bracketed form
+         is_bracket = "[" in str_msg
+         left_idx = str_msg.index("[") if is_bracket else str_msg.index("(")
+         right_idx = str_msg.index("]") if is_bracket else str_msg.index(")")
+         list_str = [int(num) for num in str_msg[left_idx+1:right_idx].split(", ")]
+         if not is_bracket:
+             list_str = tuple(list_str)
+         values = str_msg[:left_idx].split(" ") + [list_str] + str_msg[right_idx+1:].split(" ")
+         values = [value for value in values if value != ""]
+     else:
+         values = str_msg.split(" ")
+
+     if len(values) != 1:
+         for idx, (key, content) in enumerate(msg.__dict__.items()):
+             if key == "type":
+                 continue
+             value = values[idx]
+             if isinstance(content, int) or isinstance(content, float):
+                 # Numeric fields: parse as float, then downcast to int when whole
+                 float_value = float(value)
+                 value = float_value
+                 if value % 1 == 0:
+                     value = int(value)
+             setattr(msg, key, value)
+
+     return msg
+
+ def convert_mtf2midi(file_list):
+     for file in tqdm(file_list):
+         filename = file.split('/')[-1]
+         output_dir = file.split('/')[:-1]
+         output_dir[0] = output_dir[0] + '_midi'
+         output_dir = '/'.join(output_dir)
+         os.makedirs(output_dir, exist_ok=True)
+         try:
+             with open(file, 'r', encoding='utf-8') as f:
+                 msg_list = f.read().splitlines()
+
+             # Build a new MIDI file based on the MIDI messages
+             new_mid = mido.MidiFile()
+             new_mid.ticks_per_beat = int(msg_list[0].split(" ")[1])
+
+             track = mido.MidiTrack()
+             new_mid.tracks.append(track)
+
+             for msg in msg_list[1:]:
+                 if "unknown_meta" in msg:
+                     continue
+                 new_msg = str_to_msg(msg)
+                 track.append(new_msg)
+
+             output_file_path = os.path.join(output_dir, os.path.basename(file).replace('.mtf', '.mid'))
+             new_mid.save(output_file_path)
+         except Exception as e:
+             with open('logs/mtf2midi_error_log.txt', 'a', encoding='utf-8') as f:
+                 f.write(f"Error processing {file}: {str(e)}\n")
+
+ if __name__ == '__main__':
+     file_list = []
+     os.makedirs("logs", exist_ok=True)
+
+     # Traverse the specified folder for MTF files
+     for root, dirs, files in os.walk(input_dir):
+         for file in files:
+             if not file.endswith(".mtf"):
+                 continue
+             filename = os.path.join(root, file).replace("\\", "/")
+             file_list.append(filename)
+
+     # Prepare for multiprocessing
+     file_lists = []
+     random.shuffle(file_list)
+     for i in range(os.cpu_count()):
+         start_idx = int(math.floor(i * len(file_list) / os.cpu_count()))
+         end_idx = int(math.floor((i + 1) * len(file_list) / os.cpu_count()))
+         file_lists.append(file_list[start_idx:end_idx])
+
+     pool = Pool(processes=os.cpu_count())
+     pool.map(convert_mtf2midi, file_lists)
process_data/batch_xml2abc.py ADDED
@@ -0,0 +1,57 @@
+ input_dir = "<path_to_your_xml_files>"  # Replace with the path to your folder containing XML (.xml, .mxl, .musicxml) files
+
+ import os
+ import math
+ import random
+ import subprocess
+ from tqdm import tqdm
+ from multiprocessing import Pool
+
+ def convert_xml2abc(file_list):
+     cmd = 'cmd /u /c python utils/xml2abc.py -d 8 -x '  # Windows shell invocation; on Unix-like systems, drop the 'cmd /u /c' prefix
+     for file in tqdm(file_list):
+         filename = file.split('/')[-1]
+         output_dir = file.split('/')[:-1]
+         output_dir[0] = output_dir[0] + '_abc'
+         output_dir = '/'.join(output_dir)
+         os.makedirs(output_dir, exist_ok=True)
+
+         try:
+             p = subprocess.Popen(cmd + '"' + file + '"', stdout=subprocess.PIPE, shell=True)
+             result = p.communicate()
+             output = result[0].decode('utf-8')
+
+             if output == '':
+                 with open("logs/xml2abc_error_log.txt", "a", encoding="utf-8") as f:
+                     f.write(file + '\n')
+                 continue
+             else:
+                 with open(output_dir + '/' + ".".join(filename.split(".")[:-1]) + '.abc', 'w', encoding='utf-8') as f:
+                     f.write(output)
+         except Exception as e:
+             with open("logs/xml2abc_error_log.txt", "a", encoding="utf-8") as f:
+                 f.write(file + ' ' + str(e) + '\n')
+
+ if __name__ == '__main__':
+     file_list = []
+     os.makedirs("logs", exist_ok=True)
+
+     # Traverse the specified folder for XML/MXL files
+     for root, dirs, files in os.walk(input_dir):
+         for file in files:
+             if not file.endswith((".mxl", ".xml", ".musicxml")):
+                 continue
+             filename = os.path.join(root, file).replace("\\", "/")
+             file_list.append(filename)
+
+     # Prepare for multiprocessing
+     file_lists = []
+     random.shuffle(file_list)
+     for i in range(os.cpu_count()):
+         start_idx = int(math.floor(i * len(file_list) / os.cpu_count()))
+         end_idx = int(math.floor((i + 1) * len(file_list) / os.cpu_count()))
+         file_lists.append(file_list[start_idx:end_idx])
+
+     pool = Pool(processes=os.cpu_count())
+     pool.map(convert_xml2abc, file_lists)
process_data/gpt4_summarize.py ADDED
@@ -0,0 +1,250 @@
+ input_dir = "<path_to_your_metadata_json_files>"  # Replace with the path to your folder containing metadata (.json) files
+ base_url = "<your_base_url>"  # Replace with the base URL for the API
+ api_key = "<your_api_key>"  # Replace with your API key
+ model = "<your_model>"  # Replace with your model name
+
+ import os
+ import json
+ import random
+ from openai import OpenAI
+
+ # Initialize the OpenAI client
+ client = OpenAI(base_url=base_url, api_key=api_key)
+
+ def log_error(file_path, error_message):
+     """Logs error messages to a specified log file."""
+     os.makedirs("logs", exist_ok=True)
+     with open("logs/gpt4_summarize_error_log.txt", 'a', encoding='utf-8') as log_file:
+         log_file.write(f"Error processing {file_path}: {error_message}\n")
+
+ def process_json(metadata, language):
+     """
+     Processes the given metadata of a music piece using the GPT-4 API.
+
+     This function sends the metadata and target language to the GPT-4 model to generate
+     a structured summary. The summary is provided in both English and the specified
+     non-English language from the 'nen_language' field.
+
+     If the provided metadata lacks sufficient music-related details, the model returns
+     `None` and this function raises a ValueError (caught and logged by the caller).
+
+     Parameters:
+     - metadata (dict): A dictionary containing the metadata of the music piece.
+     - language (str): The target non-English language for the summary.
+
+     Returns:
+     - dict: The metadata extended with the English and non-English summaries.
+     """
+     system = """Your task is to provide a concise, comprehensive, and coherent summary of the music piece using the provided metadata. Please write the summary in English first, and then write an equivalent summary in the specified non-English language from the "nen_language" field. Use this JSON format:
+ {
+     "summary_en": "Your English summary here.",
+     "summary_nen": {
+         "language": "Specified non-English language.",
+         "summary": "Your non-English summary here."
+     }
+ }
+ If there is not enough music-related information, return `None` instead.
+ """
+     user1 = """{
+     "title": "Brejeiro",
+     "composer": "Ernesto Nazareth",
+     "genres": ["Choro", "Classical", "Instrumental"],
+     "description": "\"Brejeiro\" is in A major and 2/4 time. A joyful melody begins at bar six, and a lively tango rhythm starts at bar fourteen. It has a D.C. al Fine at bar fifty-three and ends on two quarter notes in bar thirty-seven. The piece, with its vibrant melodies and rhythms, reflects celebration and carefreeness, embodying the spirit of Brazilian music.",
+     "tags": ["Brazilian", "Choro", "Piano"],
+     "ensembles": ["Solo Piano", "Small Ensemble"],
+     "instruments": ["Piano"],
+     "nen_language": "Japanese"
+ }
+ """
+     assistant1 = """{
+     "summary_en": "Brejeiro, composed by Ernesto Nazareth, is a lively choro piece in A major and 2/4 time. It features a joyful melody that begins at bar six and a vibrant tango rhythm introduced at bar fourteen. The piece includes a D.C. al Fine at bar fifty-three, concluding on two quarter notes in bar thirty-seven. With its themes of celebration and carefreeness, Brejeiro beautifully captures the essence of Brazilian music and is well-suited for solo piano and small ensembles.",
+     "summary_nen": {
+         "language": "Japanese",
+         "summary": "「ブレジェイロ」は、エルネスト・ナザレが作曲した活気あふれるショーロの作品で、イ長調の2/4拍子で書かれています。第6小節から始まる喜びに満ちたメロディーと、第14小節で導入される活気あるタンゴのリズムが特徴です。この曲には、第53小節でのD.C. al Fineが含まれ、また第37小節で二つの四分音符で締めくくられています。「ブレジェイロ」は、お祝いと無邪気さのテーマを持ち、ブラジル音楽の本質を美しく捉えており、ソロピアノや小編成のアンサンブルにぴったりの作品です。"
+     }
+ }
+ """
+     user2 = """{
+     "title": "Untitled",
+     "composer": "Unknown",
+     "description": "This is a good song.",
+     "nen_language": "Russian"
+ }
+ """
+     assistant2 = "None"
+     filepaths = metadata.pop('filepaths')
+     metadata = {k: v for k, v in metadata.items() if v is not None}
+
+     metadata["nen_language"] = language
+     metadata = json.dumps(metadata, ensure_ascii=False, indent=4)
+     summaries = client.chat.completions.create(
+         model=model,
+         messages=[
+             {"role": "system", "content": system},
+             {"role": "user", "content": user1},
+             {"role": "assistant", "content": assistant1},
+             {"role": "user", "content": user2},
+             {"role": "assistant", "content": assistant2},
+             {"role": "user", "content": metadata},
+         ]
+     ).choices[0].message.content
+
+     if summaries == "None":
+         raise ValueError("Received 'None' as summaries response")
+
+     metadata = json.loads(metadata)
+     summaries = json.loads(summaries)
+
+     if metadata["nen_language"] == summaries["summary_nen"]["language"]:
+         metadata.pop("nen_language")
+         metadata["summary_en"] = summaries["summary_en"]
+         metadata["summary_nen"] = summaries["summary_nen"]
+         metadata["filepaths"] = filepaths
+         return metadata
+     else:
+         raise ValueError("Language mismatch: nen_language does not match summary_nen language")
+
+ def process_files(input_dir):
+     # Create output directory with the _summarized suffix
+     output_dir = input_dir + "_summarized"
+
+     # Define available languages
+     languages = """Afrikaans
+ Amharic
+ Arabic
+ Assamese
+ Azerbaijani
+ Belarusian
+ Bulgarian
+ Bengali
+ Bengali (Romanized)
+ Breton
+ Bosnian
+ Catalan
+ Czech
+ Welsh
+ Danish
+ German
+ Greek
+ Esperanto
+ Spanish
+ Estonian
+ Basque
+ Persian
+ Finnish
+ French
+ Western Frisian
+ Irish
+ Scottish Gaelic
+ Galician
+ Gujarati
+ Hausa
+ Hebrew
+ Hindi
+ Hindi (Romanized)
+ Croatian
+ Hungarian
+ Armenian
+ Indonesian
+ Icelandic
+ Italian
+ Japanese
+ Javanese
+ Georgian
+ Kazakh
+ Khmer
+ Kannada
+ Korean
+ Kurdish (Kurmanji)
+ Kyrgyz
+ Latin
+ Lao
+ Lithuanian
+ Latvian
+ Malagasy
+ Macedonian
+ Malayalam
+ Mongolian
+ Marathi
+ Malay
+ Burmese
+ Burmese (Romanized)
+ Nepali
+ Dutch
+ Norwegian
+ Oromo
+ Oriya
+ Punjabi
+ Polish
+ Pashto
+ Portuguese
+ Romanian
+ Russian
+ Sanskrit
+ Sindhi
+ Sinhala
+ Slovak
+ Slovenian
+ Somali
+ Albanian
+ Serbian
+ Sundanese
+ Swedish
+ Swahili
+ Tamil
+ Tamil (Romanized)
+ Telugu
+ Telugu (Romanized)
+ Thai
+ Filipino
+ Turkish
+ Uyghur
+ Ukrainian
+ Urdu
+ Urdu (Romanized)
+ Uzbek
+ Vietnamese
+ Xhosa
+ Yiddish
+ Chinese (Simplified)
+ Chinese (Traditional)
+ Cantonese"""
+     languages = [language.strip() for language in languages.split("\n")]
+
+     # Walk through the input directory
+     for root, _, files in os.walk(input_dir):
+         # Construct the corresponding path in the output folder
+         relative_path = os.path.relpath(root, input_dir)
+         output_path = os.path.join(output_dir, relative_path)
+
+         # Create the output directory if it doesn't exist
+         os.makedirs(output_path, exist_ok=True)
+
+         for file in files:
+             if file.endswith('.json'):
+                 input_file = os.path.join(root, file)
+                 output_file = os.path.join(output_path, file)
+
+                 try:
+                     # Read the JSON file
+                     with open(input_file, 'r', encoding='utf-8') as f:
+                         metadata = json.load(f)
+
+                     # Randomly select a language from the list
+                     language = random.choice(languages)
+
+                     # Process the JSON data
+                     processed_metadata = process_json(metadata, language)
+
+                     # Write the processed JSON to the output file
+                     with open(output_file, 'w', encoding='utf-8') as f:
+                         json.dump(processed_metadata, f, indent=4, ensure_ascii=False)
+
+                     print(f"Processed: {input_file} -> {output_file}")
+
+                 except Exception as e:
+                     print(f"Failed to process {input_file}: {e}")
+                     log_error(input_file, str(e))
+
+ if __name__ == "__main__":
+     process_files(input_dir)
process_data/utils/abc2xml.py ADDED
The diff for this file is too large to render. See raw diff
 
process_data/utils/pyparsing.py ADDED
The diff for this file is too large to render. See raw diff
 
process_data/utils/xml2abc.py ADDED
@@ -0,0 +1,1582 @@
+ #!/usr/bin/env python
+ # coding=latin-1
+ '''
+ Copyright (C) 2012-2018: W.G. Vree
+ Contributions: M. Tarenskeen, N. Liberg, Paul Villiger, Janus Meuris, Larry Myerscough,
+ Dick Jackson, Jan Wybren de Jong, Mark Zealey.
+
+ This program is free software; you can redistribute it and/or modify it under the terms of the
+ Lesser GNU General Public License as published by the Free Software Foundation;
+
+ This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+ without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ See the Lesser GNU General Public License for more details. <http://www.gnu.org/licenses/lgpl.html>.
+ '''
+
+ try: import xml.etree.cElementTree as E
+ except: import xml.etree.ElementTree as E
+ import os, sys, types, re, math
+
+ VERSION = 143
+
+ python3 = sys.version_info.major > 2
+ if python3:
+     tupletype = tuple
+     listtype = list
+     max_int = sys.maxsize
+ else:
+     tupletype = types.TupleType
+     listtype = types.ListType
+     max_int = sys.maxint
+
+ note_ornamentation_map = { # for notations/, modified from EasyABC
+     'ornaments/trill-mark': 'T',
+     'ornaments/mordent': 'M',
+     'ornaments/inverted-mordent': 'P',
+     'ornaments/turn': '!turn!',
+     'ornaments/inverted-turn': '!invertedturn!',
+     'technical/up-bow': 'u',
+     'technical/down-bow': 'v',
+     'technical/harmonic': '!open!',
+     'technical/open-string': '!open!',
+     'technical/stopped': '!plus!',
+     'technical/snap-pizzicato': '!snap!',
+     'technical/thumb-position': '!thumb!',
+     'articulations/accent': '!>!',
+     'articulations/strong-accent':'!^!',
+     'articulations/staccato': '.',
+     'articulations/staccatissimo':'!wedge!',
+     'articulations/scoop': '!slide!',
+     'fermata': '!fermata!',
+     'arpeggiate': '!arpeggio!',
+     'articulations/tenuto': '!tenuto!',
+     'articulations/staccatissimo':'!wedge!', # not sure whether this is the right translation
+     'articulations/spiccato': '!wedge!', # not sure whether this is the right translation
+     'articulations/breath-mark': '!breath!', # this may need to be tested to make sure it appears on the right side of the note
+     'articulations/detached-legato': '!tenuto!.',
+ }
+
+ dynamics_map = { # for direction/direction-type/dynamics/
+     'p': '!p!',
+     'pp': '!pp!',
+     'ppp': '!ppp!',
+     'pppp': '!pppp!',
+     'f': '!f!',
+     'ff': '!ff!',
+     'fff': '!fff!',
+     'ffff': '!ffff!',
+     'mp': '!mp!',
+     'mf': '!mf!',
+     'sfz': '!sfz!',
+ }
+
+ percSvg = '''%%beginsvg
+ <defs>
+ <text id="x" x="-3" y="0">&#xe263;</text>
+ <text id="x-" x="-3" y="0">&#xe263;</text>
+ <text id="x+" x="-3" y="0">&#xe263;</text>
+ <text id="normal" x="-3.7" y="0">&#xe0a3;</text>
+ <text id="normal-" x="-3.7" y="0">&#xe0a3;</text>
+ <text id="normal+" x="-3.7" y="0">&#xe0a4;</text>
+ <g id="circle-x"><text x="-3" y="0">&#xe263;</text><circle r="4" class="stroke"></circle></g>
+ <g id="circle-x-"><text x="-3" y="0">&#xe263;</text><circle r="4" class="stroke"></circle></g>
+ <path id="triangle" d="m-4 -3.2l4 6.4 4 -6.4z" class="stroke" style="stroke-width:1.4"></path>
+ <path id="triangle-" d="m-4 -3.2l4 6.4 4 -6.4z" class="stroke" style="stroke-width:1.4"></path>
+ <path id="triangle+" d="m-4 -3.2l4 6.4 4 -6.4z" class="stroke" style="fill:#000"></path>
+ <path id="square" d="m-3.5 3l0 -6.2 7.2 0 0 6.2z" class="stroke" style="stroke-width:1.4"></path>
+ <path id="square-" d="m-3.5 3l0 -6.2 7.2 0 0 6.2z" class="stroke" style="stroke-width:1.4"></path>
+ <path id="square+" d="m-3.5 3l0 -6.2 7.2 0 0 6.2z" class="stroke" style="fill:#000"></path>
+ <path id="diamond" d="m0 -3l4.2 3.2 -4.2 3.2 -4.2 -3.2z" class="stroke" style="stroke-width:1.4"></path>
+ <path id="diamond-" d="m0 -3l4.2 3.2 -4.2 3.2 -4.2 -3.2z" class="stroke" style="stroke-width:1.4"></path>
+ <path id="diamond+" d="m0 -3l4.2 3.2 -4.2 3.2 -4.2 -3.2z" class="stroke" style="fill:#000"></path>
+ </defs>
+ %%endsvg'''
+
+ tabSvg = '''%%beginsvg
+ <style type="text/css">
+ .bf {font-family:sans-serif; font-size:7px}
+ </style>
+ <defs>
+ <rect id="clr" x="-3" y="-1" width="6" height="5" fill="white"></rect>
+ <rect id="clr2" x="-3" y="-1" width="11" height="5" fill="white"></rect>'''
+
+ kopSvg = '<g id="kop%s" class="bf"><use xlink:href="#clr"></use><text x="-2" y="3">%s</text></g>\n'
+ kopSvg2 = '<g id="kop%s" class="bf"><use xlink:href="#clr2"></use><text x="-2" y="3">%s</text></g>\n'
+
+ def info (s, warn=1): sys.stderr.write ((warn and '-- ' or '') + s + '\n')
+
+ #-------------------
+ # data abstractions
+ #-------------------
+ class Measure:
+     def __init__ (s, p):
+         s.reset ()
+         s.ixp = p # part number
+         s.ixm = 0 # measure number
+         s.mdur = 0 # measure duration (nominal metre value in divisions)
+         s.divs = 0 # number of divisions per 1/4
+         s.mtr = 4,4 # meter
+
+     def reset (s): # reset each measure
+         s.attr = '' # measure signatures, tempo
+         s.lline = '' # left barline, but only holds ':' at start of repeat, otherwise empty
+         s.rline = '|' # right barline
+         s.lnum = '' # (left) volta number
+
+ class Note:
+     def __init__ (s, dur=0, n=None):
+         s.tijd = 0 # the time in XML division units
+         s.dur = dur # duration of a note in XML divisions
+         s.fact = None # time modification for tuplet notes (num, div)
+         s.tup = [''] # start(s) and/or stop(s) of tuplet
+         s.tupabc = '' # abc tuplet string to issue before note
+         s.beam = 0 # 1 = beamed
+         s.grace = 0 # 1 = grace note
+         s.before = [] # abc string that goes before the note/chord
+         s.after = '' # the same after the note/chord
+         s.ns = n and [n] or [] # notes in the chord
+         s.lyrs = {} # {number -> syllabe}
+         s.tab = None # (string number, fret number)
+         s.ntdec = '' # !string!, !courtesy!
+
+ class Elem:
+     def __init__ (s, string):
+         s.tijd = 0 # the time in XML division units
+         s.str = string # any abc string that is not a note
+
+ class Counter:
+     def inc (s, key, voice): s.counters [key][voice] = s.counters [key].get (voice, 0) + 1
+     def clear (s, vnums): # reset all counters
+         tups = list( zip (vnums.keys (), len (vnums) * [0]))
+         s.counters = {'note': dict (tups), 'nopr': dict (tups), 'nopt': dict (tups)}
+     def getv (s, key, voice): return s.counters[key][voice]
+     def prcnt (s, ip): # print summary of all non zero counters
+         for iv in s.counters ['note']:
+             if s.getv ('nopr', iv) != 0:
+                 info ( 'part %d, voice %d has %d skipped non printable notes' % (ip, iv, s.getv ('nopr', iv)))
+             if s.getv ('nopt', iv) != 0:
+                 info ( 'part %d, voice %d has %d notes without pitch' % (ip, iv, s.getv ('nopt', iv)))
+             if s.getv ('note', iv) == 0: # no real notes counted in this voice
+                 info ( 'part %d, skipped empty voice %d' % (ip, iv))
+
+ class Music:
+     def __init__(s, options):
+         s.tijd = 0 # the current time
+         s.maxtime = 0 # maximum time in a measure
+         s.gMaten = [] # [voices,.. for all measures in a part]
+         s.gLyrics = [] # [{num: (abc_lyric_string, melis)},.. for all measures in a part]
+         s.vnums = {} # all used voice id's in a part (xml voice id's == numbers)
+         s.cnt = Counter () # global counter object
+         s.vceCnt = 1 # the global voice count over all parts
+         s.lastnote = None # the last real note record inserted in s.voices
+         s.bpl = options.b # the max number of bars per line when writing abc
+         s.cpl = options.n # the number of chars per line when writing abc
+         s.repbra = 0 # true if volta is used somewhere
+         s.nvlt = options.v # no volta on higher voice numbers
+         s.jscript = options.j # compatibility with javascript version
+
+     def initVoices (s, newPart=0):
+         s.vtimes, s.voices, s.lyrics = {}, {}, {}
+         for v in s.vnums:
+             s.vtimes [v] = 0 # {voice: the end time of the last item in each voice}
+             s.voices [v] = [] # {voice: [Note|Elem, ..]}
+             s.lyrics [v] = [] # {voice: [{num: syl}, ..]}
+         if newPart: s.cnt.clear (s.vnums) # clear counters once per part
+
+     def incTime (s, dt):
+         s.tijd += dt
+         if s.tijd < 0: s.tijd = 0 # erroneous <backup> element
+         if s.tijd > s.maxtime: s.maxtime = s.tijd
+
+     def appendElemCv (s, voices, elem):
+         for v in voices:
+             s.appendElem (v, elem) # insert element in all voices
+
+     def insertElem (s, v, elem): # insert at the start of voice v in the current measure
+         obj = Elem (elem)
+         obj.tijd = 0 # because voice is sorted later
+         s.voices [v].insert (0, obj)
+
+     def appendObj (s, v, obj, dur):
+         obj.tijd = s.tijd
+         s.voices [v].append (obj)
+         s.incTime (dur)
+         if s.tijd > s.vtimes[v]: s.vtimes[v] = s.tijd # don't update for inserted earlier items
+
+     def appendElem (s, v, elem, tel=0):
+         s.appendObj (v, Elem (elem), 0)
+         if tel: s.cnt.inc ('note', v) # count number of certain elements in each voice (in addition to notes)
+
+     def appendElemT (s, v, elem, tijd): # insert element at specified time
+         obj = Elem (elem)
+         obj.tijd = tijd
+         s.voices [v].append (obj)
+
+     def appendNote (s, v, note, noot):
+         note.ns.append (note.ntdec + noot)
+         s.appendObj (v, note, int (note.dur))
+         s.lastnote = note # remember last note/rest for later modifications (chord, grace)
+         if noot != 'z' and noot != 'x': # real notes and grace notes
+             s.cnt.inc ('note', v) # count number of real notes in each voice
+             if not note.grace: # for every real note
+                 s.lyrics[v].append (note.lyrs) # even when it has no lyrics
+
+     def getLastRec (s, voice):
+         if s.gMaten: return s.gMaten[-1][voice][-1] # the last record in the last measure
+         return None # no previous records in the first measure
+
+     def getLastMelis (s, voice, num): # get melisma of last measure
+         if s.gLyrics:
+             lyrdict = s.gLyrics[-1][voice] # the previous lyrics dict in this voice
+             if num in lyrdict: return lyrdict[num][1] # lyrdict = num -> (lyric string, melisma)
+         return 0 # no previous lyrics in voice or line number
+
+     def addChord (s, note, noot): # careful: we assume that chord notes follow immediately
+         for d in note.before: # put all decorations before chord
+             if d not in s.lastnote.before:
+                 s.lastnote.before += [d]
+         s.lastnote.ns.append (note.ntdec + noot)
+
+     def addBar (s, lbrk, m): # linebreak, measure data
+         if m.mdur and s.maxtime > m.mdur: info ('measure %d in part %d longer than metre' % (m.ixm+1, m.ixp+1))
+         s.tijd = s.maxtime # the time of the bar lines inserted here
+         for v in s.vnums:
+             if m.lline or m.lnum: # if left barline or left volta number
+                 p = s.getLastRec (v) # get the previous barline record
+                 if p: # in measure 1 no previous measure is available
+                     x = p.str # p.str is the ABC barline string
+                     if m.lline: # append begin of repeat, m.lline == ':'
+                         x = (x + m.lline).replace (':|:','::').replace ('||','|')
+                     if s.nvlt == 3: # add volta number only to lowest voice in part 0
+                         if m.ixp + v == min (s.vnums): x += m.lnum
+                     elif m.lnum: # new behaviour with I:repbra 0
+                         x += m.lnum # add volta number(s) or text to all voices
+                         s.repbra = 1 # signal occurrence of a volta
+                     p.str = x # modify previous right barline
+                 elif m.lline: # begin of new part and left repeat bar is required
+                     s.insertElem (v, '|:')
+             if lbrk:
+                 p = s.getLastRec (v) # get the previous barline record
+                 if p: p.str += lbrk # insert linebreak char after the barlines+volta
+             if m.attr: # insert signatures at front of buffer
+                 s.insertElem (v, '%s' % m.attr)
+             s.appendElem (v, ' %s' % m.rline) # insert current barline record at time maxtime
+             s.voices[v] = sortMeasure (s.voices[v], m) # make all times consistent
+             lyrs = s.lyrics[v] # [{number: sylabe}, .. for all notes]
+             lyrdict = {} # {number: (abc_lyric_string, melis)} for this voice
+             nums = [num for d in lyrs for num in d.keys ()] # the lyrics numbers in this measure
+             maxNums = max (nums + [0]) # the highest lyrics number in this measure
+             for i in range (maxNums, 0, -1):
+                 xs = [syldict.get (i, '') for syldict in lyrs] # collect the syllabi with number i
+                 melis = s.getLastMelis (v, i) # get melisma from last measure
+                 lyrdict [i] = abcLyr (xs, melis)
+             s.lyrics[v] = lyrdict # {number: (abc_lyric_string, melis)} for this measure
+             mkBroken (s.voices[v])
+         s.gMaten.append (s.voices)
+         s.gLyrics.append (s.lyrics)
+         s.tijd = s.maxtime = 0
+         s.initVoices ()
+
+     def outVoices (s, divs, ip, isSib): # output all voices of part ip
+         vvmap = {} # xml voice number -> abc voice number (one part)
+         vnum_keys = list (s.vnums.keys ())
+         if s.jscript or isSib: vnum_keys.sort ()
+         lvc = min (vnum_keys or [1]) # lowest xml voice number of this part
+         for iv in vnum_keys:
+             if s.cnt.getv ('note', iv) == 0: # no real notes counted in this voice
+                 continue # skip empty voices
+             if abcOut.denL: unitL = abcOut.denL # take the unit length from the -d option
+             else: unitL = compUnitLength (iv, s.gMaten, divs) # compute the best unit length for this voice
+             abcOut.cmpL.append (unitL) # remember for header output
+             vn, vl = [], {} # for voice iv: collect all notes to vn and all lyric lines to vl
+             for im in range (len (s.gMaten)):
+                 measure = s.gMaten [im][iv]
+                 vn.append (outVoice (measure, divs [im], im, ip, unitL))
+                 checkMelismas (s.gLyrics, s.gMaten, im, iv)
+                 for n, (lyrstr, melis) in s.gLyrics [im][iv].items ():
+                     if n in vl:
+                         while len (vl[n]) < im: vl[n].append ('') # fill in skipped measures
+                         vl[n].append (lyrstr)
+                     else:
+                         vl[n] = im * [''] + [lyrstr] # must skip im measures
+             for n, lyrs in vl.items (): # fill up possibly empty lyric measures at the end
+                 mis = len (vn) - len (lyrs)
+                 lyrs += mis * ['']
+             abcOut.add ('V:%d' % s.vceCnt)
+             if s.repbra:
+                 if s.nvlt == 1 and s.vceCnt > 1: abcOut.add ('I:repbra 0') # only volta on first voice
+                 if s.nvlt == 2 and iv > lvc: abcOut.add ('I:repbra 0') # only volta on first voice of each part
+             if s.cpl > 0: s.bpl = 0 # option -n (max chars per line) overrules -b (max bars per line)
+             elif s.bpl == 0: s.cpl = 100 # the default: 100 chars per line
+             bn = 0 # count bars
+             while vn: # while still measures available
+                 ib = 1
+                 chunk = vn [0]
+                 while ib < len (vn):
+                     if s.cpl > 0 and len (chunk) + len (vn [ib]) >= s.cpl: break # line full (number of chars)
+                     if s.bpl > 0 and ib >= s.bpl: break # line full (number of bars)
+                     chunk += vn [ib]
+                     ib += 1
+                 bn += ib
+                 abcOut.add (chunk + ' %%%d' % bn) # line with barnumer
+                 del vn[:ib] # chop ib bars
+                 lyrlines = sorted (vl.items ()) # order the numbered lyric lines for output
+                 for n, lyrs in lyrlines:
+                     abcOut.add ('w: ' + '|'.join (lyrs[:ib]) + '|')
+                     del lyrs[:ib]
+             vvmap [iv] = s.vceCnt # xml voice number -> abc voice number
+             s.vceCnt += 1 # count voices over all parts
+         s.gMaten = [] # reset the follwing instance vars for each part
+         s.gLyrics = []
+         s.cnt.prcnt (ip+1) # print summary of skipped items in this part
+         return vvmap
+
+ class ABCoutput:
+     pagekeys = 'scale,pageheight,pagewidth,leftmargin,rightmargin,topmargin,botmargin'.split (',')
+     def __init__ (s, fnmext, pad, X, options):
+         s.fnmext = fnmext
+         s.outlist = [] # list of ABC strings
+         s.title = 'T:Title'
+         s.key = 'none'
+         s.clefs = {} # clefs for all abc-voices
+         s.mtr = 'none'
+         s.tempo = 0 # 0 -> no tempo field
+         s.tempo_units = (1,4) # note type of tempo direction
+         s.pad = pad # the output path or none
+         s.X = X + 1 # the abc tune number
+         s.denL = options.d # denominator of the unit length (L:) from -d option
+         s.volpan = int (options.m) # 0 -> no %%MIDI, 1 -> only program, 2 -> all %%MIDI
+         s.cmpL = [] # computed optimal unit length for all voices
+         s.jscript = options.j # compatibility with javascript version
+         s.tstep = options.t # translate percmap to voicemap
+         s.stemless = 0 # use U:s=!stemless!
+         s.shiftStem = options.s # shift note heads 3 units left
+         if pad:
+             _, base_name = os.path.split (fnmext)
+             s.outfile = open (os.path.join (pad, base_name), 'w')
+         else: s.outfile = sys.stdout
+         if s.jscript: s.X = 1 # always X:1 in javascript version
+         s.pageFmt = {}
+         for k in s.pagekeys: s.pageFmt [k] = None
+         if len (options.p) == 7:
+             for k, v in zip (s.pagekeys, options.p):
+                 try: s.pageFmt [k] = float (v)
+                 except: info ('illegal float %s for %s', (k, v)); continue
+
+     def add (s, str):
+         s.outlist.append (str + '\n') # collect all ABC output
+
+     def mkHeader (s, stfmap, partlist, midimap, vmpdct, koppen): # stfmap = [parts], part = [staves], stave = [voices]
+         accVce, accStf, staffs = [], [], stfmap[:] # staffs is consumed
+         for x in partlist: # collect partnames into accVce and staff groups into accStf
+             try: prgroupelem (x, ('', ''), '', stfmap, accVce, accStf)
+             except: info ('lousy musicxml: error in part-list')
+         staves = ' '.join (accStf)
+         clfnms = {}
+         for part, (partname, partabbrv) in zip (staffs, accVce):
+             if not part: continue # skip empty part
+             firstVoice = part[0][0] # the first voice number in this part
+             nm = partname.replace ('\n','\\n').replace ('.:','.').strip (':')
+             snm = partabbrv.replace ('\n','\\n').replace ('.:','.').strip (':')
+             clfnms [firstVoice] = (nm and 'nm="%s"' % nm or '') + (snm and ' snm="%s"' % snm or '')
+         hd = ['X:%d\n%s\n' % (s.X, s.title)]
+         for i, k in enumerate (s.pagekeys):
+             if s.jscript and k in ['pageheight','topmargin', 'botmargin']: continue
+             if s.pageFmt [k] != None: hd.append ('%%%%%s %.2f%s\n' % (k, s.pageFmt [k], i > 0 and 'cm' or ''))
+         if staves and len (accStf) > 1: hd.append ('%%score ' + staves + '\n')
+         tempo = s.tempo and 'Q:%d/%d=%s\n' % (s.tempo_units [0], s.tempo_units [1], s.tempo) or '' # default no tempo field
+         d = {} # determine the most frequently occurring unit length over all voices
+         for x in s.cmpL: d[x] = d.get (x, 0) + 1
+         if s.jscript: defLs = sorted (d.items (), key=lambda x: (-x[1], x[0])) # when tie (1) sort on key (0)
+         else: defLs = sorted (d.items (), key=lambda x: -x[1])
+         defL = s.denL and s.denL or defLs [0][0] # override default unit length with -d option
+         hd.append ('L:1/%d\n%sM:%s\n' % (defL, tempo, s.mtr))
+         hd.append ('K:%s\n' % s.key)
+         if s.stemless: hd.append ('U:s=!stemless!\n')
+         vxs = sorted (vmpdct.keys ())
+         for vx in vxs: hd.extend (vmpdct [vx])
+         s.dojef = 0 # translate percmap to voicemap
+         for vnum, clef in s.clefs.items ():
+             ch, prg, vol, pan = midimap [vnum-1][:4]
+             dmap = midimap [vnum - 1][4:] # map of abc percussion notes to midi notes
+             if dmap and 'perc' not in clef: clef = (clef + ' map=perc').strip ();
+             hd.append ('V:%d %s %s\n' % (vnum, clef, clfnms.get (vnum, '')))
+             if vnum in vmpdct:
+                 hd.append ('%%%%voicemap tab%d\n' % vnum)
+                 hd.append ('K:none\nM:none\n%%clef none\n%%staffscale 1.6\n%%flatbeams true\n%%stemdir down\n')
+             if 'perc' in clef: hd.append ('K:none\n'); # no key for a perc voice
+             if s.volpan > 1: # option -m 2 -> output all recognized midi commands when needed and present in xml
+                 if ch > 0 and ch != vnum: hd.append ('%%%%MIDI channel %d\n' % ch)
+                 if prg > 0: hd.append ('%%%%MIDI program %d\n' % (prg - 1))
+                 if vol >= 0: hd.append ('%%%%MIDI control 7 %.0f\n' % vol) # volume == 0 is possible ...
+                 if pan >= 0: hd.append ('%%%%MIDI control 10 %.0f\n' % pan)
+             elif s.volpan > 0: # default -> only output midi program command when present in xml
+                 if dmap and ch > 0: hd.append ('%%%%MIDI channel %d\n' % ch) # also channel if percussion part
+                 if prg > 0: hd.append ('%%%%MIDI program %d\n' % (prg - 1))
+             for abcNote, step, midiNote, notehead in dmap:
+                 if not notehead: notehead = 'normal'
+                 if abcMid (abcNote) != midiNote or abcNote != step:
+                     if s.volpan > 0: hd.append ('%%%%MIDI drummap %s %s\n' % (abcNote, midiNote))
+                     hd.append ('I:percmap %s %s %s %s\n' % (abcNote, step, midiNote, notehead))
+                     s.dojef = s.tstep
+             if defL != s.cmpL [vnum-1]: # only if computed unit length different from header
+                 hd.append ('L:1/%d\n' % s.cmpL [vnum-1])
+         s.outlist = hd + s.outlist
+         if koppen: # output SVG stuff needed for tablature
+             k1 = kopSvg.replace ('-2','-5') if s.shiftStem else kopSvg # shift note heads 3 units left
+             k2 = kopSvg2.replace ('-2','-5') if s.shiftStem else kopSvg2
+             tb = tabSvg.replace ('-3','-6') if s.shiftStem else tabSvg
+             ks = sorted (koppen.keys ()) # javascript compatibility
+             ks = [k2 % (k, k) if len (k) == 2 else k1 % (k, k) for k in ks]
+             tbs = map (lambda x: x.strip () + '\n', tb.splitlines ()) # javascript compatibility
+             s.outlist = tbs + ks + ['</defs>\n%%endsvg\n'] + s.outlist
+
+     def writeall (s): # determine the required encoding of the entire ABC output
+         str = ''.join (s.outlist)
+         if s.dojef: str = perc2map (str)
+         if python3: s.outfile.write (str)
+         else: s.outfile.write (str.encode ('utf-8'))
+         if s.pad: s.outfile.close () # close each file with -o option
+         else: s.outfile.write ('\n') # add empty line between tunes on stdout
+         info ('%s written with %d voices' % (s.fnmext, len (s.clefs)), warn=0)
+
443
+ #----------------
444
+ # functions
445
+ #----------------
446
+ def abcLyr (xs, melis): # Convert list xs to abc lyrics.
447
+ if not ''.join (xs): return '', 0 # there is no lyrics in this measure
448
+ res = []
449
+ for x in xs: # xs has for every note a lyrics syllabe or an empty string
450
+ if x == '': # note without lyrics
451
+ if melis: x = '_' # set melisma
452
+ else: x = '*' # skip note
453
+ elif x.endswith ('_') and not x.endswith ('\_'): # start of new melisma
454
+ x = x.replace ('_', '') # remove and set melis boolean
455
+ melis = 1 # so next skips will become melisma
456
+ else: melis = 0 # melisma stops on first syllable
457
+ res.append (x)
458
+ return (' '.join (res), melis)
459
+
460
+ def simplify (a, b): # divide a and b by their greatest common divisor
461
+ x, y = a, b
462
+ while b: a, b = b, a % b
463
+ return x // a, y // a
464
+
465
+ def abcdur (nx, divs, uL): # convert an musicXML duration d to abc units with L:1/uL
466
+ if nx.dur == 0: return '' # when called for elements without duration
467
+ num, den = simplify (uL * nx.dur, divs * 4) # L=1/8 -> uL = 8 units
468
+ if nx.fact: # apply tuplet time modification
469
+ numfac, denfac = nx.fact
470
+ num, den = simplify (num * numfac, den * denfac)
471
+ if den > 64: # limit the denominator to a maximum of 64
472
+ x = float (num) / den; n = math.floor (x); # when just above an integer n
473
+ if x - n < 0.1 * x: num, den = n, 1; # round to n
474
+ num64 = 64. * num / den + 1.0e-15 # to get Python2 behaviour of round
475
+ num, den = simplify (int (round (num64)), 64)
476
+ if num == 1:
477
+ if den == 1: dabc = ''
478
+ elif den == 2: dabc = '/'
479
+ else: dabc = '/%d' % den
480
+ elif den == 1: dabc = '%d' % num
481
+ else: dabc = '%d/%d' % (num, den)
482
+ return dabc
483
+
484
+ def abcMid (note): # abc note -> midi pitch
485
+ r = re.search (r"([_^]*)([A-Ga-g])([',]*)", note)
486
+ if not r: return -1
487
+ acc, n, oct = r.groups ()
488
+ nUp = n.upper ()
489
+ p = 60 + [0,2,4,5,7,9,11]['CDEFGAB'.index (nUp)] + (12 if nUp != n else 0);
490
+ if acc: p += (1 if acc[0] == '^' else -1) * len (acc)
491
+ if oct: p += (12 if oct[0] == "'" else -12) * len (oct)
492
+ return p
493
+
494
+ def staffStep (ptc, o, clef, tstep):
495
+ ndif = 0
496
+ if 'stafflines=1' in clef: ndif += 4 # meaning of one line: E (xml) -> B (abc)
497
+ if not tstep and clef.startswith ('bass'): ndif += 12 # transpose bass -> treble (C3 -> A4)
498
+ if ndif: # diatonic transposition == addition modulo 7
499
+ nm7 = 'C,D,E,F,G,A,B'.split (',')
500
+ n = nm7.index (ptc) + ndif
501
+ ptc, o = nm7 [n % 7], o + n // 7
502
+ if o > 4: ptc = ptc.lower ()
503
+ if o > 5: ptc = ptc + (o-5) * "'"
504
+ if o < 4: ptc = ptc + (4-o) * ","
505
+ return ptc
506
+
507
+ def setKey (fifths, mode):
508
+ sharpness = ['Fb', 'Cb','Gb','Db','Ab','Eb','Bb','F','C','G','D','A', 'E', 'B', 'F#','C#','G#','D#','A#','E#','B#']
509
+ offTab = {'maj':8, 'ion':8, 'm':11, 'min':11, 'aeo':11, 'mix':9, 'dor':10, 'phr':12, 'lyd':7, 'loc':13, 'non':8}
510
+ mode = mode.lower ()[:3] # only first three chars, no case
511
+ key = sharpness [offTab [mode] + fifths] + (mode if offTab [mode] != 8 else '')
512
+ accs = ['F','C','G','D','A','E','B']
513
+ if fifths >= 0: msralts = dict (zip (accs[:fifths], fifths * [1]))
514
+ else: msralts = dict (zip (accs[fifths:], -fifths * [-1]))
515
+ return key, msralts
516
+
517
+ def insTup (ix, notes, fact): # read one nested tuplet
518
+ tupcnt = 0
519
+ nx = notes [ix]
520
+ if 'start' in nx.tup:
521
+ nx.tup.remove ('start') # do recursive calls when starts remain
522
+ tix = ix # index of first tuplet note
523
+ fn, fd = fact # xml time-mod of the higher level
524
+ fnum, fden = nx.fact # xml time-mod of the current level
525
+ tupfact = fnum//fn, fden//fd # abc time mod of this level
526
+ while ix < len (notes):
527
+ nx = notes [ix]
528
+ if isinstance (nx, Elem) or nx.grace:
529
+ ix += 1 # skip all non tuplet elements
530
+ continue
531
+ if 'start' in nx.tup: # more nested tuplets to start
532
+ ix, tupcntR = insTup (ix, notes, tupfact) # ix is on the stop note!
533
+ tupcnt += tupcntR
534
+ elif nx.fact:
535
+ tupcnt += 1 # count tuplet elements
536
+ if 'stop' in nx.tup:
537
+ nx.tup.remove ('stop')
538
+ break
539
+ if not nx.fact: # stop on first non tuplet note
540
+ ix = lastix # back to last tuplet note
541
+ break
542
+ lastix = ix
543
+ ix += 1
544
+ # put abc tuplet notation before the recursive ones
545
+ tup = (tupfact[0], tupfact[1], tupcnt)
546
+ if tup == (3, 2, 3): tupPrefix = '(3'
547
+ else: tupPrefix = '(%d:%d:%d' % tup
548
+ notes [tix].tupabc = tupPrefix + notes [tix].tupabc
549
+ return ix, tupcnt # ix is on the last tuplet note
550
+
551
+ def mkBroken (vs): # introduce broken rhythms (vs: one voice, one measure)
552
+ vs = [n for n in vs if isinstance (n, Note)]
553
+ i = 0
554
+ while i < len (vs) - 1:
555
+ n1, n2 = vs[i], vs[i+1] # scan all adjacent pairs
556
+ # skip if note in tuplet or has no duration or outside beam
557
+ if not n1.fact and not n2.fact and n1.dur > 0 and n2.beam:
558
+ if n1.dur * 3 == n2.dur:
559
+ n2.dur = (2 * n2.dur) // 3
560
+ n1.dur = n1.dur * 2
561
+ n1.after = '<' + n1.after
562
+ i += 1 # do not chain broken rhythms
563
+ elif n2.dur * 3 == n1.dur:
564
+ n1.dur = (2 * n1.dur) // 3
565
+ n2.dur = n2.dur * 2
566
+ n1.after = '>' + n1.after
567
+ i += 1 # do not chain broken rhythms
568
+ i += 1
569
+
570
+ def outVoice (measure, divs, im, ip, unitL): # note/elem objects of one measure in one voice
571
+ ix = 0
572
+ while ix < len (measure): # set all (nested) tuplet annotations
573
+ nx = measure [ix]
574
+ if isinstance (nx, Note) and nx.fact and not nx.grace:
575
+ ix, tupcnt = insTup (ix, measure, (1, 1)) # read one tuplet, insert annotation(s)
576
+ ix += 1
577
+ vs = []
578
+ for nx in measure:
579
+ if isinstance (nx, Note):
580
+ durstr = abcdur (nx, divs, unitL) # xml -> abc duration string
581
+ chord = len (nx.ns) > 1
582
+ cns = [nt[:-1] for nt in nx.ns if nt.endswith ('-')]
583
+ tie = ''
584
+ if chord and len (cns) == len (nx.ns): # all chord notes tied
585
+ nx.ns = cns # chord notes without tie
586
+ tie = '-' # one tie for whole chord
587
+ s = nx.tupabc + ''.join (nx.before)
588
+ if chord: s += '['
589
+ for nt in nx.ns: s += nt
590
+ if chord: s += ']' + tie
591
+ if s.endswith ('-'): s, tie = s[:-1], '-' # split off tie
592
+ s += durstr + tie # and put it back again
593
+ s += nx.after
594
+ nospace = nx.beam
595
+ else:
596
+ if isinstance (nx.str, listtype): nx.str = nx.str [0]
597
+ s = nx.str
598
+ nospace = 1
599
+ if nospace: vs.append (s)
600
+ else: vs.append (' ' + s)
601
+ vs = ''.join (vs) # ad hoc: remove multiple pedal directions
602
+ while vs.find ('!ped!!ped!') >= 0: vs = vs.replace ('!ped!!ped!','!ped!')
603
+ while vs.find ('!ped-up!!ped-up!') >= 0: vs = vs.replace ('!ped-up!!ped-up!','!ped-up!')
604
+ while vs.find ('!8va(!!8va)!') >= 0: vs = vs.replace ('!8va(!!8va)!','') # remove empty ottava's
605
+ return vs
606
+
607
+ def sortMeasure (voice, m):
608
+ voice.sort (key=lambda o: o.tijd) # sort on time
609
+ time = 0
610
+ v = []
611
+ rs = [] # holds rests in between notes
612
+ for i, nx in enumerate (voice): # establish sequentiality
613
+ if nx.tijd > time and chkbug (nx.tijd - time, m):
614
+ v.append (Note (nx.tijd - time, 'x')) # fill hole with invisble rest
615
+ rs.append (len (v) - 1)
616
+ if isinstance (nx, Elem):
617
+ if nx.tijd < time: nx.tijd = time # shift elems without duration to where they fit
618
+ v.append (nx)
619
+ time = nx.tijd
620
+ continue
621
+ if nx.tijd < time: # overlapping element
622
+ if nx.ns[0] == 'z': continue # discard overlapping rest
623
+ if v[-1].tijd <= nx.tijd: # we can do something
624
+ if v[-1].ns[0] == 'z': # shorten rest
625
+ v[-1].dur = nx.tijd - v[-1].tijd
626
+ if v[-1].dur == 0: del v[-1] # nothing left
627
+ info ('overlap in part %d, measure %d: rest shortened' % (m.ixp+1, m.ixm+1))
628
+ else: # make a chord of overlap
629
+ v[-1].ns += nx.ns
630
+ info ('overlap in part %d, measure %d: added chord' % (m.ixp+1, m.ixm+1))
631
+ nx.dur = (nx.tijd + nx.dur) - time # the remains
632
+ if nx.dur <= 0: continue # nothing left
633
+ nx.tijd = time # append remains
634
+ else: # give up
635
+ info ('overlapping notes in one voice! part %d, measure %d, note %s discarded' % (m.ixp+1, m.ixm+1, isinstance (nx, Note) and nx.ns or nx.str))
636
+ continue
637
+ v.append (nx)
638
+ if isinstance (nx, Note):
639
+ if nx.ns [0] in 'zx':
640
+ rs.append (len (v) - 1) # remember rests between notes
641
+ elif len (rs):
642
+ if nx.beam and not nx.grace: # copy beam into rests
643
+ for j in rs: v[j].beam = nx.beam
644
+ rs = [] # clear rests on each note
645
+ time = nx.tijd + nx.dur
646
+ # when a measure contains no elements and no forwards -> no incTime -> s.maxtime = 0 -> right barline
647
+ # is inserted at time == 0 (in addbar) and is only element in the voice when sortMeasure is called
648
+ if time == 0: info ('empty measure in part %d, measure %d, it should contain at least a rest to advance the time!' % (m.ixp+1, m.ixm+1))
649
+ return v
650
+
651
+ def getPartlist (ps): # correct part-list (from buggy xml-software)
652
+ xs = [] # the corrected part-list
653
+ e = [] # stack of opened part-groups
654
+ for x in list (ps): # insert missing stops, delete double starts
655
+ if x.tag == 'part-group':
656
+ num, type = x.get ('number'), x.get ('type')
657
+ if type == 'start':
658
+ if num in e: # missing stop: insert one
659
+ xs.append (E.Element ('part-group', number = num, type = 'stop'))
660
+ xs.append (x)
661
+ else: # normal start
662
+ xs.append (x)
663
+ e.append (num)
664
+ else:
665
+ if num in e: # normal stop
666
+ e.remove (num)
667
+ xs.append (x)
668
+ else: pass # double stop: skip it
669
+ else: xs.append (x)
670
+ for num in reversed (e): # fill missing stops at the end
671
+ xs.append (E.Element ('part-group', number = num, type = 'stop'))
672
+ return xs
673
+
674
+ def parseParts (xs, d, e): # -> [elems on current level], rest of xs
675
+ if not xs: return [],[]
676
+ x = xs.pop (0)
677
+ if x.tag == 'part-group':
678
+ num, type = x.get ('number'), x.get ('type')
679
+ if type == 'start': # go one level deeper
680
+ s = [x.findtext (n, '') for n in ['group-symbol','group-barline','group-name','group-abbreviation']]
681
+ d [num] = s # remember groupdata by group number
682
+ e.append (num) # make stack of open group numbers
683
+ elemsnext, rest1 = parseParts (xs, d, e) # parse one level deeper to next stop
684
+ elems, rest2 = parseParts (rest1, d, e) # parse the rest on this level
685
+ return [elemsnext] + elems, rest2
686
+ else: # stop: close level and return group-data
687
+ nums = e.pop () # last open group number in stack order
688
+ if xs and xs[0].get ('type') == 'stop': # two consequetive stops
689
+ if num != nums: # in the wrong order (tempory solution)
690
+ d[nums], d[num] = d[num], d[nums] # exchange values (only works for two stops!!!)
691
+ sym = d[num] # retrieve an return groupdata as last element of the group
692
+ return [sym], xs
693
+ else:
694
+ elems, rest = parseParts (xs, d, e) # parse remaining elements on current level
695
+ name = x.findtext ('part-name',''), x.findtext ('part-abbreviation','')
696
+ return [name] + elems, rest
697
+
698
+ def bracePart (part): # put a brace on multistaff part and group voices
699
+ if not part: return [] # empty part in the score
700
+ brace = []
701
+ for ivs in part:
702
+ if len (ivs) == 1: # stave with one voice
703
+ brace.append ('%s' % ivs[0])
704
+ else: # stave with multiple voices
705
+ brace += ['('] + ['%s' % iv for iv in ivs] + [')']
706
+ brace.append ('|')
707
+ del brace[-1] # no barline at the end
708
+ if len (part) > 1:
709
+ brace = ['{'] + brace + ['}']
710
+ return brace
711
+
712
+ def prgroupelem (x, gnm, bar, pmap, accVce, accStf): # collect partnames (accVce) and %%score map (accStf)
713
+ if type (x) == tupletype: # partname-tuple = (part-name, part-abbrev)
714
+ y = pmap.pop (0)
715
+ if gnm[0]: x = [n1 + ':' + n2 for n1, n2 in zip (gnm, x)] # put group-name before part-name
716
+ accVce.append (x)
717
+ accStf.extend (bracePart (y))
718
+ elif len (x) == 2 and type (x[0]) == tupletype: # misuse of group just to add extra name to stave
719
+ y = pmap.pop (0)
720
+ nms = [n1 + ':' + n2 for n1, n2 in zip (x[0], x[1][2:])] # x[0] = partname-tuple, x[1][2:] = groupname-tuple
721
+ accVce.append (nms)
722
+ accStf.extend (bracePart (y))
723
+ else:
724
+ prgrouplist (x, bar, pmap, accVce, accStf)
725
+
726
+ def prgrouplist (x, pbar, pmap, accVce, accStf): # collect partnames, scoremap for a part-group
727
+ sym, bar, gnm, gabbr = x[-1] # bracket symbol, continue barline, group-name-tuple
728
+ bar = bar == 'yes' or pbar # pbar -> the parent has bar
729
+ accStf.append (sym == 'brace' and '{' or '[')
730
+ for z in x[:-1]:
731
+ prgroupelem (z, (gnm, gabbr), bar, pmap, accVce, accStf)
732
+ if bar: accStf.append ('|')
733
+ if bar: del accStf [-1] # remove last one before close
734
+ accStf.append (sym == 'brace' and '}' or ']')
735
+
736
+ def compUnitLength (iv, maten, divs): # compute optimal unit length
737
+ uLmin, minLen = 0, max_int
738
+ for uL in [4,8,16]: # try 1/4, 1/8 and 1/16
739
+ vLen = 0 # total length of abc duration strings in this voice
740
+ for im, m in enumerate (maten): # all measures
741
+ for e in m[iv]: # all notes in voice iv
742
+ if isinstance (e, Elem) or e.dur == 0: continue # no real durations
743
+ vLen += len (abcdur (e, divs [im], uL)) # add len of duration string
744
+ if vLen < minLen: uLmin, minLen = uL, vLen # remember the smallest
745
+ return uLmin
746
+
747
+ def doSyllable (syl):
748
+ txt = ''
749
+ for e in syl:
750
+ if e.tag == 'elision': txt += '~'
751
+ elif e.tag == 'text': # escape - and space characters
752
+ txt += (e.text or '').replace ('_','\_').replace('-', r'\-').replace(' ', '~')
753
+ if not txt: return txt
754
+ if syl.findtext('syllabic') in ['begin', 'middle']: txt += '-'
755
+ if syl.find('extend') is not None: txt += '_'
756
+ return txt
757
+
758
+ def checkMelismas (lyrics, maten, im, iv):
759
+ if im == 0: return
760
+ maat = maten [im][iv] # notes of the current measure
761
+ curlyr = lyrics [im][iv] # lyrics dict of current measure
762
+ prvlyr = lyrics [im-1][iv] # lyrics dict of previous measure
763
+ for n, (lyrstr, melis) in prvlyr.items (): # all lyric numbers in the previous measure
764
+ if n not in curlyr and melis: # melisma required, but no lyrics present -> make one!
765
+ ms = getMelisma (maat) # get a melisma for the current measure
766
+ if ms: curlyr [n] = (ms, 0) # set melisma as the n-th lyrics of the current measure
767
+
768
+ def getMelisma (maat): # get melisma from notes in maat
769
+ ms = []
770
+ for note in maat: # every note should get an underscore
771
+ if not isinstance (note, Note): continue # skip Elem's
772
+ if note.grace: continue # skip grace notes
773
+ if note.ns [0] in 'zx': break # stop on first rest
774
+ ms.append ('_')
775
+ return ' '.join (ms)
776
+
777
+ def perc2map (abcIn):
778
+ fillmap = {'diamond':1, 'triangle':1, 'square':1, 'normal':1};
779
+ abc = map (lambda x: x.strip (), percSvg.splitlines ())
780
+ id='default'
781
+ maps = {'default': []};
782
+ dmaps = {'default': []}
783
+ r1 = re.compile (r'V:\s*(\S+)')
784
+ ls = abcIn.splitlines ()
785
+ for x in ls:
786
+ if 'I:percmap' in x:
787
+ noot, step, midi, kop = map (lambda x: x.strip (), x.split ()[1:])
788
+ if kop in fillmap: kop = kop + '+' + ',' + kop
789
+ x = '%%%%map perc%s %s print=%s midi=%s heads=%s' % (id, noot, step, midi, kop)
790
+ maps [id].append (x)
791
+ if '%%MIDI' in x: dmaps [id].append (x)
792
+ if 'V:' in x:
793
+ r = r1.match (x)
794
+ if r:
795
+ id = r.group (1);
796
+ if id not in maps: maps [id] = []; dmaps [id] = []
797
+ ids = sorted (maps.keys ())
798
+ for id in ids: abc += maps [id]
799
+ id='default'
800
+ for x in ls:
801
+ if 'I:percmap' in x: continue
802
+ if '%%MIDI' in x: continue
803
+ if 'V:' in x or 'K:' in x:
804
+ r = r1.match (x)
805
+ if r: id = r.group (1)
806
+ abc.append (x)
807
+ if id in dmaps and len (dmaps [id]) > 0: abc.extend (dmaps [id]); del dmaps [id]
808
+ if 'perc' in x and 'map=' not in x: x += ' map=perc';
809
+ if 'map=perc' in x and len (maps [id]) > 0: abc.append ('%%voicemap perc' + id);
810
+ if 'map=off' in x: abc.append ('%%voicemap');
811
+ else:
812
+ abc.append (x)
813
+ return '\n'.join (abc) + '\n'
814
+
815
+ def addoct (ptc, o): # xml staff step, xml octave number
816
+ p = ptc
817
+ if o > 4: p = ptc.lower ()
818
+ if o > 5: p = p + (o-5) * "'"
819
+ if o < 4: p = p + (4-o) * ","
820
+ return p # abc pitch == abc note without accidental
821
+
822
+ def chkbug (dt, m):
823
+ if dt > m.divs / 16: return 1 # duration should be > 1/64 note
824
+ info ('MuseScore bug: incorrect duration, smaller then 1/64! in measure %d, part %d' % (m.ixm, m.ixp))
825
+ return 0
826
+
827
+ #----------------
828
+ # parser
829
+ #----------------
830
+ class Parser:
831
+ note_alts = [ # 3 alternative notations of the same note for tablature mapping
832
+ [x.strip () for x in '=C, ^C, =D, ^D, =E, =F, ^F, =G, ^G, =A, ^A, =B'.split (',')],
833
+ [x.strip () for x in '^B, _D,^^C, _E, _F, ^E, _G,^^F, _A,^^G, _B, _C'.split (',')],
834
+ [x.strip () for x in '__D,^^B,__E,__F,^^D,__G,^^E,__A,_/A,__B,__C,^^A'.split (',')] ]
835
+ step_map = {'C':0,'D':2,'E':4,'F':5,'G':7,'A':9,'B':11}
836
+ def __init__ (s, options):
837
+ # unfold repeats, number of chars per line, credit filter level, volta option
838
+ s.slurBuf = {} # dict of open slurs keyed by slur number
839
+ s.dirStk = {} # {direction-type + number -> (type, voice | time)} dict for proper closing
840
+ s.ingrace = 0 # marks a sequence of grace notes
841
+ s.msc = Music (options) # global music data abstraction
842
+ s.unfold = options.u # turn unfolding repeats on
843
+ s.ctf = options.c # credit text filter level
844
+ s.gStfMap = [] # [[abc voice numbers] for all parts]
845
+ s.midiMap = [] # midi-settings for each abc voice, in order
846
+ s.drumInst = {} # inst_id -> midi pitch for channel 10 notes
847
+ s.drumNotes = {} # (xml voice, abc note) -> (midi note, note head)
848
+ s.instMid = [] # [{inst id -> midi-settings} for all parts]
849
+ s.midDflt = [-1,-1,-1,-91] # default midi settings for channel, program, volume, panning
850
+ s.msralts = {} # xml-notenames (without octave) with accidentals from the key
851
+ s.curalts = {} # abc-notenames (with voice number) with passing accidentals
852
+ s.stfMap = {} # xml staff number -> [xml voice number]
853
+ s.vce2stf = {} # xml voice number -> allocated staff number
854
+ s.clefMap = {} # xml staff number -> abc clef (for header only)
855
+ s.curClef = {} # xml staff number -> current abc clef
856
+ s.stemDir = {} # xml voice number -> current stem direction
857
+ s.clefOct = {} # xml staff number -> current clef-octave-change
858
+ s.curStf = {} # xml voice number -> current xml staff number
859
+ s.nolbrk = options.x; # generate no linebreaks ($)
860
+ s.jscript = options.j # compatibility with javascript version
861
+ s.ornaments = sorted (note_ornamentation_map.items ())
862
+ s.doPageFmt = len (options.p) == 1 # translate xml page format
863
+ s.tstep = options.t # clef determines step on staff (percussion)
864
+ s.dirtov1 = options.v1 # all directions to first voice of staff
865
+ s.ped = options.ped # render pedal directions
866
+ s.wstems = options.stm # translate stem elements
867
+ s.pedVce = None # voice for pedal directions
868
+ s.repeat_str = {} # staff number -> [measure number, repeat-text]
869
+ s.tabVceMap = {} # abc voice num -> [%%map ...] for tab voices
870
+ s.koppen = {} # noteheads needed for %%map
871
+
872
+ def matchSlur (s, type2, n, v2, note2, grace, stopgrace): # match slur number n in voice v2, add abc code to before/after
873
+ if type2 not in ['start', 'stop']: return # slur type continue has no abc equivalent
874
+ if n == None: n = '1'
875
+ if n in s.slurBuf:
876
+ type1, v1, note1, grace1 = s.slurBuf [n]
877
+ if type2 != type1: # slur complete, now check the voice
878
+ if v2 == v1: # begins and ends in the same voice: keep it
879
+ if type1 == 'start' and (not grace1 or not stopgrace): # normal slur: start before stop and no grace slur
880
+ note1.before = ['('] + note1.before # keep left-right order!
881
+ note2.after += ')'
882
+ # no else: don't bother with reversed stave spanning slurs
883
+ del s.slurBuf [n] # slur finished, remove from stack
884
+ else: # double definition, keep the last
885
+ info ('double slur numbers %s-%s in part %d, measure %d, voice %d note %s, first discarded' % (type2, n, s.msr.ixp+1, s.msr.ixm+1, v2, note2.ns))
886
+ s.slurBuf [n] = (type2, v2, note2, grace)
887
+ else: # unmatched slur, put in dict
888
+ s.slurBuf [n] = (type2, v2, note2, grace)
889
+
890
+ def doNotations (s, note, nttn, isTab):
891
+ for key, val in s.ornaments:
892
+ if nttn.find (key) != None: note.before += [val] # just concat all ornaments
893
+ trem = nttn.find ('ornaments/tremolo')
894
+ if trem != None:
895
+ type = trem.get ('type')
896
+ if type == 'single':
897
+ note.before.insert (0, '!%s!' % (int (trem.text) * '/'))
898
+ else:
899
+ note.fact = None # no time modification in ABC
900
+ if s.tstep: # abc2svg version
901
+ if type == 'stop': note.before.insert (0, '!trem%s!' % trem.text);
902
+ else: # abc2xml version
903
+ if type == 'start': note.before.insert (0, '!%s-!' % (int (trem.text) * '/'));
904
+ fingering = nttn.findall ('technical/fingering')
905
+ for finger in fingering: # handle multiple finger annotations
906
+ if not isTab: note.before += ['!%s!' % finger.text] # fingering goes before chord (addChord)
907
+ snaar = nttn.find ('technical/string')
908
+ if snaar != None and isTab:
909
+ if s.tstep:
910
+ fret = nttn.find ('technical/fret')
911
+ if fret != None: note.tab = (snaar.text, fret.text)
912
+ else:
913
+ deco = '!%s!' % snaar.text # no double string decos (bug in musescore)
914
+ if deco not in note.ntdec: note.ntdec += deco
915
+ wvln = nttn.find ('ornaments/wavy-line')
916
+ if wvln != None:
917
+ if wvln.get ('type') == 'start': note.before = ['!trill(!'] + note.before # keep left-right order!
918
+ elif wvln.get ('type') == 'stop': note.before = ['!trill)!'] + note.before
919
+ glis = nttn.find ('glissando')
920
+ if glis == None: glis = nttn.find ('slide') # treat slide as glissando
921
+ if glis != None:
922
+ lt = '~' if glis.get ('line-type') =='wavy' else '-'
923
+ if glis.get ('type') == 'start': note.before = ['!%s(!' % lt] + note.before # keep left-right order!
924
+ elif glis.get ('type') == 'stop': note.before = ['!%s)!' % lt] + note.before
925
+
926
+ def tabnote (s, alt, ptc, oct, v, ntrec):
927
+ p = s.step_map [ptc] + int (alt or '0') # p in -2 .. 13
928
+ if p > 11: oct += 1 # octave correction
929
+ if p < 0: oct -= 1
930
+ p = p % 12 # remap p into 0..11
931
+ snaar_nw, fret_nw = ntrec.tab # the computed/annotated allocation of nt
932
+ for i in range (4): # support same note on 4 strings
933
+ na = s.note_alts [i % 3] [p] # get alternative representation of same note
934
+ o = oct
935
+ if na in ['^B', '^^B']: o -= 1 # because in adjacent octave
936
+ if na in ['_C', '__C']: o += 1
937
+ if '/' in na or i == 3: o = 9 # emergency notation for 4th string case
938
+ nt = addoct (na, o)
939
+ snaar, fret = s.tabmap.get ((v, nt), ('', '')) # the current allocation of nt
940
+ if not snaar: break # note not yet allocated
941
+ if snaar_nw == snaar: return nt # use present allocation
942
+ if i == 3: # new allocaion needed but none is free
943
+ fmt = 'rejected: voice %d note %3s string %s fret %2s remains: string %s fret %s'
944
+ info (fmt % (v, nt, snaar_nw, fret_nw, snaar, fret), 1)
945
+ ntrec.tab = (snaar, fret)
946
+ s.tabmap [v, nt] = ntrec.tab # for tablature map (voice, note) -> (string, fret)
947
+ return nt # ABC code always in key C (with midi pitch alterations)
948
+
949
+ def ntAbc (s, ptc, oct, note, v, ntrec, isTab): # pitch, octave -> abc notation
950
+ acc2alt = {'double-flat':-2,'flat-flat':-2,'flat':-1,'natural':0,'sharp':1,'sharp-sharp':2,'double-sharp':2}
951
+ oct += s.clefOct.get (s.curStf [v], 0) # minus clef-octave-change value
952
+ acc = note.findtext ('accidental') # should be the notated accidental
953
+ alt = note.findtext ('pitch/alter') # pitch alteration (midi)
954
+ if ntrec.tab: return s.tabnote (alt, ptc, oct, v, ntrec) # implies s.tstep is true (options.t was given)
955
+ elif isTab and s.tstep:
956
+ nt = ['__','_','','^','^^'][int (alt or '0') + 2] + addoct (ptc, oct)
957
+ info ('no string notation found for note %s in voice %d' % (nt, v), 1)
958
+ p = addoct (ptc, oct)
959
+ if alt == None and s.msralts.get (ptc, 0): alt = 0 # no alt but key implies alt -> natural!!
960
+ if alt == None and (p, v) in s.curalts: alt = 0 # no alt but previous note had one -> natural!!
961
+ if acc == None and alt == None: return p # no acc, no alt
962
+ elif acc != None:
963
+ alt = acc2alt [acc] # acc takes precedence over the pitch here!
964
+ else: # now see if we really must add an accidental
965
+ alt = int (float (alt))
966
+ if (p, v) in s.curalts: # the note in this voice has been altered before
967
+ if alt == s.curalts [(p, v)]: return p # alteration still the same
968
+ elif alt == s.msralts.get (ptc, 0): return p # alteration implied by the key
969
+ tieElms = note.findall ('tie') + note.findall ('notations/tied') # in xml we have separate notated ties and playback ties
970
+ if 'stop' in [e.get ('type') for e in tieElms]: return p # don't alter tied notes
971
+ info ('accidental %d added in part %d, measure %d, voice %d note %s' % (alt, s.msr.ixp+1, s.msr.ixm+1, v+1, p))
972
+ s.curalts [(p, v)] = alt
973
+ p = ['__','_','=','^','^^'][alt+2] + p # and finally ... prepend the accidental
974
+ return p
975
+
976
+ def doNote (s, n): # parse a musicXML note tag
977
+ note = Note ()
978
+ v = int (n.findtext ('voice', '1'))
979
+ if s.isSib: v += 100 * int (n.findtext ('staff', '1')) # repair bug in Sibelius
980
+ chord = n.find ('chord') != None
981
+ p = n.findtext ('pitch/step') or n.findtext ('unpitched/display-step')
982
+ o = n.findtext ('pitch/octave') or n.findtext ('unpitched/display-octave')
983
+ r = n.find ('rest')
984
+ numer = n.findtext ('time-modification/actual-notes')
985
+ if numer:
986
+ denom = n.findtext ('time-modification/normal-notes')
987
+ note.fact = (int (numer), int (denom))
988
+ note.tup = [x.get ('type') for x in n.findall ('notations/tuplet')]
989
+ dur = n.findtext ('duration')
990
+ grc = n.find ('grace')
991
+ note.grace = grc != None
992
+ note.before, note.after = [], '' # strings with ABC stuff that goes before or after a note/chord
993
+ if note.grace and not s.ingrace: # open a grace sequence
994
+ s.ingrace = 1
995
+ note.before = ['{']
996
+ if grc.get ('slash') == 'yes': note.before += ['/'] # acciaccatura
997
+ stopgrace = not note.grace and s.ingrace
998
+ if stopgrace: # close the grace sequence
999
+ s.ingrace = 0
1000
+ s.msc.lastnote.after += '}' # close grace on lastenote.after
1001
+ if dur == None or note.grace: dur = 0
1002
+ if r == None and n.get ('print-object') == 'no':
1003
+ if chord: return
1004
+ r = 1 # turn invisible notes (that advance the time) into invisible rests
1005
+ note.dur = int (dur)
1006
+ if r == None and (not p or not o): # not a rest and no pitch
1007
+ s.msc.cnt.inc ('nopt', v) # count unpitched notes
1008
+ o, p = 5,'E' # make it an E5 ??
1009
+ isTab = s.curClef and s.curClef.get (s.curStf [v], '').startswith ('tab')
1010
+ nttn = n.find ('notations') # add ornaments
1011
+ if nttn != None: s.doNotations (note, nttn, isTab)
1012
+ e = n.find ('stem') if r == None else None # no !stemless! before rest
1013
+ if e != None and e.text == 'none' and (not isTab or v in s.hasStems or s.tstep):
1014
+ note.before += ['s']; abcOut.stemless = 1;
1015
+ e = n.find ('accidental')
1016
+ if e != None and e.get ('parentheses') == 'yes': note.ntdec += '!courtesy!'
1017
+ if r != None: noot = 'x' if n.get ('print-object') == 'no' or isTab else 'z'
1018
+ else: noot = s.ntAbc (p, int (o), n, v, note, isTab)
1019
+ if n.find ('unpitched') != None:
1020
+ clef = s.curClef [s.curStf [v]] # the current clef for this voice
1021
+ step = staffStep (p, int (o), clef, s.tstep) # (clef independent) step value of note on the staff
1022
+ instr = n.find ('instrument')
1023
+ instId = instr.get ('id') if instr != None else 'dummyId'
1024
+ midi = s.drumInst.get (instId, abcMid (noot))
1025
+ nh = n.findtext ('notehead', '').replace (' ','-') # replace spaces in xml notehead names for percmap
1026
+ if nh == 'x': noot = '^' + noot.replace ('^','').replace ('_','')
1027
+ if nh in ['circle-x','diamond','triangle']: noot = '_' + noot.replace ('^','').replace ('_','')
1028
+ if nh and n.find ('notehead').get ('filled','') == 'yes': nh += '+'
1029
+ if nh and n.find ('notehead').get ('filled','') == 'no': nh += '-'
1030
+ s.drumNotes [(v, noot)] = (step, midi, nh) # keep data for percussion map
1031
+ tieElms = n.findall ('tie') + n.findall ('notations/tied') # in xml we have separate notated ties and playback ties
1032
+ if 'start' in [e.get ('type') for e in tieElms]: # n can have stop and start tie
1033
+ noot = noot + '-'
1034
+ note.beam = sum ([1 for b in n.findall('beam') if b.text in ['continue', 'end']]) + int (note.grace)
1035
+ lyrlast = 0; rsib = re.compile (r'^.*verse')
1036
+ for e in n.findall ('lyric'):
1037
+ lyrnum = int (rsib.sub ('', e.get ('number', '1'))) # also do Sibelius numbers
1038
+ if lyrnum == 0: lyrnum = lyrlast + 1 # and correct Sibelius bugs
1039
+ else: lyrlast = lyrnum
1040
+ note.lyrs [lyrnum] = doSyllable (e)
1041
+ stemdir = n.findtext ('stem')
1042
+ if s.wstems and (stemdir == 'up' or stemdir == 'down'):
1043
+ if stemdir != s.stemDir.get (v, ''):
1044
+ s.stemDir [v] = stemdir
1045
+ s.msc.appendElem (v, '[I:stemdir %s]' % stemdir)
1046
+ if chord: s.msc.addChord (note, noot)
1047
+ else:
1048
+ xmlstaff = int (n.findtext ('staff', '1'))
1049
+ if s.curStf [v] != xmlstaff: # the note should go to another staff
1050
+ dstaff = xmlstaff - s.curStf [v] # relative new staff number
1051
+ s.curStf [v] = xmlstaff # remember the new staff for this voice
1052
+ s.msc.appendElem (v, '[I:staff %+d]' % dstaff) # insert a move before the note
1053
+ s.msc.appendNote (v, note, noot)
1054
+ for slur in n.findall ('notations/slur'): # s.msc.lastnote points to the last real note/chord inserted above
1055
+ s.matchSlur (slur.get ('type'), slur.get ('number'), v, s.msc.lastnote, note.grace, stopgrace) # match slur definitions
1056
+
1057
+ def doAttr (s, e): # parse a musicXML attribute tag
+ teken = {'C1':'alto1','C2':'alto2','C3':'alto','C4':'tenor','F4':'bass','F3':'bass3','G2':'treble','TAB':'tab','percussion':'perc'}
+ dvstxt = e.findtext ('divisions')
+ if dvstxt: s.msr.divs = int (dvstxt)
+ steps = int (e.findtext ('transpose/chromatic', '0')) # for transposing instrument
+ fifths = e.findtext ('key/fifths')
+ first = s.msc.tijd == 0 and s.msr.ixm == 0 # first attributes in first measure
+ if fifths:
+ key, s.msralts = setKey (int (fifths), e.findtext ('key/mode','major'))
+ if first and not steps and abcOut.key == 'none':
+ abcOut.key = key # first measure -> header, if not transposing instrument or percussion part!
+ elif key != abcOut.key or not first:
+ s.msr.attr += '[K:%s]' % key # otherwise -> voice
+ beats = e.findtext ('time/beats')
+ if beats:
+ unit = e.findtext ('time/beat-type')
+ mtr = beats + '/' + unit
+ if first: abcOut.mtr = mtr # first measure -> header
+ else: s.msr.attr += '[M:%s]' % mtr # otherwise -> voice
+ s.msr.mtr = int (beats), int (unit)
+ s.msr.mdur = (s.msr.divs * s.msr.mtr[0] * 4) // s.msr.mtr[1] # duration of measure in xml-divisions
+ for ms in e.findall('measure-style'):
+ n = int (ms.get ('number', '1')) # staff number
+ voices = s.stfMap [n] # all voices of staff n
+ for mr in ms.findall('measure-repeat'):
+ ty = mr.get('type')
+ if ty == 'start': # remember start measure number and text for each staff
+ s.repeat_str [n] = [s.msr.ixm, mr.text]
+ for v in voices: # insert repeat into all voices, value will be overwritten at stop
+ s.msc.insertElem (v, s.repeat_str [n])
+ elif ty == 'stop': # calculate repeat measure count for this staff n
+ start_ix, text_ = s.repeat_str [n]
+ repeat_count = s.msr.ixm - start_ix
+ if text_:
+ mid_str = "%s " % text_
+ repeat_count /= int (text_)
+ else:
+ mid_str = "" # overwrite repeat with final string
+ s.repeat_str [n][0] = '[I:repeat %s%d]' % (mid_str, repeat_count)
+ del s.repeat_str [n] # remove closed repeats
+ toct = e.findtext ('transpose/octave-change', '')
+ if toct: steps += 12 * int (toct) # extra transposition of toct octaves
+ for clef in e.findall ('clef'): # a part can have multiple staves
+ n = int (clef.get ('number', '1')) # local staff number for this clef
+ sgn = clef.findtext ('sign')
+ line = clef.findtext ('line', '') if sgn not in ['percussion','TAB'] else ''
+ cs = teken.get (sgn + line, '')
+ oct = clef.findtext ('clef-octave-change', '') or '0'
+ if oct: cs += {-2:'-15', -1:'-8', 1:'+8', 2:'+15'}.get (int (oct), '')
+ s.clefOct [n] = -int (oct) # xml playback pitch -> abc notation pitch
+ if steps: cs += ' transpose=' + str (steps)
+ stfdtl = e.find ('staff-details')
+ if stfdtl != None and int (stfdtl.get ('number', '1')) == n:
+ lines = stfdtl.findtext ('staff-lines')
+ if lines:
+ lns = '|||' if lines == '3' and sgn == 'TAB' else lines
+ cs += ' stafflines=%s' % lns
+ s.stafflines = int (lines) # remember for tab staves
+ strings = stfdtl.findall ('staff-tuning')
+ if strings:
+ tuning = [st.findtext ('tuning-step') + st.findtext ('tuning-octave') for st in strings]
+ cs += ' strings=%s' % ','.join (tuning)
+ capo = stfdtl.findtext ('capo')
+ if capo: cs += ' capo=%s' % capo
+ s.curClef [n] = cs # keep track of current clef (for percmap)
+ if first: s.clefMap [n] = cs # clef goes to header (where it is mapped to voices)
+ else:
+ voices = s.stfMap[n] # clef change to all voices of staff n
+ for v in voices:
+ if n != s.curStf [v]: # voice is not at its home staff n
+ dstaff = n - s.curStf [v]
+ s.curStf [v] = n # reset current staff at start of measure to home position
+ s.msc.appendElem (v, '[I:staff %+d]' % dstaff)
+ s.msc.appendElem (v, '[K:%s]' % cs)
+
1132
+ def findVoice (s, i, es):
+ stfnum = int (es[i].findtext ('staff',1)) # directions belong to a staff
+ vs = s.stfMap [stfnum] # voices in this staff
+ v1 = vs [0] if vs else 1 # directions to first voice of staff
+ if s.dirtov1: return stfnum, v1, v1 # option --v1
+ for e in es [i+1:]: # or to the voice of the next note
+ if e.tag == 'note':
+ v = int (e.findtext ('voice', '1'))
+ if s.isSib: v += 100 * int (e.findtext ('staff', '1')) # repair bug in Sibelius
+ stf = s.vce2stf [v] # use our own staff allocation
+ return stf, v, v1 # voice of next note, first voice of staff
+ if e.tag == 'backup': break
+ return stfnum, v1, v1 # no note found, fall back to v1
+
1146
+ def doDirection (s, e, i, es): # parse a musicXML direction tag
+ def addDirection (x, vs, tijd, stfnum):
+ if not x: return
+ vs = s.stfMap [stfnum] if '!8v' in x else [vs] # ottava's go to all voices of staff
+ for v in vs:
+ if tijd != None: # insert at time of encounter
+ s.msc.appendElemT (v, x.replace ('(',')').replace ('ped','ped-up'), tijd)
+ else:
+ s.msc.appendElem (v, x)
+ def startStop (dtype, vs, stfnum=1):
+ typmap = {'down':'!8va(!', 'up':'!8vb(!', 'crescendo':'!<(!', 'diminuendo':'!>(!', 'start':'!ped!'}
+ type = t.get ('type', '')
+ k = dtype + t.get ('number', '1') # key to match the closing direction
+ if type in typmap: # opening the direction
+ x = typmap [type]
+ if k in s.dirStk: # closing direction already encountered
+ stype, tijd = s.dirStk [k]; del s.dirStk [k]
+ if stype == 'stop':
+ addDirection (x, vs, tijd, stfnum)
+ else:
+ info ('%s direction %s has no stop in part %d, measure %d, voice %d' % (dtype, stype, s.msr.ixp+1, s.msr.ixm+1, vs+1))
+ s.dirStk [k] = ((type, vs)) # remember voice and type for closing
+ else:
+ s.dirStk [k] = ((type, vs)) # remember voice and type for closing
+ elif type == 'stop':
+ if k in s.dirStk: # matching open direction found
+ type, vs = s.dirStk [k]; del s.dirStk [k] # into the same voice
+ if type == 'stop':
+ info ('%s direction %s has double stop in part %d, measure %d, voice %d' % (dtype, type, s.msr.ixp+1, s.msr.ixm+1, vs+1))
+ x = ''
+ else:
+ x = typmap [type].replace ('(',')').replace ('ped','ped-up')
+ else: # closing direction found before opening
+ s.dirStk [k] = ('stop', s.msc.tijd)
+ x = '' # delay code generation until opening found
+ else: raise ValueError ('wrong direction type')
+ addDirection (x, vs, None, stfnum)
+ tempo, wrdstxt = None, ''
+ plcmnt = e.get ('placement')
+ stf, vs, v1 = s.findVoice (i, es)
+ jmp = '' # for jump sound elements: dacapo, dalsegno and family
+ jmps = [('dacapo','D.C.'),('dalsegno','D.S.'),('tocoda','dacoda'),('fine','fine'),('coda','O'),('segno','S')]
+ t = e.find ('sound') # there are many possible attributes for sound
+ if t != None:
+ minst = t.find ('midi-instrument')
+ if minst != None:
+ prg = t.findtext ('midi-instrument/midi-program')
+ chn = t.findtext ('midi-instrument/midi-channel')
+ vids = [v for v, id in s.vceInst.items () if id == minst.get ('id')]
+ if vids: vs = vids [0] # direction for the identified voice, not the staff
+ parm, inst = ('program', str (int (prg) - 1)) if prg else ('channel', chn)
+ if inst and abcOut.volpan > 0: s.msc.appendElem (vs, '[I:MIDI= %s %s]' % (parm, inst))
+ tempo = t.get ('tempo') # look for tempo attribute
+ if tempo:
+ tempo = '%.0f' % float (tempo) # hope it is a number and insert in voice 1
+ tempo_units = (1,4) # always 1/4 for sound elements!
+ for r, v in jmps:
+ if t.get (r, ''): jmp = v; break
+ dirtypes = e.findall ('direction-type')
+ for dirtyp in dirtypes:
+ units = { 'whole': (1,1), 'half': (1,2), 'quarter': (1,4), 'eighth': (1,8) }
+ metr = dirtyp.find ('metronome')
+ if metr != None:
+ t = metr.findtext ('beat-unit', '')
+ if t in units: tempo_units = units [t]
+ else: tempo_units = units ['quarter']
+ if metr.find ('beat-unit-dot') != None:
+ tempo_units = simplify (tempo_units [0] * 3, tempo_units [1] * 2)
+ tmpro = re.search (r'[.\d]+', metr.findtext ('per-minute')) # look for a number
+ if tmpro: tempo = tmpro.group () # overwrites the value set by the sound element of this direction
+ t = dirtyp.find ('wedge')
+ if t != None: startStop ('wedge', vs)
+ allwrds = dirtyp.findall ('words') # insert text annotations
+ if not allwrds: allwrds = dirtyp.findall ('rehearsal') # treat rehearsal mark as text annotation
+ for wrds in allwrds:
+ if jmp: # ignore the words when a jump sound element is present in this direction
+ s.msc.appendElem (vs, '!%s!' % jmp, 1) # to voice
+ break
+ plc = plcmnt == 'below' and '_' or '^'
+ if float (wrds.get ('default-y', '0')) < 0: plc = '_'
+ wrdstxt += (wrds.text or '').replace ('"','\\"').replace ('\n', '\\n')
+ wrdstxt = wrdstxt.strip ()
+ for key, val in dynamics_map.items ():
+ if dirtyp.find ('dynamics/' + key) != None:
+ s.msc.appendElem (vs, val, 1) # to voice
+ if dirtyp.find ('coda') != None: s.msc.appendElem (vs, 'O', 1)
+ if dirtyp.find ('segno') != None: s.msc.appendElem (vs, 'S', 1)
+ t = dirtyp.find ('octave-shift')
+ if t != None: startStop ('octave-shift', vs, stf) # assume size == 8 for the time being
+ t = dirtyp.find ('pedal')
+ if t != None and s.ped:
+ if not s.pedVce: s.pedVce = vs
+ startStop ('pedal', s.pedVce)
+ if dirtyp.findtext ('other-direction') == 'diatonic fretting': s.diafret = 1
+ if tempo:
+ tempo = '%.0f' % float (tempo) # hope it is a number and insert in voice 1
+ if s.msc.tijd == 0 and s.msr.ixm == 0: # first measure -> header
+ abcOut.tempo = tempo
+ abcOut.tempo_units = tempo_units
+ else:
+ s.msc.appendElem (v1, '[Q:%d/%d=%s]' % (tempo_units [0], tempo_units [1], tempo)) # otherwise -> 1st voice
+ if wrdstxt: s.msc.appendElem (vs, '"%s%s"' % (plc, wrdstxt), 1) # to voice, but after tempo
+
1249
+ def doHarmony (s, e, i, es): # parse a musicXML harmony tag
+ _, vt, _ = s.findVoice (i, es)
+ short = {'major':'', 'minor':'m', 'augmented':'+', 'diminished':'dim', 'dominant':'7', 'half-diminished':'m7b5'}
+ accmap = {'major':'maj', 'dominant':'', 'minor':'m', 'diminished':'dim', 'augmented':'+', 'suspended':'sus'}
+ modmap = {'second':'2', 'fourth':'4', 'seventh':'7', 'sixth':'6', 'ninth':'9', '11th':'11', '13th':'13'}
+ altmap = {'1':'#', '0':'', '-1':'b'}
+ root = e.findtext ('root/root-step','')
+ alt = altmap.get (e.findtext ('root/root-alter'), '')
+ sus = ''
+ kind = e.findtext ('kind', '')
+ if kind in short: kind = short [kind]
+ elif '-' in kind: # xml chord names: <triad name>-<modification>
+ triad, mod = kind.split ('-')
+ kind = accmap.get (triad, '') + modmap.get (mod, '')
+ if kind.startswith ('sus'): kind, sus = '', kind # sus-suffix goes to the end
+ elif kind == 'none': kind = e.find ('kind').get ('text','')
+ degrees = e.findall ('degree')
+ for d in degrees: # chord alterations
+ kind += altmap.get (d.findtext ('degree-alter'),'') + d.findtext ('degree-value','')
+ kind = kind.replace ('79','9').replace ('713','13').replace ('maj6','6')
+ bass = e.findtext ('bass/bass-step','') + altmap.get (e.findtext ('bass/bass-alter'),'')
+ s.msc.appendElem (vt, '"%s%s%s%s%s"' % (root, alt, kind, sus, bass and '/' + bass), 1) # e.g. root C, kind m7b5, bass Eb -> "Cm7b5/Eb"
+
1272
+ def doBarline (s, e): # 0 = no repeat, 1 = begin repeat, 2 = end repeat
+ rep = e.find ('repeat')
+ if rep != None: rep = rep.get ('direction')
+ if s.unfold: # unfold repeat, don't translate barlines
+ return rep and (rep == 'forward' and 1 or 2) or 0
+ loc = e.get ('location', 'right') # right is the default
+ if loc == 'right': # only change style for the right side
+ style = e.findtext ('bar-style')
+ if style == 'light-light': s.msr.rline = '||'
+ elif style == 'light-heavy': s.msr.rline = '|]'
+ if rep != None: # repeat found
+ if rep == 'forward': s.msr.lline = ':'
+ else: s.msr.rline = ':|' # override barline style
+ end = e.find ('ending')
+ if end != None:
+ if end.get ('type') == 'start':
+ n = end.get ('number', '1').replace ('.','').replace (' ','')
+ try: list (map (int, n.split (','))) # should be a list of integers
+ except: n = '"%s"' % n.strip () # illegal musicXML
+ s.msr.lnum = n # assume a start is always at the beginning of a measure
+ elif s.msr.rline == '|': # stop and discontinue the same in ABC ?
+ s.msr.rline = '||' # to stop on a normal barline use || in ABC ?
+ return 0
+
+ def doPrint (s, e): # print element, measure number -> insert a line break
+ if e.get ('new-system') == 'yes' or e.get ('new-page') == 'yes':
+ if not s.nolbrk: return '$' # a line break
+
1300
+ def doPartList (s, e): # translate the start/stop-event-based xml-partlist into proper tree
+ for sp in e.findall ('part-list/score-part'):
+ midi = {}
+ for m in sp.findall ('midi-instrument'):
+ x = [m.findtext (p, s.midDflt [i]) for i,p in enumerate (['midi-channel','midi-program','volume','pan'])]
+ pan = float (x[3])
+ if pan >= -90 and pan <= 90: # would be better to map behind-pannings
+ pan = (float (x[3]) + 90) / 180 * 127 # xml between -90 and +90
+ midi [m.get ('id')] = [int (x[0]), int (x[1]), float (x[2]) * 1.27, pan] # volume 100 -> midi 127
+ up = m.findtext ('midi-unpitched')
+ if up: s.drumInst [m.get ('id')] = int (up) - 1 # store midi-pitch for channel 10 notes
+ s.instMid.append (midi)
+ ps = e.find ('part-list') # partlist = [groupelem]
+ xs = getPartlist (ps) # groupelem = partname | grouplist
+ partlist, _ = parseParts (xs, {}, []) # grouplist = [groupelem, ..., groupdata]
+ return partlist # groupdata = [group-symbol, group-barline, group-name, group-abbrev]
+
1317
+ def mkTitle (s, e):
+ def filterCredits (y): # y == filter level, higher filters less
+ cs = []
+ for x in credits: # skip redundant credit lines
+ if y < 6 and (x in title or x in mvttl): continue # sure skip
+ if y < 5 and (x in composer or x in lyricist): continue # almost sure skip
+ if y < 4 and ((title and title in x) or (mvttl and mvttl in x)): continue # may skip too much
+ if y < 3 and ([1 for c in composer if c in x] or [1 for c in lyricist if c in x]): continue # skips too much
+ if y < 2 and re.match (r'^[\d\W]*$', x): continue # line only contains numbers and punctuation
+ cs.append (x)
+ if y == 0 and (title + mvttl): cs = '' # default: only credit when no title set
+ return cs
+ title = e.findtext ('work/work-title', '').strip ()
+ mvttl = e.findtext ('movement-title', '').strip ()
+ composer, lyricist, credits = [], [], []
+ for creator in e.findall ('identification/creator'):
+ if creator.text:
+ if creator.get ('type') == 'composer':
+ composer += [line.strip () for line in creator.text.split ('\n')]
+ elif creator.get ('type') in ('lyricist', 'transcriber'):
+ lyricist += [line.strip () for line in creator.text.split ('\n')]
+ for rights in e.findall ('identification/rights'):
+ if rights.text:
+ lyricist += [line.strip () for line in rights.text.split ('\n')]
+ for credit in e.findall('credit'):
+ cs = ''.join (e.text or '' for e in credit.findall('credit-words'))
+ credits += [re.sub (r'\s*[\r\n]\s*', ' ', cs)]
+ credits = filterCredits (s.ctf)
+ if title: title = 'T:%s\n' % title.replace ('\n', '\nT:')
+ if mvttl: title += 'T:%s\n' % mvttl.replace ('\n', '\nT:')
+ if credits: title += '\n'.join (['T:%s' % c for c in credits]) + '\n'
+ if composer: title += '\n'.join (['C:%s' % c for c in composer]) + '\n'
+ if lyricist: title += '\n'.join (['Z:%s' % c for c in lyricist]) + '\n'
+ if title: abcOut.title = title[:-1]
+ s.isSib = 'Sibelius' in (e.findtext ('identification/encoding/software') or '')
+ if s.isSib: info ('Sibelius MusicXML is unreliable')
+
1354
+ def doDefaults (s, e):
+ if not s.doPageFmt: return # return if -pf option absent
+ d = e.find ('defaults')
+ if d == None: return
+ mils = d.findtext ('scaling/millimeters') # mils == staff height (mm)
+ tenths = d.findtext ('scaling/tenths') # staff height in tenths
+ if not mils or not tenths: return
+ xmlScale = float (mils) / float (tenths) / 10 # tenths -> mm
+ space = 10 * xmlScale # space between staff lines == 10 tenths
+ abcScale = space / 0.2117 # 0.2117 cm = 6pt = space between staff lines for scale = 1.0 in abcm2ps
+ abcOut.pageFmt ['scale'] = abcScale
+ eks = 2 * ['page-layout/'] + 4 * ['page-layout/page-margins/']
+ eks = [a+b for a,b in zip (eks, 'page-height,page-width,left-margin,right-margin,top-margin,bottom-margin'.split (','))]
+ for i in range (6):
+ v = d.findtext (eks [i])
+ k = abcOut.pagekeys [i+1] # pagekeys [0] == scale already done, skip it
+ if not abcOut.pageFmt [k] and v:
+ try: abcOut.pageFmt [k] = float (v) * xmlScale # -> cm
+ except: info ('illegal value %s for xml element %s' % (v, eks [i])); continue # just skip illegal values
+
1374
+ def locStaffMap (s, part, maten): # map voice to staff with majority voting
+ vmap = {} # {voice -> {staff -> n}} count occurrences of voice in staff
+ s.vceInst = {} # {voice -> instrument id} for this part
+ s.msc.vnums = {} # voice id's
+ s.hasStems = {} # XML voice nums with at least one note with a stem (for tab key)
+ s.stfMap, s.clefMap = {}, {} # staff -> [voices], staff -> clef
+ ns = part.findall ('measure/note')
+ for n in ns: # count staff allocations for all notes
+ v = int (n.findtext ('voice', '1'))
+ if s.isSib: v += 100 * int (n.findtext ('staff', '1')) # repair bug in Sibelius
+ s.msc.vnums [v] = 1 # collect all used voice id's in this part
+ sn = int (n.findtext ('staff', '1'))
+ s.stfMap [sn] = []
+ if v not in vmap:
+ vmap [v] = {sn:1}
+ else:
+ d = vmap[v] # counter for voice v
+ d[sn] = d.get (sn, 0) + 1 # ++ number of allocations for staff sn
+ x = n.find ('instrument')
+ if x != None: s.vceInst [v] = x.get ('id')
+ x, noRest = n.findtext ('stem'), n.find ('rest') == None
+ if noRest and (not x or x != 'none'): s.hasStems [v] = 1 # XML voice v has at least one stem
+ vks = list (vmap.keys ())
+ if s.jscript or s.isSib: vks.sort ()
+ for v in vks: # choose staff with most allocations for each voice
+ xs = [(n, sn) for sn, n in vmap[v].items ()]
+ xs.sort ()
+ stf = xs[-1][1] # the winner: staff with most notes of voice v
+ s.stfMap [stf].append (v)
+ s.vce2stf [v] = stf # reverse map
+ s.curStf [v] = stf # current staff of XML voice v
+
1406
+ def addStaffMap (s, vvmap): # vvmap: xml voice number -> global abc voice number
+ part = [] # default: brace on staffs of one part
+ for stf, voices in sorted (s.stfMap.items ()): # s.stfMap has xml staff and voice numbers
+ locmap = [vvmap [iv] for iv in voices if iv in vvmap]
+ nostem = [(iv not in s.hasStems) for iv in voices if iv in vvmap] # same order as locmap
+ if locmap: # abc voice number of staff stf
+ part.append (locmap)
+ clef = s.clefMap.get (stf, 'treble') # {xml staff number -> clef}
+ for i, iv in enumerate (locmap):
+ clef_attr = ''
+ if clef.startswith ('tab'):
+ if nostem [i] and 'nostems' not in clef: clef_attr = ' nostems'
+ if s.diafret and 'diafret' not in clef: clef_attr += ' diafret' # for all voices in the part
+ abcOut.clefs [iv] = clef + clef_attr # add nostems when all notes of voice had no stem
+ s.gStfMap.append (part)
+
1422
+ def addMidiMap (s, ip, vvmap): # map abc voices to midi settings
+ instr = s.instMid [ip] # get the midi settings for this part
+ if instr.values (): defInstr = list (instr.values ())[0] # default settings = first instrument
+ else: defInstr = s.midDflt # no instruments defined
+ xs = []
+ for v, vabc in vvmap.items (): # xml voice num, abc voice num
+ ks = sorted (s.drumNotes.items ())
+ ds = [(nt, step, midi, head) for (vd, nt), (step, midi, head) in ks if v == vd] # map perc notes
+ id = s.vceInst.get (v, '') # get the instrument-id for part with multiple instruments
+ if id in instr: # id is defined as midi-instrument in part-list
+ xs.append ((vabc, instr [id] + ds)) # get midi settings for id
+ else: xs.append ((vabc, defInstr + ds)) # only one instrument for this part
+ xs.sort () # put abc voices in order
+ s.midiMap.extend ([midi for v, midi in xs])
+ snaarmap = ['E','G','B','d', 'f', 'a', "c'", "e'"]
+ diamap = '0,1-,1,1+,2,3,3,4,4,5,6,6+,7,8-,8,8+,9,10,10,11,11,12,13,13+,14'.split (',')
+ for k in sorted (s.tabmap.keys ()): # add %%map's for all tab voices
+ v, noot = k
+ snaar, fret = s.tabmap [k]
+ if s.diafret: fret = diamap [int (fret)]
+ vabc = vvmap [v]
+ snaar = s.stafflines - int (snaar)
+ xs = s.tabVceMap.get (vabc, [])
+ xs.append ('%%%%map tab%d %s print=%s heads=kop%s\n' % (vabc, noot, snaarmap [snaar], fret))
+ s.tabVceMap [vabc] = xs
+ s.koppen [fret] = 1 # collect noteheads for SVG defs
+
1449
+ def parse (s, fobj):
+ vvmapAll = {} # collect xml->abc voice maps (vvmap) of all parts
+ e = E.parse (fobj)
+ s.mkTitle (e)
+ s.doDefaults (e)
+ partlist = s.doPartList (e)
+ parts = e.findall ('part')
+ for ip, p in enumerate (parts):
+ maten = p.findall ('measure')
+ s.locStaffMap (p, maten) # {voice -> staff} for this part
+ s.drumNotes = {} # (xml voice, abc note) -> (midi note, note head)
+ s.clefOct = {} # xml staff number -> current clef-octave-change
+ s.curClef = {} # xml staff number -> current abc clef
+ s.stemDir = {} # xml voice number -> current stem direction
+ s.tabmap = {} # (xml voice, abc note) -> (string, fret)
+ s.diafret = 0 # use diatonic fretting
+ s.stafflines = 5
+ s.msc.initVoices (newPart = 1) # create all voices
+ aantalHerhaald = 0 # keep track of number of repetitions
+ herhaalMaat = 0 # target measure of the repetition
+ divisions = [] # current value of <divisions> for each measure
+ s.msr = Measure (ip) # various measure data
+ while s.msr.ixm < len (maten):
+ maat = maten [s.msr.ixm]
+ herhaal, lbrk = 0, ''
+ s.msr.reset ()
+ s.curalts = {} # passing accidentals are reset each measure
+ es = list (maat)
+ for i, e in enumerate (es):
+ if e.tag == 'note': s.doNote (e)
+ elif e.tag == 'attributes': s.doAttr (e)
+ elif e.tag == 'direction': s.doDirection (e, i, es)
+ elif e.tag == 'sound': s.doDirection (maat, i, es) # sound element directly in measure!
+ elif e.tag == 'harmony': s.doHarmony (e, i, es)
+ elif e.tag == 'barline': herhaal = s.doBarline (e)
+ elif e.tag == 'backup':
+ dt = int (e.findtext ('duration'))
+ if chkbug (dt, s.msr): s.msc.incTime (-dt)
+ elif e.tag == 'forward':
+ dt = int (e.findtext ('duration'))
+ if chkbug (dt, s.msr): s.msc.incTime (dt)
+ elif e.tag == 'print': lbrk = s.doPrint (e)
+ s.msc.addBar (lbrk, s.msr)
+ divisions.append (s.msr.divs)
+ if herhaal == 1: # begin repeat: remember the jump target
+ herhaalMaat = s.msr.ixm
+ s.msr.ixm += 1
+ elif herhaal == 2: # end repeat
+ if aantalHerhaald < 1: # jump back once
+ s.msr.ixm = herhaalMaat
+ aantalHerhaald += 1
+ else:
+ aantalHerhaald = 0 # reset
+ s.msr.ixm += 1 # just continue
+ else: s.msr.ixm += 1 # on to the next measure
+ for rv in s.repeat_str.values (): # close hanging measure-repeats without stop
+ rv [0] = '[I:repeat %s %d]' % (rv [1], 1)
+ vvmap = s.msc.outVoices (divisions, ip, s.isSib)
+ s.addStaffMap (vvmap) # update global staff map
+ s.addMidiMap (ip, vvmap)
+ vvmapAll.update (vvmap)
+ if vvmapAll: # skip output if no part has any notes
+ abcOut.mkHeader (s.gStfMap, partlist, s.midiMap, s.tabVceMap, s.koppen)
+ abcOut.writeall ()
+ else: info ('nothing written, %s has no notes ...' % abcOut.fnmext)
+
1515
+ #----------------
+ # Main Program
+ #----------------
+ if __name__ == '__main__':
+ from optparse import OptionParser
+ from glob import glob
+ from zipfile import ZipFile
+ ustr = '%prog [-h] [-u] [-m] [-c C] [-d D] [-n CPL] [-b BPL] [-o DIR] [-v V]\n'
+ ustr += '[-x] [-p PFMT] [-t] [-s] [-i] [--v1] [--noped] [--stems] <file1> [<file2> ...]'
+ parser = OptionParser (usage=ustr, version=str (VERSION))
+ parser.add_option ("-u", action="store_true", help="unfold simple repeats")
+ parser.add_option ("-m", action="store", help="0 -> no %%MIDI, 1 -> minimal %%MIDI, 2 -> all %%MIDI", default=0)
+ parser.add_option ("-c", action="store", type="int", help="set credit text filter to C", default=0, metavar='C')
+ parser.add_option ("-d", action="store", type="int", help="set L:1/D", default=0, metavar='D')
+ parser.add_option ("-n", action="store", type="int", help="CPL: max number of characters per line (default 100)", default=0, metavar='CPL')
+ parser.add_option ("-b", action="store", type="int", help="BPL: max number of bars per line", default=0, metavar='BPL')
+ parser.add_option ("-o", action="store", help="store abc files in DIR", default='', metavar='DIR')
+ parser.add_option ("-v", action="store", type="int", help="set volta typesetting behaviour to V", default=0, metavar='V')
+ parser.add_option ("-x", action="store_true", help="output no line breaks")
+ parser.add_option ("-p", action="store", help="pageformat PFMT (cm) = scale, pageheight, pagewidth, leftmargin, rightmargin, topmargin, botmargin", default='', metavar='PFMT')
+ parser.add_option ("-j", action="store_true", help="switch for compatibility with javascript version")
+ parser.add_option ("-t", action="store_true", help="translate perc- and tab-staff to ABC code with %%map, %%voicemap")
+ parser.add_option ("-s", action="store_true", help="shift note heads 3 units left in a tab staff")
+ parser.add_option ("--v1", action="store_true", help="start-stop directions always to first voice of staff")
+ parser.add_option ("--noped", action="store_false", help="skip all pedal directions", dest='ped', default=True)
+ parser.add_option ("--stems", action="store_true", help="translate stem directions", dest='stm', default=False)
+ parser.add_option ("-i", action="store_true", help="read xml file from standard input")
+ options, args = parser.parse_args ()
+ if options.n < 0: parser.error ('only values >= 0')
+ if options.b < 0: parser.error ('only values >= 0')
+ if options.d and options.d not in [2**n for n in range (10)]:
+ parser.error ('D should be one of %s' % ','.join ([str (2**n) for n in range (10)]))
+ options.p = options.p and options.p.split (',') or [] # ==> [] | [string]
+ if len (args) == 0 and not options.i: parser.error ('no input file given')
+ pad = options.o
+ if pad:
+ if not os.path.exists (pad): os.mkdir (pad)
+ if not os.path.isdir (pad): parser.error ('%s is not a directory' % pad)
+ fnmext_list = []
+ for i in args: fnmext_list += glob (i)
+ if options.i: fnmext_list = ['stdin.xml']
+ if not fnmext_list: parser.error ('none of the input files exist')
+ for X, fnmext in enumerate (fnmext_list):
+ fnm, ext = os.path.splitext (fnmext)
+ if ext.lower () not in ('.xml', '.mxl', '.musicxml'):
+ info ('skipped input file %s, it should have extension .xml, .mxl or .musicxml' % fnmext)
+ continue
+ if os.path.isdir (fnmext):
+ info ('skipped directory %s. Only files are accepted' % fnmext)
+ continue
+ if fnmext == 'stdin.xml':
+ fobj = sys.stdin
+ elif ext.lower () == '.mxl': # extract .xml file from .mxl file
+ z = ZipFile (fnmext)
+ for n in z.namelist (): # assume there is always an xml file in a mxl archive !!
+ if (n[:4] != 'META') and (n[-4:].lower () == '.xml'):
+ fobj = z.open (n)
+ break # assume only one MusicXML file per archive
+ else:
+ fobj = open (fnmext, 'rb') # open regular xml file
+
+ abcOut = ABCoutput (fnm + '.abc', pad, X, options) # create global ABC output object
+ psr = Parser (options) # xml parser
+ try:
+ psr.parse (fobj) # parse file fobj and write abc to <fnm>.abc
+ except:
+ etype, value, traceback = sys.exc_info () # works in python 2 & 3
+ info ('** %s occurred: %s in %s' % (etype, value, fnmext), 0)
semantic_search/README.md ADDED
@@ -0,0 +1,62 @@
+ # Semantic Search Codebase
+
+ ## Overview
+ CLaMP 2 is a state-of-the-art multimodal music information retrieval system designed to work with 101 languages. This codebase includes scripts for evaluating model performance, performing semantic searches, and calculating similarity metrics based on CLaMP2-extracted **normalized** feature vectors from music or text data. Below is a description of the scripts contained in the `semantic_search/` folder.
+
+ ## Repository Structure
+ The `semantic_search/` folder contains the following scripts:
+
+ ### 1. `clamp2_score.py`
+ This script calculates the cosine similarity between the average feature vectors extracted from two sets of `.npy` files, serving as a measure of similarity between the reference and test datasets.
+
+ It can be used to validate the semantic similarity between generated music and ground truth, providing an objective metric. Through empirical observation, we found that this metric aligns well with subjective judgments made by individuals with professional music expertise.
+
+ **Usage:**
+ ```bash
+ python clamp2_score.py <reference_folder> <test_folder>
+ ```
+ - `reference_folder`: Path to the folder containing reference `.npy` files.
+ - `test_folder`: Path to the folder containing test `.npy` files.
+
+ **Functionality:**
+ - Loads all `.npy` files from the specified folders.
+ - Computes the average feature vector for each folder.
+ - Calculates the cosine similarity between the two averaged vectors.
+ - Outputs the similarity score rounded to four decimal places.
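+
+ For example, assuming two hypothetical folders `features/reference` and `features/generated`, each holding normalized features produced by `extract_clamp2.py`:
+
+ ```bash
+ python clamp2_score.py features/reference features/generated
+ ```
+
+ The script prints a single score in [-1, 1]; higher values indicate that the two sets are, on average, semantically closer.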
+
+ ### 2. `semantic_search.py`
+ This script performs semantic search by calculating the cosine similarity between a query feature and a set of features stored in `.npy` files.
+
+ **Usage:**
+ ```bash
+ python semantic_search.py <query_file> <features_folder> [--top_k TOP_K]
+ ```
+ - `query_file`: Path to the query feature file (e.g., `ballad.npy`).
+ - `features_folder`: Path to the folder containing feature files for comparison.
+ - `--top_k`: (Optional) Number of top similar items to display. Defaults to 10 if not specified.
+
+ **Functionality:**
+ - Loads a query feature from the specified file.
+ - Loads feature vectors from the given folder.
+ - Computes cosine similarity between the query feature and each loaded feature vector.
+ - Displays the top K most similar features along with their similarity scores.
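+
+ For example, to list the five pieces whose features are closest to the query file `ballad.npy` (the folder name below is illustrative):
+
+ ```bash
+ python semantic_search.py ballad.npy features/music --top_k 5
+ ```
+
+ Each output line pairs a file name (without the `.npy` extension) with its cosine similarity to the query.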
43
+
44
+ ### 3. `semantic_search_metrics.py`
45
+ This script calculates evaluation metrics for semantic search by comparing query features to reference features.
46
+
47
+ **Usage:**
48
+ ```bash
49
+ python semantic_search_metrics.py <query_folder> <reference_folder>
50
+ ```
51
+ - `query_folder`: Path to the folder containing query features (in `.npy` format).
52
+ - `reference_folder`: Path to the folder containing reference features (in `.npy` format).
53
+
54
+ **Functionality:**
55
+ - Loads query features from the specified folder.
56
+ - Loads reference features from the given folder.
57
+ - Computes the following metrics based on cosine similarity:
58
+ - **Mean Reciprocal Rank (MRR)**
59
+ - **Hit@1**
60
+ - **Hit@10**
61
+ - **Hit@100**
62
+ - Outputs the calculated metrics to the console.
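+
+ As a worked example: queries and references are matched by file name, so for each query the correct reference is the one sharing its name. If that reference ranks 3rd among all references by cosine similarity, the query contributes a reciprocal rank of 1/3 ≈ 0.333 to the MRR and counts toward Hit@10 and Hit@100, but not Hit@1. The reported numbers are these contributions averaged over all queries.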
semantic_search/clamp2_score.py ADDED
@@ -0,0 +1,57 @@
+ import os
+ import numpy as np
+ import argparse
+
+ def load_npy_files(folder_path):
+ """
+ Load all .npy files from a specified folder and return a list of numpy arrays.
+ """
+ npy_list = []
+ for file_name in os.listdir(folder_path):
+ if file_name.endswith('.npy'):
+ file_path = os.path.join(folder_path, file_name)
+ np_array = np.load(file_path)[0]
+ npy_list.append(np_array)
+ return npy_list
+
+ def average_npy(npy_list):
+ """
+ Compute the average of a list of numpy arrays.
+ """
+ return np.mean(npy_list, axis=0)
+
+ def cosine_similarity(vec1, vec2):
+ """
+ Compute cosine similarity between two numpy arrays.
+ """
+ dot_product = np.dot(vec1, vec2)
+
+ norm_vec1 = np.linalg.norm(vec1)
+ norm_vec2 = np.linalg.norm(vec2)
+
+ cosine_sim = dot_product / (norm_vec1 * norm_vec2)
+
+ return cosine_sim
+
+ if __name__ == '__main__':
+ # Set up argument parsing for input folders
+ parser = argparse.ArgumentParser(description="Calculate cosine similarity between average feature vectors.")
+ parser.add_argument('reference', type=str, help='Path to the reference folder containing .npy files.')
+ parser.add_argument('test', type=str, help='Path to the test folder containing .npy files.')
+ args = parser.parse_args()
+
+ reference = args.reference
+ test = args.test
+ # Load .npy files
+ ref_npy = load_npy_files(reference)
+ test_npy = load_npy_files(test)
+
+ # Compute the average of each list of numpy arrays
+ avg_ref = average_npy(ref_npy)
+ avg_test = average_npy(test_npy)
+
+ # Compute the cosine similarity between the two averaged numpy arrays
+ similarity = cosine_similarity(avg_ref, avg_test)
+
+ # Output the cosine similarity rounded to four decimal places
+ print(f"Cosine similarity between '{reference}' and '{test}': {similarity:.4f}")
semantic_search/semantic_search.py ADDED
@@ -0,0 +1,51 @@
+ import os
+ import torch
+ import numpy as np
+ import argparse
+
+ def get_info(folder_path):
+ """
+ Load all .npy files from a specified folder and return a dictionary of features.
+ """
+ files = sorted(os.listdir(folder_path))
+ features = {}
+
+ for file in files:
+ if file.endswith(".npy"):
+ key = file.split(".")[0]
+ features[key] = np.load(os.path.join(folder_path, file))[0]
+
+ return features
+
+ def main(query_file, features_folder, top_k=10):
+ # Load query feature from the specified file
+ query_feature = np.load(query_file)[0] # Load directly from the query file
+ query_tensor = torch.tensor(query_feature).unsqueeze(dim=0)
+
+ # Load key features from the specified folder
+ key_features = get_info(features_folder)
+
+ # Prepare tensor for key features
+ key_feats_tensor = torch.tensor(np.array([key_features[k] for k in key_features.keys()]))
+
+ # Calculate cosine similarity
+ similarities = torch.cosine_similarity(query_tensor, key_feats_tensor)
+ ranked_indices = torch.argsort(similarities, descending=True)
+
+ # Get the keys for the features
+ keys = list(key_features.keys())
+
+ # Never ask for more items than there are features
+ top_k = min(top_k, len(keys))
+
+ print(f"Top {top_k} similar items:")
+ for i in range(top_k):
+ print(keys[ranked_indices[i]], similarities[ranked_indices[i]].item())
+
+ if __name__ == '__main__':
+ # Set up argument parsing for input paths
+ parser = argparse.ArgumentParser(description="Find top similar features based on cosine similarity.")
+ parser.add_argument('query_file', type=str, help='Path to the query feature file (e.g., ballad.npy).')
+ parser.add_argument('features_folder', type=str, help='Path to the folder containing feature files for comparison.')
+ parser.add_argument('--top_k', type=int, default=10, help='Number of top similar items to display (default: 10).')
+ args = parser.parse_args()
+
+ # Execute the main functionality
+ main(args.query_file, args.features_folder, args.top_k)
semantic_search/semantic_search_metrics.py ADDED
@@ -0,0 +1,78 @@
+ import os
+ import torch
+ import numpy as np
+ import argparse
+
+ def get_features(path):
+ """
+ Load and return feature data from .npy files in the given directory.
+ Each feature is stored in a dictionary with the filename (without extension) as the key.
+ """
+ files = sorted(os.listdir(path))
+ features = {}
+
+ for file in files:
+ if file.endswith(".npy"):
+ key = file.split(".")[0]
+ features[key] = np.load(os.path.join(path, file))[0]
+
+ return features
+
+ def calculate_metrics(query_features, reference_features):
+ """
+ Calculate MRR, Hit@1, Hit@10, and Hit@100 metrics based on the similarity
+ between query and reference features.
+ """
+ # Sort the shared keys so that the ranking order is deterministic
+ common_keys = sorted(set(query_features.keys()) & set(reference_features.keys()))
+ mrr, hit_1, hit_10, hit_100 = 0, 0, 0, 0
+
+ # Build the reference matrix once; row i corresponds to common_keys[i]
+ ref_feats = torch.tensor(np.array([reference_features[k] for k in common_keys]))
+
+ for idx, key in enumerate(common_keys):
+ # Convert query feature to tensor and add batch dimension
+ query_feat = torch.tensor(query_features[key]).unsqueeze(dim=0)
+
+ # Compute cosine similarity between the query and all reference features
+ similarities = torch.cosine_similarity(query_feat, ref_feats)
+
+ # Create a list of (similarity, index) pairs
+ indexed_sims = list(enumerate(similarities.tolist()))
+
+ # Sort by similarity in descending order; ties are broken in favor of the correct item (x[0] == idx)
+ sorted_indices = sorted(indexed_sims, key=lambda x: (x[1], x[0] == idx), reverse=True)
+
+ # Extract the sorted rank list
+ ranks = [x[0] for x in sorted_indices]
+
+ # Accumulate the reciprocal rank of the correct reference
+ mrr += 1 / (ranks.index(idx) + 1)
+
+ # Accumulate Hit@1, Hit@10, Hit@100
+ if idx in ranks[:100]:
+ hit_100 += 1
+ if idx in ranks[:10]:
+ hit_10 += 1
+ if idx in ranks[:1]:
+ hit_1 += 1
+
+ # Compute the final metrics
+ total_keys = len(common_keys)
+ print(f"MRR: {round(mrr / total_keys, 4)}")
+ print(f"Hit@1: {round(hit_1 / total_keys, 4)}")
+ print(f"Hit@10: {round(hit_10 / total_keys, 4)}")
+ print(f"Hit@100: {round(hit_100 / total_keys, 4)}")
+
+ if __name__ == '__main__':
+ # Set up argument parsing for input directories
+ parser = argparse.ArgumentParser(description="Calculate similarity metrics between query and reference features.")
+ parser.add_argument('query_folder', type=str, help='Path to the folder containing query features (.npy files).')
+ parser.add_argument('reference_folder', type=str, help='Path to the folder containing reference features (.npy files).')
+ args = parser.parse_args()
+
+ # Load features from the specified folders
+ query_features = get_features(args.query_folder)
+ reference_features = get_features(args.reference_folder)
+
+ # Calculate and print the metrics
+ calculate_metrics(query_features, reference_features)