Upload 17 files
Browse files
- .gitattributes +1 -0
- inference/.DS_Store +0 -0
- inference/data/base_config.yaml +106 -0
- inference/data/btc_model.pt +3 -0
- inference/data/btc_model_large_voca.pt +3 -0
- inference/data/chord.json +1 -0
- inference/data/chord_attr.json +1 -0
- inference/data/chord_attr_inv.json +16 -0
- inference/data/chord_inv.json +1 -0
- inference/data/chord_root.json +1 -0
- inference/data/chord_root_inv.json +15 -0
- inference/data/prep_config.yaml +51 -0
- inference/data/run_config.yaml +43 -0
- inference/data/tag_list.npy +3 -0
- inference/data/test_config.yaml +41 -0
- inference/data/train_config.yaml +94 -0
- inference/input/test.mp3 +3 -0
- inference/temp_out/.DS_Store +0 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
inference/input/test.mp3 filter=lfs diff=lfs merge=lfs -text
|
inference/.DS_Store
ADDED
Binary file (8.2 kB). View file
|
|
inference/data/base_config.yaml
ADDED
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
version: "1.34"
|
2 |
+
|
3 |
+
lr: 1e-4
|
4 |
+
log_step: 1
|
5 |
+
split: 0
|
6 |
+
batch_size: 8
|
7 |
+
sr: 16000
|
8 |
+
|
9 |
+
datasets:
|
10 |
+
- jamendo
|
11 |
+
- emomusic
|
12 |
+
- pmemo
|
13 |
+
- deam
|
14 |
+
|
15 |
+
model:
|
16 |
+
encoder: "MERT"
|
17 |
+
layers:
|
18 |
+
- 5
|
19 |
+
- 6
|
20 |
+
classifier: "linear-mt-attn-ck"
|
21 |
+
# - linear
|
22 |
+
# - linear-attn-ck
|
23 |
+
# - linear-mt-attn-ck
|
24 |
+
|
25 |
+
kd: True
|
26 |
+
kd_weight: 0.8
|
27 |
+
kd_temperature: 1
|
28 |
+
lr: 1e-4
|
29 |
+
|
30 |
+
# audio_path: './dataset/jamendo'
|
31 |
+
# subset: 'moodtheme'
|
32 |
+
|
33 |
+
dataset:
|
34 |
+
jamendo:
|
35 |
+
root: './dataset/jamendo'
|
36 |
+
subset: 'moodtheme'
|
37 |
+
batch_size: 8
|
38 |
+
output_size : 56
|
39 |
+
split: 0
|
40 |
+
segment_type: "all" # [all,f10s,f30s,10s,30s]
|
41 |
+
num_workers: 4
|
42 |
+
deam:
|
43 |
+
root: './dataset/deam'
|
44 |
+
batch_size: 8
|
45 |
+
output_size : 2
|
46 |
+
segment_type: "all" # [all,f10s,f30s,10s,30s]
|
47 |
+
num_workers: 4
|
48 |
+
pmemo:
|
49 |
+
root: './dataset/pmemo'
|
50 |
+
batch_size: 8
|
51 |
+
output_size : 2
|
52 |
+
segment_type: "all" # [all,f10s,f30s,10s,30s]
|
53 |
+
num_workers: 4
|
54 |
+
emomusic:
|
55 |
+
root: './dataset/emomusic'
|
56 |
+
batch_size: 8
|
57 |
+
output_size : 2
|
58 |
+
segment_type: "all" # [all,f10s,f30s,10s,30s]
|
59 |
+
num_workers: 4
|
60 |
+
|
61 |
+
|
62 |
+
|
63 |
+
# --------------------------------------- #
|
64 |
+
genre_class_size: 87
|
65 |
+
mood_class_size: 56
|
66 |
+
instr_class_size: 40
|
67 |
+
dac_latents_size: 72
|
68 |
+
dac_rvq_size: 9
|
69 |
+
# --------------------------------------- #
|
70 |
+
|
71 |
+
|
72 |
+
#PMEMO BEST (0.5360 0.7772), mt: (0.5401 0.7780)
|
73 |
+
checkpoint_pmemo: "tb_logs/best/P.ckpt"
|
74 |
+
|
75 |
+
#DEAM BEST (0.5131 0.6025), mt: (0.5150 0.6125)
|
76 |
+
checkpoint_deam: "tb_logs/best/D.ckpt"
|
77 |
+
|
78 |
+
#EMOMUSIC BEST (0.5957 0.7489), mt: (0.6091 0.7525)
|
79 |
+
checkpoint_emomusic: "tb_logs/best/E.ckpt"
|
80 |
+
|
81 |
+
#JAMENDO BEST (0.1521 0.7806)
|
82 |
+
checkpoint_jamendo: "tb_logs/best/J.ckpt"
|
83 |
+
|
84 |
+
|
85 |
+
|
86 |
+
|
87 |
+
# datasets:
|
88 |
+
# - jamendo
|
89 |
+
# - pmemo
|
90 |
+
# - deam
|
91 |
+
# - emomusic
|
92 |
+
# - pmemo
|
93 |
+
# - jamendo
|
94 |
+
# datasets_val:
|
95 |
+
# - emomusic
|
96 |
+
# model_save_path: './saved_models/'
|
97 |
+
# results_save_path: './results/'
|
98 |
+
# hydra:
|
99 |
+
# job:
|
100 |
+
# chdir: True
|
101 |
+
# - MERT M2L LIBROSA Encodec DAC
|
102 |
+
# aggr_method: "mean"
|
103 |
+
# - mean
|
104 |
+
# - median
|
105 |
+
# - 80th_percentile
|
106 |
+
# - max
|
inference/data/btc_model.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:71c2c5db17e8c43b8a9a9da5db36ef2d667158c07a214eba16344c154c00bf54
|
3 |
+
size 12154754
|
inference/data/btc_model_large_voca.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1673d23f8f9a55ae7f9e8b80a51da616debb22675b8d8b67ea6ce0ef37b0ab51
|
3 |
+
size 12229576
|
inference/data/chord.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"N": 0, "C": 1, "C:dim": 2, "C:sus4": 3, "C:min7": 4, "C:min": 5, "C:sus2": 6, "C:aug": 7, "C:dim7": 8, "C:maj6": 9, "C:hdim7": 10, "C:7": 11, "C:min6": 12, "C:maj7": 13, "C#": 14, "C#:dim": 15, "C#:sus4": 16, "C#:min7": 17, "C#:min": 18, "C#:sus2": 19, "C#:aug": 20, "C#:dim7": 21, "C#:maj6": 22, "C#:hdim7": 23, "C#:7": 24, "C#:min6": 25, "C#:maj7": 26, "D": 27, "D:dim": 28, "D:sus4": 29, "D:min7": 30, "D:min": 31, "D:sus2": 32, "D:aug": 33, "D:dim7": 34, "D:maj6": 35, "D:hdim7": 36, "D:7": 37, "D:min6": 38, "D:maj7": 39, "D#": 40, "D#:dim": 41, "D#:sus4": 42, "D#:min7": 43, "D#:min": 44, "D#:sus2": 45, "D#:aug": 46, "D#:dim7": 47, "D#:maj6": 48, "D#:hdim7": 49, "D#:7": 50, "D#:min6": 51, "D#:maj7": 52, "E": 53, "E:dim": 54, "E:sus4": 55, "E:min7": 56, "E:min": 57, "E:sus2": 58, "E:aug": 59, "E:dim7": 60, "E:maj6": 61, "E:hdim7": 62, "E:7": 63, "E:min6": 64, "E:maj7": 65, "F": 66, "F:dim": 67, "F:sus4": 68, "F:min7": 69, "F:min": 70, "F:sus2": 71, "F:aug": 72, "F:dim7": 73, "F:maj6": 74, "F:hdim7": 75, "F:7": 76, "F:min6": 77, "F:maj7": 78, "F#": 79, "F#:dim": 80, "F#:sus4": 81, "F#:min7": 82, "F#:min": 83, "F#:sus2": 84, "F#:aug": 85, "F#:dim7": 86, "F#:maj6": 87, "F#:hdim7": 88, "F#:7": 89, "F#:min6": 90, "F#:maj7": 91, "G": 92, "G:dim": 93, "G:sus4": 94, "G:min7": 95, "G:min": 96, "G:sus2": 97, "G:aug": 98, "G:dim7": 99, "G:maj6": 100, "G:hdim7": 101, "G:7": 102, "G:min6": 103, "G:maj7": 104, "G#": 105, "G#:dim": 106, "G#:sus4": 107, "G#:min7": 108, "G#:min": 109, "G#:sus2": 110, "G#:aug": 111, "G#:dim7": 112, "G#:maj6": 113, "G#:hdim7": 114, "G#:7": 115, "G#:min6": 116, "G#:maj7": 117, "A": 118, "A:dim": 119, "A:sus4": 120, "A:min7": 121, "A:min": 122, "A:sus2": 123, "A:aug": 124, "A:dim7": 125, "A:maj6": 126, "A:hdim7": 127, "A:7": 128, "A:min6": 129, "A:maj7": 130, "A#": 131, "A#:dim": 132, "A#:sus4": 133, "A#:min7": 134, "A#:min": 135, "A#:sus2": 136, "A#:aug": 137, "A#:dim7": 138, "A#:maj6": 139, "A#:hdim7": 140, "A#:7": 141, "A#:min6": 142, "A#:maj7": 
143, "B": 144, "B:dim": 145, "B:sus4": 146, "B:min7": 147, "B:min": 148, "B:sus2": 149, "B:aug": 150, "B:dim7": 151, "B:maj6": 152, "B:hdim7": 153, "B:7": 154, "B:min6": 155, "B:maj7": 156, "X": 157 }
|
inference/data/chord_attr.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"N": 0, "maj": 1, "dim": 2, "sus4": 3, "min7": 4, "min": 5, "sus2": 6, "aug": 7, "dim7": 8, "maj6": 9, "hdim7": 10, "7": 11, "min6": 12, "maj7": 13}
|
inference/data/chord_attr_inv.json
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"0": "N",
|
3 |
+
"1": "maj",
|
4 |
+
"2": "dim",
|
5 |
+
"3": "sus4",
|
6 |
+
"4": "min7",
|
7 |
+
"5": "min",
|
8 |
+
"6": "sus2",
|
9 |
+
"7": "aug",
|
10 |
+
"8": "dim7",
|
11 |
+
"9": "maj6",
|
12 |
+
"10": "hdim7",
|
13 |
+
"11": "7",
|
14 |
+
"12": "min6",
|
15 |
+
"13": "maj7"
|
16 |
+
}
|
inference/data/chord_inv.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"0": "N", "1": "C", "2": "C:dim", "3": "C:sus4", "4": "C:min7", "5": "C:min", "6": "C:sus2", "7": "C:aug", "8": "C:dim7", "9": "C:maj6", "10": "C:hdim7", "11": "C:7", "12": "C:min6", "13": "C:maj7", "14": "C#", "15": "C#:dim", "16": "C#:sus4", "17": "C#:min7", "18": "C#:min", "19": "C#:sus2", "20": "C#:aug", "21": "C#:dim7", "22": "C#:maj6", "23": "C#:hdim7", "24": "C#:7", "25": "C#:min6", "26": "C#:maj7", "27": "D", "28": "D:dim", "29": "D:sus4", "30": "D:min7", "31": "D:min", "32": "D:sus2", "33": "D:aug", "34": "D:dim7", "35": "D:maj6", "36": "D:hdim7", "37": "D:7", "38": "D:min6", "39": "D:maj7", "40": "D#", "41": "D#:dim", "42": "D#:sus4", "43": "D#:min7", "44": "D#:min", "45": "D#:sus2", "46": "D#:aug", "47": "D#:dim7", "48": "D#:maj6", "49": "D#:hdim7", "50": "D#:7", "51": "D#:min6", "52": "D#:maj7", "53": "E", "54": "E:dim", "55": "E:sus4", "56": "E:min7", "57": "E:min", "58": "E:sus2", "59": "E:aug", "60": "E:dim7", "61": "E:maj6", "62": "E:hdim7", "63": "E:7", "64": "E:min6", "65": "E:maj7", "66": "F", "67": "F:dim", "68": "F:sus4", "69": "F:min7", "70": "F:min", "71": "F:sus2", "72": "F:aug", "73": "F:dim7", "74": "F:maj6", "75": "F:hdim7", "76": "F:7", "77": "F:min6", "78": "F:maj7", "79": "F#", "80": "F#:dim", "81": "F#:sus4", "82": "F#:min7", "83": "F#:min", "84": "F#:sus2", "85": "F#:aug", "86": "F#:dim7", "87": "F#:maj6", "88": "F#:hdim7", "89": "F#:7", "90": "F#:min6", "91": "F#:maj7", "92": "G", "93": "G:dim", "94": "G:sus4", "95": "G:min7", "96": "G:min", "97": "G:sus2", "98": "G:aug", "99": "G:dim7", "100": "G:maj6", "101": "G:hdim7", "102": "G:7", "103": "G:min6", "104": "G:maj7", "105": "G#", "106": "G#:dim", "107": "G#:sus4", "108": "G#:min7", "109": "G#:min", "110": "G#:sus2", "111": "G#:aug", "112": "G#:dim7", "113": "G#:maj6", "114": "G#:hdim7", "115": "G#:7", "116": "G#:min6", "117": "G#:maj7", "118": "A", "119": "A:dim", "120": "A:sus4", "121": "A:min7", "122": "A:min", "123": "A:sus2", "124": "A:aug", "125": "A:dim7", "126": "A:maj6", 
"127": "A:hdim7", "128": "A:7", "129": "A:min6", "130": "A:maj7", "131": "A#", "132": "A#:dim", "133": "A#:sus4", "134": "A#:min7", "135": "A#:min", "136": "A#:sus2", "137": "A#:aug", "138": "A#:dim7", "139": "A#:maj6", "140": "A#:hdim7", "141": "A#:7", "142": "A#:min6", "143": "A#:maj7", "144": "B", "145": "B:dim", "146": "B:sus4", "147": "B:min7", "148": "B:min", "149": "B:sus2", "150": "B:aug", "151": "B:dim7", "152": "B:maj6", "153": "B:hdim7", "154": "B:7", "155": "B:min6", "156": "B:maj7", "157": "X"}
|
inference/data/chord_root.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"N": 0, "C": 1, "C#": 2, "D": 3, "D#": 4, "E": 5, "F": 6, "F#": 7, "G": 8, "G#": 9, "A": 10, "A#": 11, "B": 12}
|
inference/data/chord_root_inv.json
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"0": "N",
|
3 |
+
"1": "C",
|
4 |
+
"2": "C#",
|
5 |
+
"3": "D",
|
6 |
+
"4": "D#",
|
7 |
+
"5": "E",
|
8 |
+
"6": "F",
|
9 |
+
"7": "F#",
|
10 |
+
"8": "G",
|
11 |
+
"9": "G#",
|
12 |
+
"10": "A",
|
13 |
+
"11": "A#",
|
14 |
+
"12": "B"
|
15 |
+
}
|
inference/data/prep_config.yaml
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
device_id: 3
|
2 |
+
|
3 |
+
is_split: True
|
4 |
+
segment_duration: 30
|
5 |
+
|
6 |
+
|
7 |
+
# --- DATASET --- #
|
8 |
+
|
9 |
+
# dataset:
|
10 |
+
# input_dir: '../dataset/jamendo/mp3'
|
11 |
+
# output_dir: '../dataset/jamendo/mert_30s'
|
12 |
+
# audio length : Full
|
13 |
+
|
14 |
+
# dataset:
|
15 |
+
# input_dir: '../dataset/dmdd/mp3'
|
16 |
+
# output_dir: '../dataset/dmdd/mert_30s'
|
17 |
+
# # audio length : ~30s
|
18 |
+
|
19 |
+
# dataset:
|
20 |
+
# input_dir: '../dataset/emomusic/mp3'
|
21 |
+
# output_dir: '../dataset/emomusic/mert_30s'
|
22 |
+
# # audio length : ~30s
|
23 |
+
|
24 |
+
# dataset:
|
25 |
+
# input_dir: '../dataset/pmemo/mp3'
|
26 |
+
# output_dir: '../dataset/pmemo/mert_30s'
|
27 |
+
# # audio length : ~30s
|
28 |
+
|
29 |
+
|
30 |
+
dataset:
|
31 |
+
input_dir: '../dataset/deam/mp3'
|
32 |
+
output_dir: '../dataset/deam/mert_30s'
|
33 |
+
# audio length : ~30s
|
34 |
+
|
35 |
+
|
36 |
+
|
37 |
+
|
38 |
+
# --- ENCODER --- #
|
39 |
+
|
40 |
+
model:
|
41 |
+
name: 'm-a-p/MERT-v1-95M'
|
42 |
+
sr: 24000
|
43 |
+
|
44 |
+
# model:
|
45 |
+
# name: 'music2latent'
|
46 |
+
# sr: 44100
|
47 |
+
|
48 |
+
|
49 |
+
|
50 |
+
|
51 |
+
|
inference/data/run_config.yaml
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
mp3:
|
2 |
+
song_hz: 22050
|
3 |
+
inst_len: 10.0
|
4 |
+
skip_interval: 5.0
|
5 |
+
|
6 |
+
feature:
|
7 |
+
n_bins: 144
|
8 |
+
bins_per_octave: 24
|
9 |
+
hop_length: 2048
|
10 |
+
#large_voca: False
|
11 |
+
large_voca: True
|
12 |
+
|
13 |
+
experiment:
|
14 |
+
learning_rate : 0.0001
|
15 |
+
weight_decay : 0.0
|
16 |
+
max_epoch : 100
|
17 |
+
batch_size : 128
|
18 |
+
save_step : 40
|
19 |
+
data_ratio : 0.8
|
20 |
+
|
21 |
+
model:
|
22 |
+
feature_size : 144
|
23 |
+
timestep : 108
|
24 |
+
#num_chords : 25
|
25 |
+
num_chords : 170
|
26 |
+
input_dropout : 0.2
|
27 |
+
layer_dropout : 0.2
|
28 |
+
attention_dropout : 0.2
|
29 |
+
relu_dropout : 0.2
|
30 |
+
num_layers : 8
|
31 |
+
num_heads : 4
|
32 |
+
hidden_size : 128
|
33 |
+
total_key_depth : 128
|
34 |
+
total_value_depth : 128
|
35 |
+
filter_size : 128
|
36 |
+
loss : 'ce'
|
37 |
+
probs_out : False
|
38 |
+
|
39 |
+
path:
|
40 |
+
ckpt_path : 'model'
|
41 |
+
result_path : 'result'
|
42 |
+
asset_path : '/data/music/chord_recognition/jayg996/assets'
|
43 |
+
root_path : '/data/music/chord_recognition'
|
inference/data/tag_list.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9510e22fca2ac817c8af9287f1fa40dbbbc10c489ead8d7bfc99191c0569d60d
|
3 |
+
size 22820
|
inference/data/test_config.yaml
ADDED
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
defaults:
|
2 |
+
- base_config
|
3 |
+
- _self_
|
4 |
+
|
5 |
+
batch_size: 8
|
6 |
+
devices: [0]
|
7 |
+
|
8 |
+
trainer:
|
9 |
+
devices: ${devices}
|
10 |
+
accelerator: 'gpu'
|
11 |
+
|
12 |
+
# datasets:
|
13 |
+
# - jamendo
|
14 |
+
# - dmdd
|
15 |
+
|
16 |
+
checkpoint_latest: True
|
17 |
+
|
18 |
+
multitask: True
|
19 |
+
dataset_type: "va"
|
20 |
+
# dataset_type options: 'mood' or 'va'
|
21 |
+
## If checkpoint_latest is not True, the following checkpoint is used.
|
22 |
+
|
23 |
+
checkpoint: "tb_logs/best/EJ.ckpt"
|
24 |
+
|
25 |
+
# checkpoint_J: "tb_logs/best/jamendo.ckpt"
|
26 |
+
# checkpoint_P: "tb_logs/best/pmemo.ckpt"
|
27 |
+
# checkpoint_E: "tb_logs/best/emomusic.ckpt"
|
28 |
+
# checkpoint_D: "tb_logs/best/deam.ckpt"
|
29 |
+
|
30 |
+
# checkpoint_PJ: "tb_logs/best/PJ.ckpt"
|
31 |
+
# checkpoint_EJ: "tb_logs/best/EJ.ckpt"
|
32 |
+
# checkpoint_DJ: "tb_logs/best/DJ.ckpt"
|
33 |
+
|
34 |
+
# checkpoint_JP: "tb_logs/best/JP.ckpt"
|
35 |
+
# checkpoint_JE: "tb_logs/best/JE.ckpt"
|
36 |
+
# checkpoint_JD: "tb_logs/best/JD.ckpt"
|
37 |
+
|
38 |
+
# checkpoint_ALL: "tb_logs/best/ALL.ckpt"
|
39 |
+
|
40 |
+
|
41 |
+
# checkpoint: "tb_logs/train_audio_classification/version_110/checkpoints/21-0.1202.ckpt"
|
inference/data/train_config.yaml
ADDED
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
defaults:
|
2 |
+
- base_config
|
3 |
+
- _self_
|
4 |
+
|
5 |
+
devices: [0,1,2,3]
|
6 |
+
epochs: 500
|
7 |
+
batch_size: 8
|
8 |
+
|
9 |
+
monitor_metric: "val_loss"
|
10 |
+
monitor_metric_mood: "val_loss_mood"
|
11 |
+
monitor_metric_va: "val_loss_va"
|
12 |
+
|
13 |
+
checkpoint:
|
14 |
+
monitor: "${monitor_metric}"
|
15 |
+
filename: "{epoch:02d}-{${monitor_metric}:.4f}"
|
16 |
+
save_top_k: -1
|
17 |
+
mode: "min"
|
18 |
+
auto_insert_metric_name: False
|
19 |
+
save_last: True
|
20 |
+
|
21 |
+
checkpoint_mood:
|
22 |
+
monitor: "${monitor_metric_mood}"
|
23 |
+
filename: "mood-{epoch:02d}-{${monitor_metric_mood}:.4f}"
|
24 |
+
save_top_k: -1
|
25 |
+
mode: "min"
|
26 |
+
auto_insert_metric_name: False
|
27 |
+
save_last: True
|
28 |
+
|
29 |
+
checkpoint_va:
|
30 |
+
monitor: "${monitor_metric_va}"
|
31 |
+
filename: "va-{epoch:02d}-{${monitor_metric_va}:.4f}"
|
32 |
+
save_top_k: 5
|
33 |
+
mode: "min"
|
34 |
+
auto_insert_metric_name: False
|
35 |
+
save_last: True
|
36 |
+
|
37 |
+
earlystopping:
|
38 |
+
monitor: "${monitor_metric_mood}"
|
39 |
+
patience: 10
|
40 |
+
min_delta: 0.0001
|
41 |
+
mode: "min"
|
42 |
+
|
43 |
+
trainer:
|
44 |
+
devices: ${devices}
|
45 |
+
max_epochs: ${epochs}
|
46 |
+
accelerator: 'gpu'
|
47 |
+
|
48 |
+
|
49 |
+
|
50 |
+
|
51 |
+
|
52 |
+
|
53 |
+
|
54 |
+
# strategy: 'ddp_find_unused_parameters_true'
|
55 |
+
# optimizer:
|
56 |
+
# _target_: torch.optim.AdamW
|
57 |
+
# _partial_: true
|
58 |
+
# lr: 1e-4
|
59 |
+
# weight_decay: 0.01
|
60 |
+
# scheduler:
|
61 |
+
# _target_: torch.optim.lr_scheduler.ReduceLROnPlateau
|
62 |
+
# _partial_: true
|
63 |
+
# cooldown: 5
|
64 |
+
# mode: max
|
65 |
+
# factor: 0.2
|
66 |
+
# patience: 10
|
67 |
+
# min_lr: 1.6e-7
|
68 |
+
# monitor_metric: "val_loss"
|
69 |
+
# # val_loss
|
70 |
+
# # val_loss_mood
|
71 |
+
# # val_loss_va
|
72 |
+
# checkpoint:
|
73 |
+
# monitor: "${monitor_metric}"
|
74 |
+
# filename: "{epoch:02d}-{${monitor_metric}:.4f}"
|
75 |
+
# save_top_k: 2
|
76 |
+
# mode: "min"
|
77 |
+
# auto_insert_metric_name: False
|
78 |
+
# save_last: True
|
79 |
+
# checkpoint:
|
80 |
+
# monitor: "val_loss_mood"
|
81 |
+
# filename: "{epoch:02d}-{val_loss_mood:.4f}"
|
82 |
+
# save_top_k: 2
|
83 |
+
# mode: "min"
|
84 |
+
# auto_insert_metric_name: False
|
85 |
+
# save_last: True
|
86 |
+
# earlystopping:
|
87 |
+
# monitor: 'val_loss_mood'
|
88 |
+
# patience: 10
|
89 |
+
# min_delta: 0.0001
|
90 |
+
# mode: "min"
|
91 |
+
# datasets:
|
92 |
+
# - jamendo
|
93 |
+
# - dmdd
|
94 |
+
|
inference/input/test.mp3
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:22a56123f5adb9d061d4ab80a97aae12c84937d86a5042343c05e108b4e9fdda
|
3 |
+
size 8195178
|
inference/temp_out/.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|