Upload 2 files
Browse files
checkpoint-multi_state_dict.ckpt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f362d9a090a58289a4882882c0c362dd6e750f59ede968416d5d5c2104fdf589
|
3 |
+
size 149102946
|
config_dnr_bandit_v2_mus64.yaml
ADDED
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
cls: Bandit
|
2 |
+
|
3 |
+
audio:
|
4 |
+
chunk_size: 384000
|
5 |
+
num_channels: 2
|
6 |
+
sample_rate: 48000
|
7 |
+
min_mean_abs: 0.000
|
8 |
+
|
9 |
+
kwargs:
|
10 |
+
in_channels: 1
|
11 |
+
stems: ['speech', 'music', 'sfx']
|
12 |
+
band_type: musical
|
13 |
+
n_bands: 64
|
14 |
+
normalize_channel_independently: false
|
15 |
+
treat_channel_as_feature: true
|
16 |
+
n_sqm_modules: 8
|
17 |
+
emb_dim: 128
|
18 |
+
rnn_dim: 256
|
19 |
+
bidirectional: true
|
20 |
+
rnn_type: "GRU"
|
21 |
+
mlp_dim: 512
|
22 |
+
hidden_activation: "Tanh"
|
23 |
+
hidden_activation_kwargs: null
|
24 |
+
complex_mask: true
|
25 |
+
use_freq_weights: true
|
26 |
+
n_fft: 2048
|
27 |
+
win_length: 2048
|
28 |
+
hop_length: 512
|
29 |
+
window_fn: "hann_window"
|
30 |
+
wkwargs: null
|
31 |
+
power: null
|
32 |
+
center: true
|
33 |
+
normalized: true
|
34 |
+
pad_mode: "reflect"
|
35 |
+
onesided: true
|
36 |
+
|
37 |
+
training:
|
38 |
+
batch_size: 4
|
39 |
+
gradient_accumulation_steps: 4
|
40 |
+
grad_clip: 0
|
41 |
+
instruments:
|
42 |
+
- speech
|
43 |
+
- music
|
44 |
+
- sfx
|
45 |
+
lr: 9.0e-05
|
46 |
+
patience: 2
|
47 |
+
reduce_factor: 0.95
|
48 |
+
target_instrument: null
|
49 |
+
num_epochs: 1000
|
50 |
+
num_steps: 1000
|
51 |
+
q: 0.95
|
52 |
+
coarse_loss_clip: true
|
53 |
+
ema_momentum: 0.999
|
54 |
+
optimizer: adam
|
55 |
+
other_fix: true # needed when checking on the multisong dataset whether "other" is actually instrumental
|
56 |
+
use_amp: true # enable or disable mixed precision (float16) - usually this should be true
|
57 |
+
|
58 |
+
augmentations:
|
59 |
+
enable: true # enable or disable all augmentations (a quick way to turn them all off if needed)
|
60 |
+
loudness: true # randomly change the loudness of each stem within the range (loudness_min; loudness_max)
|
61 |
+
loudness_min: 0.5
|
62 |
+
loudness_max: 1.5
|
63 |
+
mixup: true # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3)
|
64 |
+
mixup_probs: !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02)
|
65 |
+
- 0.2
|
66 |
+
- 0.02
|
67 |
+
mixup_loudness_min: 0.5
|
68 |
+
mixup_loudness_max: 1.5
|
69 |
+
all:
|
70 |
+
channel_shuffle: 0.5 # Set 0 or lower to disable
|
71 |
+
random_inverse: 0.1 # invert the track (a lower probability is better)
|
72 |
+
random_polarity: 0.5 # polarity change (multiply waveform by -1)
|
73 |
+
|
74 |
+
inference:
|
75 |
+
batch_size: 8
|
76 |
+
dim_t: 256
|
77 |
+
num_overlap: 4
|