jarredou committed on
Commit
1b4f378
·
verified ·
1 Parent(s): 10628df

Upload 2 files

Browse files
checkpoint-multi_state_dict.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f362d9a090a58289a4882882c0c362dd6e750f59ede968416d5d5c2104fdf589
3
+ size 149102946
config_dnr_bandit_v2_mus64.yaml ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ cls: Bandit
2
+
3
+ audio:
4
+ chunk_size: 384000
5
+ num_channels: 2
6
+ sample_rate: 48000
7
+ min_mean_abs: 0.000
8
+
9
+ kwargs:
10
+ in_channels: 1
11
+ stems: ['speech', 'music', 'sfx']
12
+ band_type: musical
13
+ n_bands: 64
14
+ normalize_channel_independently: false
15
+ treat_channel_as_feature: true
16
+ n_sqm_modules: 8
17
+ emb_dim: 128
18
+ rnn_dim: 256
19
+ bidirectional: true
20
+ rnn_type: "GRU"
21
+ mlp_dim: 512
22
+ hidden_activation: "Tanh"
23
+ hidden_activation_kwargs: null
24
+ complex_mask: true
25
+ use_freq_weights: true
26
+ n_fft: 2048
27
+ win_length: 2048
28
+ hop_length: 512
29
+ window_fn: "hann_window"
30
+ wkwargs: null
31
+ power: null
32
+ center: true
33
+ normalized: true
34
+ pad_mode: "reflect"
35
+ onesided: true
36
+
37
+ training:
38
+ batch_size: 4
39
+ gradient_accumulation_steps: 4
40
+ grad_clip: 0
41
+ instruments:
42
+ - speech
43
+ - music
44
+ - sfx
45
+ lr: 9.0e-05
46
+ patience: 2
47
+ reduce_factor: 0.95
48
+ target_instrument: null
49
+ num_epochs: 1000
50
+ num_steps: 1000
51
+ q: 0.95
52
+ coarse_loss_clip: true
53
+ ema_momentum: 0.999
54
+ optimizer: adam
55
+ other_fix: true # needed when checking on the multisong dataset whether "other" is actually instrumental
56
+ use_amp: true # enable or disable mixed precision (float16); should usually be true
57
+
58
+ augmentations:
59
+ enable: true # enable or disable all augmentations (to quickly disable them if needed)
60
+ loudness: true # randomly change the loudness of each stem within the range (loudness_min; loudness_max)
61
+ loudness_min: 0.5
62
+ loudness_max: 1.5
63
+ mixup: true # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3)
64
+ mixup_probs: !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02)
65
+ - 0.2
66
+ - 0.02
67
+ mixup_loudness_min: 0.5
68
+ mixup_loudness_max: 1.5
69
+ all:
70
+ channel_shuffle: 0.5 # Set 0 or lower to disable
71
+ random_inverse: 0.1 # invert the track (a lower probability is better)
72
+ random_polarity: 0.5 # polarity change (multiply the waveform by -1)
73
+
74
+ inference:
75
+ batch_size: 8
76
+ dim_t: 256
77
+ num_overlap: 4