jarredou
/

banditv2_state_dicts_only

Model card Files Files and versions Community

jarredou commited on Dec 31, 2024

Commit

1b4f378

·

verified ·

1 Parent(s): 10628df

Upload 2 files

Files changed (2) hide show

checkpoint-multi_state_dict.ckpt +3 -0
config_dnr_bandit_v2_mus64.yaml +77 -0

checkpoint-multi_state_dict.ckpt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f362d9a090a58289a4882882c0c362dd6e750f59ede968416d5d5c2104fdf589
+size 149102946

config_dnr_bandit_v2_mus64.yaml ADDED Viewed

	@@ -0,0 +1,77 @@

+cls: Bandit
+audio:
+  chunk_size: 384000
+  num_channels: 2
+  sample_rate: 48000
+  min_mean_abs: 0.000
+kwargs:
+  in_channels: 1
+  stems: ['speech', 'music', 'sfx']
+  band_type: musical
+  n_bands: 64
+  normalize_channel_independently: false
+  treat_channel_as_feature: true
+  n_sqm_modules: 8
+  emb_dim: 128
+  rnn_dim: 256
+  bidirectional: true
+  rnn_type: "GRU"
+  mlp_dim: 512
+  hidden_activation: "Tanh"
+  hidden_activation_kwargs: null
+  complex_mask: true
+  use_freq_weights: true
+  n_fft: 2048
+  win_length: 2048
+  hop_length: 512
+  window_fn: "hann_window"
+  wkwargs: null
+  power: null
+  center: true
+  normalized: true
+  pad_mode: "reflect"
+  onesided: true
+training:
+  batch_size: 4
+  gradient_accumulation_steps: 4
+  grad_clip: 0
+  instruments:
+  - speech
+  - music
+  - sfx
+  lr: 9.0e-05
+  patience: 2
+  reduce_factor: 0.95
+  target_instrument: null
+  num_epochs: 1000
+  num_steps: 1000
+  q: 0.95
+  coarse_loss_clip: true
+  ema_momentum: 0.999
+  optimizer: adam
+  other_fix: true # it's needed for checking on multisong dataset if other is actually instrumental
+  use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+augmentations:
+  enable: true # enable or disable all augmentations (to fast disable if needed)
+  loudness: true # randomly change loudness of each stem on the range (loudness_min; loudness_max)
+  loudness_min: 0.5
+  loudness_max: 1.5
+  mixup: true # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3)
+  mixup_probs: !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02)
+    - 0.2
+    - 0.02
+  mixup_loudness_min: 0.5
+  mixup_loudness_max: 1.5
+  all:
+    channel_shuffle: 0.5 # Set 0 or lower to disable
+    random_inverse: 0.1 # inverse track (better lower probability)
+    random_polarity: 0.5 # polarity change (multiply waveform to -1)
+inference:
+  batch_size: 8
+  dim_t: 256
+  num_overlap: 4