# File size: 2,349 Bytes
# a00b67a
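# Configuration for the De-limit task with a Conv-TasNet model.
# Loss: si_sdr
#
# How data_params.use_fixed was chosen:
#   ozone_train_fixed is about 6.36 hours of audio.
#   300,000 segments of 4 s each (seq_dur) is about 333.33 hours.
#   So the ratio should be about 6.36 / 333.33 = 0.019.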

# Experiment-tracking (wandb) settings.
# `entity` and `rerun_id` are left as null in this file; `project` is set to
# "delimit". How each key is consumed is not visible here — confirm against
# the training script.
wandb_params:
  use_wandb: true
  entity: null # your wandb id
  project: delimit # your wandb project
  rerun_id: null # set this when you re-run an existing wandb run
  sweep: false

# System-level settings: worker count, random seed, node count, port and rank.
# NOTE(review): n_nodes / port / rank look like distributed-launch settings and
# nb_workers like a data-loader worker count — confirm against the consumer.
sys_params:
  nb_workers: 4
  seed: 777
  n_nodes: 1
  port: null # not set in this config
  rank: 0

# Task selection. The inline comments list the values each key accepts.
task_params:
  target: all # choices=["all"]
  train: true
  dataset: delimit # choices=["musdb", "delimit"]

# Input/output locations and checkpoint-resume options.
# Every "/path/to/..." value is a placeholder and must be replaced with a real
# path before use.
dir_params:
  root: /path/to/musdb18hq
  output_directory: /path/to/results
  exp_name: convtasnet_6_s # you MUST specify this
  resume: null # "path of checkpoint folder"
  continual_train: false # when we want to use a pre-trained model but do not want to use the lr_scheduler history.
  delimit_valid_root: null
  delimit_valid_L_root: null
  ozone_root: /path/to/musdb-XL-train # you have to specify data_params.use_fixed

# Optimisation hyperparameters: batch size, epoch budget, optimizer,
# learning-rate schedule, gradient clipping and EMA toggle.
# NOTE(review): `patience` (50) and `lr_decay_patience` (15) are separate keys;
# presumably early-stopping patience vs. LR-decay patience — confirm.
hyperparams:
  batch_size: 8 # with 1 GPU (we used a 2080 Ti 11GB)
  epochs: 200
  optimizer: adamw
  weight_decay: 0.01
  lr: 0.00003
  lr_decay_gamma: 0.5
  lr_decay_patience: 15
  patience: 50
  lr_scheduler: step_lr
  gradient_clip: 5.0
  ema: false

# Data loading and augmentation settings.
# Keys set to null are left unset in this configuration (the limitaug_custom_*,
# target_limitaug_* and custom_limiter_* options, plus singleset_num_frames).
data_params:
  nfft: 4096
  nhop: 1024
  nb_channels: 2
  sample_rate: 44100
  seq_dur: 4.0 # presumably the training segment length in seconds — confirm
  singleset_num_frames: null
  samples_per_track: 128 # "Number of samples per track to use for training."
  limitaug_method: ozone
  limitaug_mode: null
  limitaug_custom_target_lufs: null
  limitaug_custom_target_lufs_std: null
  target_loudnorm_lufs: -14.0
  random_mix: true
  target_limitaug_mode: null
  target_limitaug_custom_target_lufs: null
  target_limitaug_custom_target_lufs_std: null
  custom_limiter_attack_range: null
  custom_limiter_release_range: null
  # 0.019 is the ratio worked out in the comment at the top of this file.
  use_fixed: 0.019 # range 0.0 ~ 1.0 => 1.0 will use fixed Ozoned_mixture training examples only.

# Model architecture choice and loss configuration.
# Each *_loss_func list has one entry (si_sdr) and each *_loss_scales list has
# one entry (1.). NOTE(review): the lists are presumably paired element-wise
# (one scale per loss) — confirm against the training script.
model_loss_params:
  architecture: conv_tasnet_mask_on_output # Sample-wise Gain Inversion (SGI)
  train_loss_func: [si_sdr]
  train_loss_scales: [1.]
  valid_loss_func: [si_sdr]
  valid_loss_scales: [1.]

# Conv-TasNet model hyperparameters.
# The commented-out keys (conv_kernel_size, norm_type, causal) are not set by
# this file. NOTE(review): presumably the model's own defaults apply for
# them — confirm in the model code.
conv_tasnet_params:
  encoder_activation: relu
  n_filters: 512
  kernel_size: 128 # about 2.9 ms at 44100 Hz (128 / 44100 s)
  stride: 64 # half of kernel_size
  n_blocks: 5
  n_repeats: 2
  bn_chan: 128
  hid_chan: 512
  skip_chan: 128
  # conv_kernel_size:
  # norm_type:
  mask_act: relu
  # causal:
  decoder_activation: sigmoid