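# Hyperparameter configuration for text-independent speaker verification (TISV)
# preprocessing, training, and testing. Multi-document YAML: global flags first,
# then data, model, train, and test sections separated by '---'.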
training: !!bool "false" #True for training mode, false for testing/inference
device: "mps" #PyTorch device string ("mps", "cuda", or "cpu")
unprocessed_data: './DATA_DIR/*/*.wav' #Glob pattern for raw .wav files
---
data:
    train_path: './train_tisv' #Directory of preprocessed training data
    train_path_unprocessed: './TIMIT/TRAIN/*/*/*.wav' #Glob pattern for raw TIMIT training .wav files
    test_path: './test_tisv' #Directory of preprocessed test data
    test_path_unprocessed: './TIMIT/TEST/*/*/*.wav' #Glob pattern for raw TIMIT test .wav files
    data_preprocessed: !!bool "true" #True once preprocessing has been run
    sr: 16000 #Sampling rate (Hz)
    nfft: 512 #FFT size for mel spectrogram preprocessing
    window: 0.025 #Window length (s)
    hop: 0.01 #Hop length (s)
    nmels: 40 #Number of mel filterbank energies
    tisv_frame: 180 #Max number of time steps per input after preprocessing
---
model:
    hidden: 768 #Number of LSTM hidden layer units
    num_layer: 3 #Number of LSTM layers
    proj: 256 #Embedding size
    model_path: './speech_id_checkpoint/ckpt_epoch_840_batch_id_6.pth' #Model path for testing, inference, or resuming training
---
train:
    N: 4 #Number of speakers per batch
    M: 6 #Number of utterances per speaker
    num_workers: 0 #Number of DataLoader workers
    lr: 0.01 #Learning rate
    epochs: 1000 #Max number of training speaker epochs
    log_interval: 30 #Epochs between progress prints
    log_file: './speech_id_checkpoint/Stats' #Path to training stats/log file
    checkpoint_interval: 100 #Save model after x speaker epochs
    checkpoint_dir: './speech_id_checkpoint'
    restore: !!bool "true" #Resume training from previous model path
---
test:
    N: 4 #Number of speakers per batch
    M: 6 #Number of utterances per speaker
    num_workers: 8 #Number of DataLoader workers
    epochs: 10 #Number of testing speaker epochs