dragonSwing committed
Commit 09e18b1 · 1 Parent(s): 035d6f4

Initial commit

.gitattributes CHANGED
@@ -26,3 +26,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zstandard filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
classifier.ckpt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f1c5cf560dc904388ca459804ce8ee11944b3edc102a146d2a7c3c3ea1b3b67b
+size 9067
config.json ADDED
@@ -0,0 +1,3 @@
+{
+  "speechbrain_interface": "EncoderClassifier"
+}
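
config.json declares the SpeechBrain pretrained interface for this repo, so the model is meant to be loaded through SpeechBrain's EncoderClassifier wrapper. A minimal loading sketch (assumptions: speechbrain is installed, and "dragonSwing/audio-classifier" stands in for the actual Hub repo id, which is not shown in this commit):

    # Hypothetical repo id, used only for illustration.
    from speechbrain.pretrained import EncoderClassifier

    classifier = EncoderClassifier.from_hparams(
        source="dragonSwing/audio-classifier",         # assumed repo id
        savedir="pretrained_models/audio-classifier",  # local cache dir
    )

    # classify_file returns posterior probabilities, the best score,
    # the predicted class index, and the decoded text label.
    out_prob, score, index, text_lab = classifier.classify_file("example.wav")
    print(text_lab)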
embedding_model.ckpt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0766e6aba5bd68e4a080116bb8ae17afc616284d1c2a50a22d67202b56e73b11
+size 1065702
hyperparams.yaml ADDED
@@ -0,0 +1,64 @@
+# ############################################################################
+# Model: ECAPA-TDNN for Audio Classification
+# ############################################################################
+
+# Pretrain folder (HuggingFace)
+pretrained_path: /content/drive/MyDrive/audify/inference
+
+# Feature parameters
+n_mels: 80
+
+# Output parameters
+out_n_neurons: 4 # Possible languages in the dataset
+
+# Model params
+compute_features: !new:speechbrain.lobes.features.Fbank
+    n_mels: !ref <n_mels>
+
+mean_var_norm: !new:speechbrain.processing.features.InputNormalization
+    norm_type: sentence
+    std_norm: False
+
+# Embedding Model
+CNN: !new:speechbrain.lobes.models.convolution.ConvolutionFrontEnd
+    input_shape: (null, null, 80)
+    num_blocks: 3
+    num_layers_per_block: 1
+    out_channels: (128, 256, 512)
+    kernel_sizes: (3, 3, 1)
+    strides: (2, 2, 1)
+    residuals: (False, False, False)
+    conv_module: !name:speechbrain.nnet.CNN.Conv1d
+    norm: !name:speechbrain.nnet.normalization.BatchNorm1d
+
+pooling: !new:speechbrain.nnet.pooling.AdaptivePool
+    output_size: 1
+
+embedding: !new:torch.nn.ModuleList
+    - [!ref <CNN>, !ref <pooling>]
+
+embedding_model: !new:speechbrain.nnet.containers.LengthsCapableSequential
+    CNN: !ref <CNN>
+    pooling: !ref <pooling>
+
+classifier: !new:speechbrain.lobes.models.ECAPA_TDNN.Classifier
+    input_size: 512
+    out_neurons: !ref <out_n_neurons>
+
+modules:
+    compute_features: !ref <compute_features>
+    mean_var_norm: !ref <mean_var_norm>
+    embedding_model: !ref <embedding_model>
+    classifier: !ref <classifier>
+
+label_encoder: !new:speechbrain.dataio.encoder.CategoricalEncoder
+
+pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
+    loadables:
+        embedding_model: !ref <embedding>
+        classifier: !ref <classifier>
+        label_encoder: !ref <label_encoder>
+    paths:
+        embedding_model: !ref <pretrained_path>/embedding_model.ckpt
+        classifier: !ref <pretrained_path>/classifier.ckpt
+        label_encoder: !ref <pretrained_path>/label_encoder.txt
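
The YAML wires the Fbank feature extractor, CNN front end, pooling, and ECAPA-TDNN classifier head into `modules`, and the Pretrainer maps each loadable to a checkpoint under `pretrained_path` (note that embedding_model.ckpt is loaded into the `embedding` ModuleList, which wraps the same `CNN` and `pooling` instances referenced by `embedding_model`, so the weights end up shared). A sketch of loading the file directly with HyperPyYAML, assuming the yaml and the .ckpt files sit in the current directory rather than the Colab path baked into `pretrained_path`:

    from hyperpyyaml import load_hyperpyyaml

    # Override the Colab-specific pretrained_path with the local directory
    # that actually holds the checkpoints (an assumption for this sketch).
    with open("hyperparams.yaml") as f:
        hparams = load_hyperpyyaml(f, overrides={"pretrained_path": "."})

    # The Pretrainer fetches each file listed under `paths` and loads it
    # into the corresponding object declared under `loadables`.
    hparams["pretrainer"].collect_files()
    hparams["pretrainer"].load_collected()

    embedding_model = hparams["embedding_model"]  # CNN front end + pooling
    classifier = hparams["classifier"]            # 4-way classification head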
label_encoder.txt ADDED
@@ -0,0 +1,6 @@
+'speech' => 0
+'noisy_speech' => 1
+'corrupted' => 2
+'music' => 3
+================
+'starting_index' => 0
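
label_encoder.txt is the CategoricalEncoder's saved index table: the four class labels map to indices 0-3, and the `================` separator precedes encoder metadata such as `starting_index`. A small parsing sketch (plain Python, not the SpeechBrain loader) showing how an argmax over the classifier's four outputs maps back to a class name:

    # Read the label table into an index -> label dict.
    ind2lab = {}
    with open("label_encoder.txt") as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith("="):
                break  # metadata such as 'starting_index' follows the separator
            label, index = line.rsplit("=>", 1)
            ind2lab[int(index)] = label.strip().strip("'")

    print(ind2lab)  # {0: 'speech', 1: 'noisy_speech', 2: 'corrupted', 3: 'music'}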