Initial commit

Browse files

Files changed (6) hide show

.gitattributes +1 -0
classifier.ckpt +3 -0
config.json +3 -0
embedding_model.ckpt +3 -0
hyperparams.yaml +64 -0
label_encoder.txt +6 -0

.gitattributes CHANGED Viewed

@@ -26,3 +26,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zstandard filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zstandard filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text

classifier.ckpt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f1c5cf560dc904388ca459804ce8ee11944b3edc102a146d2a7c3c3ea1b3b67b
+size 9067

config.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+    "speechbrain_interface": "EncoderClassifier"
+}

embedding_model.ckpt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0766e6aba5bd68e4a080116bb8ae17afc616284d1c2a50a22d67202b56e73b11
+size 1065702

hyperparams.yaml ADDED Viewed

	@@ -0,0 +1,64 @@

+# ############################################################################
+# Model: CNN front-end embedding + ECAPA-TDNN classifier head for Audio Classification
+# ############################################################################
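+# Pretrain folder: local directory holding the checkpoints listed under `pretrainer.paths` (this is a filesystem path, not a HuggingFace repo id)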
+pretrained_path: /content/drive/MyDrive/audify/inference
+# Feature parameters
+n_mels: 80
+# Output parameters
+out_n_neurons: 4 # Number of audio classes (speech, noisy_speech, corrupted, music)
+# Model params
+compute_features: !new:speechbrain.lobes.features.Fbank
+  n_mels: !ref <n_mels>
+mean_var_norm: !new:speechbrain.processing.features.InputNormalization
+  norm_type: sentence
+  std_norm: False
+# Embedding Model
+CNN: !new:speechbrain.lobes.models.convolution.ConvolutionFrontEnd
+  input_shape: (null, null, 80)
+  num_blocks: 3
+  num_layers_per_block: 1
+  out_channels: (128, 256, 512)
+  kernel_sizes: (3, 3, 1)
+  strides: (2, 2, 1)
+  residuals: (False, False, False)
+  conv_module: !name:speechbrain.nnet.CNN.Conv1d
+  norm: !name:speechbrain.nnet.normalization.BatchNorm1d
+pooling: !new:speechbrain.nnet.pooling.AdaptivePool
+  output_size: 1
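+# NOTE(review): the pretrainer loads embedding_model.ckpt into this ModuleList (see `loadables`), not into `embedding_model` below; both reference <CNN> and <pooling> -- confirm this is intentional.
+embedding: !new:torch.nn.ModuleList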
+  - [!ref <CNN>, !ref <pooling>]
+embedding_model: !new:speechbrain.nnet.containers.LengthsCapableSequential
+  CNN: !ref <CNN>
+  pooling: !ref <pooling>
+classifier: !new:speechbrain.lobes.models.ECAPA_TDNN.Classifier
+  input_size: 512
+  out_neurons: !ref <out_n_neurons>
+modules:
+  compute_features: !ref <compute_features>
+  mean_var_norm: !ref <mean_var_norm>
+  embedding_model: !ref <embedding_model>
+  classifier: !ref <classifier>
+label_encoder: !new:speechbrain.dataio.encoder.CategoricalEncoder
+pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
+  loadables:
+    embedding_model: !ref <embedding>
+    classifier: !ref <classifier>
+    label_encoder: !ref <label_encoder>
+  paths:
+    embedding_model: !ref <pretrained_path>/embedding_model.ckpt
+    classifier: !ref <pretrained_path>/classifier.ckpt
+    label_encoder: !ref <pretrained_path>/label_encoder.txt

label_encoder.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+'speech' => 0
+'noisy_speech' => 1
+'corrupted' => 2
+'music' => 3
+================
+'starting_index' => 0