Ocelotr
/

xvecver2

Model card · Files and versions · Community

Ocelotr committed on Oct 19, 2023

Commit

0dcab9a

1 Parent(s): 1f407e9

Upload 10 files

Browse files

Files changed (10) hide show

CKPT.yaml +4 -0
brain.ckpt +3 -0
classifier.ckpt +3 -0
counter.ckpt +3 -0
dataloader-TRAIN.ckpt +3 -0
embedding_model.ckpt +3 -0
hyperparams.yaml +174 -0
label_encoder.txt +0 -0
normalizer.ckpt +3 -0
optimizer.ckpt +3 -0

CKPT.yaml ADDED Viewed

	@@ -0,0 +1,4 @@

+# yamllint disable
+ErrorRate: 0.0858599990606308
+end-of-epoch: true
+unixtime: 1697667869.230798

brain.ckpt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:03ff246e5802b4f3fb6b58cbb9c44ddd2db0438189abdac0adfaf35a15693cb0
+size 50

classifier.ckpt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5a7e0361ed78659c386098743ce5cb94993a25506d7f4183ea71fd07f421fcb4
+size 7057579

counter.ckpt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:624b60c58c9d8bfb6ff1886c2fd605d2adeb6ea4da576068201b6c6958ce93f4
+size 2

dataloader-TRAIN.ckpt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a176eeb31e601c3877c87c2843a2f584968975269e369d5c86788b4c2f92d2a2
+size 4

embedding_model.ckpt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:de454242171a3c0e34986aaf0fe6443a74f3b839e55edc3729837c7d94db34a2
+size 16887086

hyperparams.yaml ADDED Viewed

	@@ -0,0 +1,174 @@

+# Generated 2023-10-18 from:
+# /home/wakeb/Abdulrahman-tts/speechbrain/speechbrain/recipes/VoxCeleb/SpeakerRec/hparams/train_x_vectors.yaml
+# yamllint disable
+# ################################
+# Model: Speaker identification with x-vectors (TDNN Xvector embedding model)
+# Authors: Hwidong Na & Mirco Ravanelli
+# ################################
+# Basic parameters
+seed: 651
+__set_seed: !apply:torch.manual_seed [651]
+output_folder: /media/wakeb/T7 Touch/speechbrain651
+save_folder: /media/wakeb/T7 Touch/speechbrain651/save
+train_log: /media/wakeb/T7 Touch/speechbrain651/train_log.txt
+# Data files
+data_folder: /media/wakeb/T7 Touch/data_qasr/  # e.g. /path/to/Voxceleb
+train_annotation: /media/wakeb/T7 Touch/data_qasr/train_reworked.csv
+valid_annotation: /media/wakeb/T7 Touch/data_qasr/dev_reworked.csv
+# Folder to extract data augmentation files
+rir_folder: /media/wakeb/T7 Touch/data_qasr/ # Change it if needed
+# Use the following links for the official voxceleb splits:
+# VoxCeleb1 (cleaned): https://www.robots.ox.ac.uk/~vgg/data/voxceleb/meta/veri_test2.txt
+# VoxCeleb1-H (cleaned): https://www.robots.ox.ac.uk/~vgg/data/voxceleb/meta/list_test_hard2.txt
+# VoxCeleb1-E (cleaned): https://www.robots.ox.ac.uk/~vgg/data/voxceleb/meta/list_test_all2.txt
+# VoxCeleb1-E and VoxCeleb1-H lists are drawn from the VoxCeleb1 training set.
+# Therefore you cannot use any files in VoxCeleb1 for training if you are using these lists for testing.
+verification_file: /media/wakeb/T7 Touch/data_qasr/testing.txt
+split_ratio: [90, 10]
+skip_prep: true
+ckpt_interval_minutes: 15 # save checkpoint every N min
+# Training parameters
+number_of_epochs: 30
+batch_size: 140
+lr: 0.001
+lr_final: 0.000001
+sample_rate: 16000
+sentence_len: 3 # seconds
+shuffle: false
+random_chunk: true
+# Feature parameters
+n_mels: 24
+left_frames: 0
+right_frames: 0
+deltas: false
+# Number of speakers
+out_n_neurons: 2917
+emb_dim: 512
+dataloader_options:
+  batch_size: 140
+  shuffle: false
+  num_workers: 0
+# Functions
+compute_features: &id006 !new:speechbrain.lobes.features.Fbank
+  n_mels: 24
+  left_frames: 0
+  right_frames: 0
+  deltas: false
+embedding_model: &id007 !new:speechbrain.lobes.models.Xvector.Xvector
+  in_channels: 24
+  activation: !name:torch.nn.LeakyReLU
+  tdnn_blocks: 5
+  tdnn_channels: [512, 512, 512, 512, 1500]
+  tdnn_kernel_sizes: [5, 3, 3, 1, 1]
+  tdnn_dilations: [1, 2, 3, 1, 1]
+  lin_neurons: 512
+classifier: &id008 !new:speechbrain.lobes.models.Xvector.Classifier
+  input_shape: [null, null, 512]
+  activation: !name:torch.nn.LeakyReLU
+  lin_blocks: 1
+  lin_neurons: 512
+  out_neurons: 2917
+epoch_counter: &id010 !new:speechbrain.utils.epoch_loop.EpochCounter
+  limit: 30
+augment_wavedrop: &id001 !new:speechbrain.lobes.augment.TimeDomainSpecAugment
+  sample_rate: 16000
+  speeds: [100]
+augment_speed: &id002 !new:speechbrain.lobes.augment.TimeDomainSpecAugment
+  sample_rate: 16000
+  speeds: [95, 100, 105]
+add_rev: &id003 !new:speechbrain.lobes.augment.EnvCorrupt
+  openrir_folder: /media/wakeb/T7 Touch/data_qasr/
+  openrir_max_noise_len: 3.0    # seconds
+  reverb_prob: 1.0
+  noise_prob: 0.0
+  noise_snr_low: 0
+  noise_snr_high: 15
+  rir_scale_factor: 1.0
+add_noise: &id004 !new:speechbrain.lobes.augment.EnvCorrupt
+  openrir_folder: /media/wakeb/T7 Touch/data_qasr/
+  openrir_max_noise_len: 3.0    # seconds
+  reverb_prob: 0.0
+  noise_prob: 1.0
+  noise_snr_low: 0
+  noise_snr_high: 15
+  rir_scale_factor: 1.0
+add_rev_noise: &id005 !new:speechbrain.lobes.augment.EnvCorrupt
+  openrir_folder: /media/wakeb/T7 Touch/data_qasr/
+  openrir_max_noise_len: 3.0    # seconds
+  reverb_prob: 1.0
+  noise_prob: 1.0
+  noise_snr_low: 0
+  noise_snr_high: 15
+  rir_scale_factor: 1.0
+# Definition of the augmentation pipeline.
+# If concat_augment = False, the augmentation techniques are applied
+# in sequence. If concat_augment = True, all the augmented signals
+# are concatenated in a single big batch.
+augment_pipeline: [*id001, *id002, *id003, *id004, *id005]
+concat_augment: true
+mean_var_norm: &id009 !new:speechbrain.processing.features.InputNormalization
+# Cost + optimization (section starts at compute_cost below; the next two keys are mean_var_norm arguments)
+  norm_type: sentence
+  std_norm: false
+modules:
+  compute_features: *id006
+  augment_wavedrop: *id001
+  augment_speed: *id002
+  add_rev: *id003
+  add_noise: *id004
+  add_rev_noise: *id005
+  embedding_model: *id007
+  classifier: *id008
+  mean_var_norm: *id009
+compute_cost: !name:speechbrain.nnet.losses.nll_loss
+# compute_error: !name:speechbrain.nnet.losses.classification_error
+opt_class: !name:torch.optim.Adam
+  lr: 0.001
+  weight_decay: 0.000002
+lr_annealing: !new:speechbrain.nnet.schedulers.LinearScheduler
+  initial_value: 0.001
+  final_value: 0.000001
+  epoch_count: 30
+# Logging + checkpoints
+train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
+  save_file: /media/wakeb/T7 Touch/speechbrain651/train_log.txt
+error_stats: !name:speechbrain.utils.metric_stats.MetricStats
+  metric: !name:speechbrain.nnet.losses.classification_error
+    reduction: batch
+checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
+  checkpoints_dir: /media/wakeb/T7 Touch/speechbrain651/save
+  recoverables:
+    embedding_model: *id007
+    classifier: *id008
+    normalizer: *id009
+    counter: *id010

label_encoder.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

normalizer.ckpt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b096223df751192e081a555337738d3746450113e7b7fa81f92c0d1d4c0617ac
+size 1075

optimizer.ckpt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bce30746a4c5ff2d4009f8c845e17d5e68d662e686d85c2434018db5a0a0dee2
+size 47806479