"""
Mostly copied from https://github.com/HarryVolek/PyTorch_Speaker_Verification
"""
import os
import random

import numpy as np
import torch
from torch.utils.data import Dataset

from utils.hparam import hparam as hp
class GujaratiSpeakerVerificationDataset(Dataset):
    def __init__(self, shuffle=True, utter_start=0, split='train'):
        # data path
        if split != 'val':
            self.path = hp.data.train_path
            self.utter_num = hp.train.M  # number of utterances sampled per speaker
        else:
            self.path = hp.data.test_path
            self.utter_num = hp.test.M
        self.file_list = os.listdir(self.path)  # one .npy file per speaker
        self.shuffle = shuffle
        self.utter_start = utter_start
        self.split = split

    def __len__(self):
        return len(self.file_list)

    def __getitem__(self, idx):
        np_file_list = os.listdir(self.path)

        if self.shuffle:
            selected_file = random.sample(np_file_list, 1)[0]  # select random speaker
        else:
            selected_file = np_file_list[idx]

        # load utterance spectrograms of the selected speaker
        utters = np.load(os.path.join(self.path, selected_file))

        if self.shuffle:
            utter_index = np.random.randint(0, utters.shape[0], self.utter_num)  # select M utterances per speaker
            utterance = utters[utter_index]
        else:
            utterance = utters[self.utter_start: self.utter_start + self.utter_num]  # utterances of a speaker [batch(M), n_mels, frames]

        utterance = utterance[:, :, :160]  # TODO implement variable length batch size
        utterance = torch.tensor(np.transpose(utterance, axes=(0, 2, 1)))  # transpose to [batch, frames, n_mels]
        return utterance

    def __repr__(self):
        return f"{self.__class__.__name__}(split={self.split!r}, num_speakers={len(self.file_list)}, num_utterances={self.utter_num})"