Spaces:
Sleeping
Sleeping
"""
Mostly copied from https://github.com/HarryVolek/PyTorch_Speaker_Verification
"""
import glob | |
import numpy as np | |
import os | |
import random | |
from random import shuffle | |
import torch | |
from torch.utils.data import Dataset | |
from utils.hparam import hparam as hp | |
from utils.utils import mfccs_and_spec | |
class GujaratiSpeakerVerificationDataset(Dataset):
    """Dataset yielding a batch of utterances from a single speaker per item.

    Every entry in the data directory is loaded with ``np.load`` and treated
    as one speaker. According to the original inline comments the stored
    array is laid out as ``[utterances, n_mels, frames]``; each item returned
    by the dataset is transposed to ``[utterances, frames, n_mels]``.
    """

    # Frames kept per utterance when ``max_frames`` is not given.
    DEFAULT_MAX_FRAMES = 160

    def __init__(self, shuffle=True, utter_start=0, split='train',
                 max_frames=DEFAULT_MAX_FRAMES):
        """Configure the dataset from the project hyper-parameters.

        Args:
            shuffle: When true, each item is a random speaker with randomly
                drawn utterances; when false, items are indexed
                deterministically.
            utter_start: First utterance index used in non-shuffle mode.
            split: ``'val'`` reads ``hp.data.test_path`` / ``hp.test.M``;
                any other value reads ``hp.data.train_path`` / ``hp.train.M``.
            max_frames: Number of leading frames kept from each utterance.
        """
        # NOTE(review): every split other than 'val' (including e.g. 'test')
        # falls back to the training data -- confirm this is intended.
        if split != 'val':
            self.path = hp.data.train_path
            self.utter_num = hp.train.M
        else:
            self.path = hp.data.test_path
            self.utter_num = hp.test.M
        # os.listdir returns entries in arbitrary order; sort once so that
        # index -> speaker is reproducible, and reuse the list everywhere so
        # __len__ and __getitem__ always agree.
        self.file_list = sorted(os.listdir(self.path))
        self.shuffle = shuffle
        self.utter_start = utter_start
        self.split = split
        self.max_frames = max_frames

    def __len__(self):
        """Return the number of speaker files found at construction time."""
        return len(self.file_list)

    def __getitem__(self, idx):
        """Return ``utter_num`` utterances of one speaker as a tensor.

        In shuffle mode ``idx`` is ignored: a random speaker is chosen and
        utterance indices are drawn with ``np.random.randint`` (i.e. with
        replacement). Otherwise speaker ``idx`` is used and utterances
        ``utter_start`` .. ``utter_start + utter_num`` are sliced; a speaker
        with fewer utterances yields a shorter batch.

        Returns:
            Tensor with the last two axes of the stored array swapped and at
            most ``max_frames`` entries along the frame axis.
        """
        if self.shuffle:
            selected_file = random.choice(self.file_list)  # random speaker
        else:
            selected_file = self.file_list[idx]

        utters = np.load(os.path.join(self.path, selected_file))

        if self.shuffle:
            # Select M utterances of this speaker (duplicates are possible).
            utter_index = np.random.randint(0, utters.shape[0], self.utter_num)
            utterance = utters[utter_index]
        else:
            start = self.utter_start
            utterance = utters[start:start + self.utter_num]

        # Crop the last axis to a fixed length.
        # TODO implement variable length batch size
        utterance = utterance[:, :, :self.max_frames]
        # Swap the last two axes: [batch, n_mels, frames] -> [batch, frames, n_mels].
        return torch.tensor(np.transpose(utterance, axes=(0, 2, 1)))

    def __repr__(self):
        """Return a short summary: split, speaker count and utterances per item."""
        return f"{self.__class__.__name__}(split={self.split!r}, num_speakers={len(self.file_list)}, num_utterances={self.utter_num})"