|
from pathlib import Path |
|
import torch |
|
import numpy as np |
|
from scipy.spatial.distance import cosine |
|
import json |
|
|
|
from .demo_speaker_embeddings import DemoSpeakerEmbeddings |
|
|
|
|
|
class DemoGANAnonymizer: |
|
|
|
def __init__(self, vec_type='xvector', device=None, sim_threshold=0.7): |
|
self.vec_type = vec_type |
|
self.device = device |
|
|
|
self.sim_threshold = sim_threshold |
|
|
|
self.embedding_extractor = DemoSpeakerEmbeddings(vec_type=self.vec_type, device=self.device) |
|
|
|
self.model_dir = None |
|
self.vectors_file = None |
|
self.gan_vectors = None |
|
self.unused_indices = None |
|
|
|
def load_parameters(self, model_dir: Path): |
|
self.model_dir = model_dir |
|
with open(model_dir / 'settings.json') as f: |
|
settings = json.load(f) |
|
self.vec_type = settings.get('vec_type', self.vec_type) |
|
self.vectors_file = settings.get('vectors_file', self.vectors_file) |
|
|
|
self.gan_vectors = torch.load(model_dir / self.vectors_file, map_location=self.device) |
|
self.unused_indices = self.load_unused_indices() |
|
|
|
def load_unused_indices(self): |
|
return torch.load(self.model_dir / f'unused_indices_{self.vectors_file}', map_location='cpu') |
|
|
|
def anonymize_embedding(self, audio, sr): |
|
speaker_embedding = self.embedding_extractor.extract_vector_from_audio(wave=audio, sr=sr) |
|
anon_vec = self._select_gan_vector(spk_vec=speaker_embedding) |
|
return anon_vec |
|
|
|
def _select_gan_vector(self, spk_vec): |
|
i = 0 |
|
limit = 20 |
|
while i < limit: |
|
idx = np.random.choice(self.unused_indices) |
|
anon_vec = self.gan_vectors[idx] |
|
sim = 1 - cosine(spk_vec.cpu().numpy(), anon_vec.cpu().numpy()) |
|
if sim < self.sim_threshold: |
|
break |
|
i += 1 |
|
self.unused_indices = self.unused_indices[self.unused_indices != idx] |
|
if len(self.unused_indices) == 0: |
|
self.unused_indices = self.load_unused_indices() |
|
return anon_vec |