""" | |
This code is modified version of LCNN baseline | |
from ASVSpoof2021 challenge - https://github.com/asvspoof-challenge/2021/blob/main/LA/Baseline-LFCC-LCNN/project/baseline_LA/model.py | |
""" | |
import torch
import torch.nn as torch_nn

from src import frontends

NUM_COEFFICIENTS = 384

class BLSTMLayer(torch_nn.Module):
    """Wrapper over a bi-directional LSTM.

    Input tensor:  (batchsize, length, dim_in)
    Output tensor: (batchsize, length, dim_out)
    The sequence length is kept unchanged.
    """
    def __init__(self, input_dim, output_dim):
        super().__init__()
        if output_dim % 2 != 0:
            # each direction contributes output_dim // 2 features,
            # so the layer size must be even
            raise ValueError(
                f"BLSTMLayer expects an even output_dim, got {output_dim}"
            )
        # bi-directional LSTM; each direction has output_dim // 2 hidden units
        self.l_blstm = torch_nn.LSTM(
            input_dim,
            output_dim // 2,
            bidirectional=True
        )

    def forward(self, x):
        # permute to (length, batchsize, dim) as expected by nn.LSTM
        blstm_data, _ = self.l_blstm(x.permute(1, 0, 2))
        # permute back to (batchsize, length, dim)
        return blstm_data.permute(1, 0, 2)
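
# A minimal sanity check for BLSTMLayer (an illustrative helper added here,
# not part of the original baseline): the layer should preserve the sequence
# length and map the feature dimension to output_dim.
def _demo_blstm_layer():
    layer = BLSTMLayer(input_dim=20, output_dim=16)
    x = torch.rand(4, 100, 20)          # (batch, length, dim_in)
    y = layer(x)
    assert y.shape == (4, 100, 16)      # (batch, length, dim_out)
    return y
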
class MaxFeatureMap2D(torch_nn.Module):
    """Max feature map (along a given dimension).

    MaxFeatureMap2D(max_dim=1) halves the chosen dimension by taking
    an element-wise max over pairs of feature maps:

        l_conv2d = MaxFeatureMap2D(1)
        data_in = torch.rand([1, 4, 5, 5])
        data_out = l_conv2d(data_in)

    Input:
    ------
    data_in: tensor of shape (batch, channel, ...)

    Output:
    -------
    data_out: tensor of shape (batch, channel // 2, ...)

    Note
    ----
    By default, max-feature-map operates on the channel dimension,
    i.e. maxout over pairs of channels.
    """
    def __init__(self, max_dim=1):
        super().__init__()
        self.max_dim = max_dim

    def forward(self, inputs):
        # suppose inputs (batchsize, channel, length, dim)
        shape = list(inputs.size())
        if self.max_dim >= len(shape):
            raise ValueError(
                f"MaxFeatureMap: cannot maximize on dim {self.max_dim}, "
                f"input has only {len(shape)} dimensions"
            )
        if shape[self.max_dim] % 2 != 0:
            raise ValueError(
                f"MaxFeatureMap: dim {self.max_dim} must have an even size, "
                f"got {shape[self.max_dim]}"
            )
        shape[self.max_dim] = shape[self.max_dim] // 2
        shape.insert(self.max_dim, 2)
        # view to (batchsize, 2, channel // 2, ...) and maximize over
        # the inserted pair dimension
        m, _ = inputs.view(*shape).max(self.max_dim)
        return m
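
# A small illustration of MaxFeatureMap2D (hypothetical helper, not in the
# original baseline): with 4 input channels, output channel i is the
# element-wise max of input channels i and i + 2.
def _demo_max_feature_map():
    mfm = MaxFeatureMap2D(max_dim=1)
    data_in = torch.rand([1, 4, 5, 5])
    data_out = mfm(data_in)
    assert data_out.shape == (1, 2, 5, 5)
    assert torch.equal(data_out[0, 0], torch.max(data_in[0, 0], data_in[0, 2]))
    return data_out
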

##############
## FOR MODEL
##############
class LCNN(torch_nn.Module):
    """ Model definition
    """
    def __init__(self, **kwargs):
        super().__init__()
        input_channels = kwargs.get("input_channels", 1)
        num_coefficients = kwargs.get("num_coefficients", NUM_COEFFICIENTS)

        # number of frequency coefficients in the input features
        self.num_coefficients = num_coefficients

        # dimension of embedding vectors
        # here, the embedding is just the activation before sigmoid()
        self.v_emd_dim = 1

        # the original baseline can handle multiple front-end configurations;
        # by default, only a single front-end is used.
        # Note: each MaxFeatureMap2D halves the channel dimension,
        # e.g. Conv2d(..., 64) followed by MFM yields 32 channels.
        self.m_transform = torch_nn.Sequential(
            torch_nn.Conv2d(input_channels, 64, (5, 5), 1, padding=(2, 2)),
            MaxFeatureMap2D(),
            torch_nn.MaxPool2d((2, 2), (2, 2)),

            torch_nn.Conv2d(32, 64, (1, 1), 1, padding=(0, 0)),
            MaxFeatureMap2D(),
            torch_nn.BatchNorm2d(32, affine=False),
            torch_nn.Conv2d(32, 96, (3, 3), 1, padding=(1, 1)),
            MaxFeatureMap2D(),
            torch_nn.MaxPool2d((2, 2), (2, 2)),
            torch_nn.BatchNorm2d(48, affine=False),

            torch_nn.Conv2d(48, 96, (1, 1), 1, padding=(0, 0)),
            MaxFeatureMap2D(),
            torch_nn.BatchNorm2d(48, affine=False),
            torch_nn.Conv2d(48, 128, (3, 3), 1, padding=(1, 1)),
            MaxFeatureMap2D(),
            torch_nn.MaxPool2d((2, 2), (2, 2)),

            torch_nn.Conv2d(64, 128, (1, 1), 1, padding=(0, 0)),
            MaxFeatureMap2D(),
            torch_nn.BatchNorm2d(64, affine=False),
            torch_nn.Conv2d(64, 64, (3, 3), 1, padding=(1, 1)),
            MaxFeatureMap2D(),
            torch_nn.BatchNorm2d(32, affine=False),

            torch_nn.Conv2d(32, 64, (1, 1), 1, padding=(0, 0)),
            MaxFeatureMap2D(),
            torch_nn.BatchNorm2d(32, affine=False),
            torch_nn.Conv2d(32, 64, (3, 3), 1, padding=(1, 1)),
            MaxFeatureMap2D(),
            torch_nn.MaxPool2d((2, 2), (2, 2)),
            torch_nn.Dropout(0.7)
        )

        # four (2, 2) max-pool layers shrink the coefficient axis by 16,
        # and the final MFM leaves 32 channels, hence the flattened size
        self.m_before_pooling = torch_nn.Sequential(
            BLSTMLayer((self.num_coefficients // 16) * 32, (self.num_coefficients // 16) * 32),
            BLSTMLayer((self.num_coefficients // 16) * 32, (self.num_coefficients // 16) * 32)
        )

        self.m_output_act = torch_nn.Linear((self.num_coefficients // 16) * 32, self.v_emd_dim)
    def _compute_embedding(self, x):
        """Compute embeddings for a batch of front-end features.
        Assume x (batchsize, channel, num_coefficients, frames)
        Output (batchsize, output_dim)
        """
        batch_size = x.shape[0]

        # buffer to store output embeddings; kept from the multi-front-end
        # baseline, where one slice per sub-model would be filled in
        output_emb = torch.zeros(
            [batch_size, self.v_emd_dim],
            device=x.device,
            dtype=x.dtype
        )
        # only a single front-end (sub-model) here
        idx = 0

        # 1. permute to (batch, channel, frames, num_coefficients)
        # 2. compute hidden features
        x = x.permute(0, 1, 3, 2)
        hidden_features = self.m_transform(x)

        # 3. (batch, channel, frames // N, feat_dim // N) ->
        #    (batch, frames // N, channel * feat_dim // N)
        #    where N is the downsampling factor of the max-pool layers
        hidden_features = hidden_features.permute(0, 2, 1, 3).contiguous()
        frame_num = hidden_features.shape[1]
        hidden_features = hidden_features.view(batch_size, frame_num, -1)

        # 4. pass through the BLSTM layers (with a residual connection),
        #    then average-pool over time
        hidden_features_lstm = self.m_before_pooling(hidden_features)

        # 5. pass through the output layer
        tmp_emb = self.m_output_act((hidden_features_lstm + hidden_features).mean(1))
        output_emb[idx * batch_size : (idx + 1) * batch_size] = tmp_emb

        return output_emb
    def _compute_score(self, feature_vec):
        # feature_vec is (batch * submodel, 1); returns per-sample probabilities.
        # Note: not called by forward(), which returns the raw logits.
        return torch.sigmoid(feature_vec).squeeze(1)

    def forward(self, x):
        feature_vec = self._compute_embedding(x)
        return feature_vec
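
# A shape sketch for the LCNN transform (illustrative helper, not part of the
# original baseline), assuming an 80-coefficient front-end: four (2, 2)
# max-pools divide both the time and coefficient axes by 16, and the last MFM
# leaves 32 channels, so the flattened per-frame feature size is
# (80 // 16) * 32 = 160.
def _demo_lcnn_shapes():
    model = LCNN(input_channels=1, num_coefficients=80)
    x = torch.rand(2, 1, 80, 404)        # (batch, channel, n_coeffs, frames)
    emb = model(x)
    assert emb.shape == (2, 1)           # one logit per sample
    return emb
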
class FrontendLCNN(LCNN):
    """ Model definition
    """
    def __init__(self, device: str = "cuda", **kwargs):
        super().__init__(**kwargs)

        self.device = device

        frontend_name = kwargs.get("frontend_algorithm", [])
        self.frontend = frontends.get_frontend(frontend_name)
        print(f"Using {frontend_name} frontend")

    def _compute_frontend(self, x):
        frontend = self.frontend(x)
        if frontend.ndim < 4:
            return frontend.unsqueeze(1)  # (bs, 1, n_lfcc, frames)
        return frontend  # (bs, n, n_lfcc, frames)

    def forward(self, x):
        x = self._compute_frontend(x)
        feature_vec = self._compute_embedding(x)
        return feature_vec
if __name__ == "__main__":
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print("Definition of model")
    model = FrontendLCNN(input_channels=2, num_coefficients=80, device=device, frontend_algorithm=["mel_spec"])
    model = model.to(device)
    batch_size = 12
    mock_input = torch.rand((batch_size, 64_600), device=device)
    output = model(mock_input)
    print(output.shape)
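
    # run the illustrative sanity checks defined above (hypothetical helpers,
    # not part of the original baseline)
    _demo_blstm_layer()
    _demo_max_feature_map()
    _demo_lcnn_shapes()
    print("All shape checks passed")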