# Snapshot of commit eb339cb ("[Init]") by wxDai.
import numpy as np
import scipy.linalg
from scipy.ndimage import uniform_filter1d
import torch
from torch import linalg
# Motion Reconstruction
def calculate_mpjpe(gt_joints: torch.Tensor, pred_joints: torch.Tensor, align_root: bool = True) -> torch.Tensor:
    """Mean Per-Joint Position Error for each pose.

    Params:
    -- gt_joints: num_poses x num_joints x 3 ground-truth joint positions
    -- pred_joints: num_poses x num_joints x 3 predictions
       (obtained from recover_from_ric())
    -- align_root: if True, translate both skeletons so joint 0 (the root /
       pelvis) sits at the origin before measuring error
    Returns:
    -- num_poses tensor: per-pose mean joint error
    """
    assert gt_joints.shape == pred_joints.shape, \
        f"GT shape: {gt_joints.shape}, pred shape: {pred_joints.shape}"
    if align_root:
        # Subtract each pose's root joint (kept as a 1-joint slice so it broadcasts).
        gt_joints = gt_joints - gt_joints[:, [0]]
        pred_joints = pred_joints - pred_joints[:, [0]]
    per_joint_err = torch.linalg.norm(pred_joints - gt_joints, dim=-1)  # num_poses x num_joints
    return per_joint_err.mean(dim=-1)  # num_poses
# Text-to-Motion
# (X - X_train)*(X - X_train) = -2X*X_train + X*X + X_train*X_train
def euclidean_distance_matrix(matrix1: torch.Tensor, matrix2: torch.Tensor) -> torch.Tensor:
    """Pairwise Euclidean distances via the expansion
    (x - y).(x - y) = x.x - 2*x.y + y.y.

    Params:
    -- matrix1: N1 x D
    -- matrix2: N2 x D
    Returns:
    -- dists: N1 x N2
       dists[i, j] == distance(matrix1[i], matrix2[j])
    """
    assert matrix1.shape[1] == matrix2.shape[1]
    d1 = -2 * torch.mm(matrix1, matrix2.T)  # shape (N1, N2)
    d2 = torch.sum(torch.square(matrix1), dim=1, keepdim=True)  # shape (N1, 1)
    d3 = torch.sum(torch.square(matrix2), dim=1)  # shape (N2,)
    sq_dists = d1 + d2 + d3  # broadcasting
    # Round-off can leave tiny negative values (e.g. a point against itself),
    # which sqrt would turn into NaN -- clamp them to zero first.
    dists = torch.sqrt(torch.clamp(sq_dists, min=0.0))
    return dists
def euclidean_distance_matrix_np(matrix1: np.ndarray, matrix2: np.ndarray) -> np.ndarray:
    """Pairwise Euclidean distances via the expansion
    (x - y).(x - y) = x.x - 2*x.y + y.y.

    Params:
    -- matrix1: N1 x D
    -- matrix2: N2 x D
    Returns:
    -- dists: N1 x N2
       dists[i, j] == distance(matrix1[i], matrix2[j])
    """
    assert matrix1.shape[1] == matrix2.shape[1]
    d1 = -2 * np.dot(matrix1, matrix2.T)  # shape (N1, N2)
    d2 = np.sum(np.square(matrix1), axis=1, keepdims=True)  # shape (N1, 1)
    d3 = np.sum(np.square(matrix2), axis=1)  # shape (N2,)
    sq_dists = d1 + d2 + d3  # broadcasting
    # Round-off can leave tiny negative values (e.g. a point against itself),
    # which sqrt would turn into NaN -- clamp them to zero first.
    dists = np.sqrt(np.maximum(sq_dists, 0.0))
    return dists
def calculate_top_k(mat: torch.Tensor, top_k: int) -> torch.Tensor:
    """Cumulative top-k hit matrix for retrieval evaluation.

    Params:
    -- mat: size x size matrix of ranked indices; row i lists candidate
       indices for query i in rank order
    -- top_k: number of ranks to evaluate
    Returns:
    -- size x top_k bool tensor; entry [i, k] is True iff the correct
       index i appears among the first k+1 ranks of row i
    """
    size = mat.shape[0]
    # Row i of the target matrix is filled with the value i.
    target = torch.arange(size, device=mat.device).unsqueeze(1).repeat_interleave(size, 1)
    hits = mat == target
    # Cumulative logical OR over the first top_k ranks: a positive running
    # count means the correct index was seen at some earlier-or-equal rank.
    top_k_mat = hits[:, :top_k].cumsum(dim=1) > 0
    return top_k_mat
def calculate_activation_statistics(activations: torch.Tensor) -> tuple:
    """Gaussian statistics (mean, covariance) of feature activations.

    Params:
    -- activations: num_samples x dim_feat torch tensor (moved to CPU here)
    Returns:
    -- mu: dim_feat
    -- sigma: dim_feat x dim_feat
    """
    feats = activations.cpu().numpy()
    mu = np.mean(feats, axis=0)
    # rowvar=False: rows are samples, columns are features.
    sigma = np.cov(feats, rowvar=False)
    return mu, sigma
def calculate_activation_statistics_np(activations: np.ndarray) -> tuple:
    """Gaussian statistics (mean, covariance) of feature activations.

    Params:
    -- activations: num_samples x dim_feat
    Returns:
    -- mu: dim_feat
    -- cov: dim_feat x dim_feat
    """
    # rowvar=False: rows are samples, columns are features.
    return np.mean(activations, axis=0), np.cov(activations, rowvar=False)
def calculate_frechet_distance_np(
        mu1: np.ndarray,
        sigma1: np.ndarray,
        mu2: np.ndarray,
        sigma2: np.ndarray,
        eps: float = 1e-6) -> float:
    """Numpy implementation of the Frechet Distance (FID).

    For Gaussians X_1 ~ N(mu_1, C_1) and X_2 ~ N(mu_2, C_2):
        d^2 = ||mu_1 - mu_2||^2 + Tr(C_1 + C_2 - 2*sqrt(C_1*C_2)).
    Stable version by Dougal J. Sutherland.

    Params:
    -- mu1: sample mean of activations for generated samples
    -- sigma1: covariance of activations for generated samples
    -- mu2: sample mean of activations on the reference data set
    -- sigma2: covariance of activations on the reference data set
    -- eps: diagonal jitter used when the covariance product is near-singular
    Returns:
    -- the (squared) Frechet distance as a float
    """
    mu1 = np.atleast_1d(mu1)
    mu2 = np.atleast_1d(mu2)
    sigma1 = np.atleast_2d(sigma1)
    sigma2 = np.atleast_2d(sigma2)
    assert mu1.shape == mu2.shape, \
        "Training and test mean vectors have different lengths"
    assert sigma1.shape == sigma2.shape, \
        "Training and test covariances have different dimensions"
    mean_diff = mu1 - mu2
    # sqrtm of a near-singular product can contain inf/nan; retry with jitter.
    covmean, _ = scipy.linalg.sqrtm(sigma1.dot(sigma2), disp=False)
    if not np.isfinite(covmean).all():
        print(("fid calculation produces singular product; "
               "adding %s to diagonal of cov estimates") % eps)
        offset = np.eye(sigma1.shape[0]) * eps
        covmean = scipy.linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset))
    # Numerical error can introduce a slight imaginary component; keep the
    # real part only if the imaginary part is negligible.
    if np.iscomplexobj(covmean):
        if not np.allclose(np.diagonal(covmean).imag, 0, atol=1e-3):
            m = np.max(np.abs(covmean.imag))
            raise ValueError("Imaginary component {}".format(m))
        covmean = covmean.real
    return (mean_diff.dot(mean_diff) + np.trace(sigma1) + np.trace(sigma2)
            - 2 * np.trace(covmean))
def calculate_diversity(activation: torch.Tensor, diversity_times: int) -> float:
    """Average pairwise distance between two random subsets of activations.

    Params:
    -- activation: num_samples x dim_feat
    -- diversity_times: number of random pairs to sample (<= num_samples)
    Returns:
    -- mean L2 distance over the sampled pairs (0-dim torch.Tensor; note the
       annotation says float but the tensor is returned, as before)
    """
    assert len(activation.shape) == 2
    # >= (not >) matches calculate_diversity_np: sampling without replacement
    # only requires diversity_times <= num_samples.
    assert activation.shape[0] >= diversity_times
    num_samples = activation.shape[0]
    first_indices = np.random.choice(num_samples,
                                     diversity_times,
                                     replace=False)
    second_indices = np.random.choice(num_samples,
                                      diversity_times,
                                      replace=False)
    # Bug fix: `linalg` here is torch.linalg, whose norm() takes `dim=` --
    # the original `axis=` keyword raised TypeError at runtime.
    dist = torch.linalg.norm(activation[first_indices] - activation[second_indices],
                             dim=1)
    return dist.mean()
def calculate_diversity_np(activation: np.ndarray, diversity_times: int) -> float:
    """Average pairwise distance between two random subsets of activations.

    Params:
    -- activation: num_samples x dim_feat
    -- diversity_times: number of random pairs to sample (<= num_samples)
    Returns:
    -- mean L2 distance over the sampled pairs
    """
    assert len(activation.shape) == 2
    assert activation.shape[0] >= diversity_times
    total = activation.shape[0]
    idx_a = np.random.choice(total, diversity_times, replace=False)
    idx_b = np.random.choice(total, diversity_times, replace=False)
    pair_dists = scipy.linalg.norm(activation[idx_a] - activation[idx_b], axis=1)
    return pair_dists.mean()
def calculate_multimodality_np(activation: np.ndarray, multimodality_times: int) -> float:
    """Average distance between random pairs of generations per condition.

    Params:
    -- activation: num_conditions x num_per_condition x dim_feat
    -- multimodality_times: number of random pairs per condition
       (strictly less than num_per_condition)
    Returns:
    -- mean L2 distance over all sampled pairs and conditions
    """
    assert len(activation.shape) == 3
    assert activation.shape[1] > multimodality_times
    per_condition = activation.shape[1]
    pick_a = np.random.choice(per_condition, multimodality_times, replace=False)
    pick_b = np.random.choice(per_condition, multimodality_times, replace=False)
    pair_dists = scipy.linalg.norm(activation[:, pick_a] - activation[:, pick_b],
                                   axis=2)
    return pair_dists.mean()
# Motion Control
def calculate_skating_ratio(motions: torch.Tensor, dataset_name: str) -> tuple:
    """Estimate foot skating (feet sliding while in ground contact).

    Params:
    -- motions: [bsz, fs, 22 or 21, 3] joint positions; y is up, so the feet
       slide in the XZ plane
    -- dataset_name: 'humanml3d' or 'kit' (selects the foot joint indices)
    Returns:
    -- skating_ratio: [bsz] fraction of frame transitions where either foot skates
    -- skate_vel: [bsz, fs-1, 2] smoothed foot speed masked to contact frames
    """
    thresh_height = 0.05  # a foot below this height counts as ground contact
    fps = 20.0  # assumes 20-fps motion clips -- TODO confirm for other data
    thresh_vel = 0.50  # contact speed (per second) above this counts as skating
    avg_window = 5 # frames
    # XZ plane, y up
    # 10 left, 11 right foot. (HumanML3D)
    # 15 left, 20 right foot. (KIT)
    # motions [bsz, fs, 22 or 21, 3]
    if dataset_name == 'humanml3d':
        foot_idx = [10, 11]
    elif dataset_name == 'kit':
        foot_idx = [15, 20]
    else:
        raise ValueError(f'Invalid Dataset: {dataset_name}')
    verts_feet = motions[:, :, foot_idx, :].detach().cpu().numpy() # [bsz, fs, 2, 3]
    # Horizontal (XZ) speed of each foot between consecutive frames.
    verts_feet_plane_vel = np.linalg.norm(verts_feet[:, 1:, :, [0, 2]] -
                                          verts_feet[:, :-1, :, [0, 2]], axis=-1) * fps # [bsz, fs-1, 2]
    # Smooth the speed over a short window along time to suppress jitter.
    vel_avg = uniform_filter1d(verts_feet_plane_vel, axis=1, size=avg_window, mode='constant', origin=0)
    verts_feet_height = verts_feet[:, :, :, 1] # [bsz, fs, 2]
    # If feet touch ground in adjacent frames
    feet_contact = np.logical_and((verts_feet_height[:, :-1, :] < thresh_height),
                                  (verts_feet_height[:, 1:, :] < thresh_height)) # [bs, fs-1, 2]
    # skate velocity: smoothed speed, zeroed outside contact frames
    skate_vel = feet_contact * vel_avg
    # Skating: in contact while both the raw and the smoothed speed exceed the threshold.
    skating = np.logical_and(feet_contact, (verts_feet_plane_vel > thresh_vel))
    skating = np.logical_and(skating, (vel_avg > thresh_vel))
    # A frame transition skates if either foot slides.
    skating = np.logical_or(skating[:, :, 0], skating[:, :, 1]) # [bs, fs-1]
    skating_ratio = np.sum(skating, axis=1) / skating.shape[1]
    return skating_ratio, skate_vel
def calculate_trajectory_error(dist_error: torch.Tensor, mean_err_traj: torch.Tensor,
                               mask: torch.Tensor, strict: bool = True) -> torch.Tensor:
    """Trajectory-following error summary for controlled motion.

    Params:
    -- dist_error: per-keyframe distance errors
    -- mean_err_traj: mean keyframe error of the trajectory (used when not strict)
    -- mask: keyframe mask; its sum normalizes the per-keyframe statistics
    -- strict: if True a trajectory fails when ANY keyframe exceeds the
       threshold, otherwise when the MEAN error exceeds it
    Returns:
    -- tensor [traj_fail_0.2, traj_fail_0.5, kf_fail_0.2, kf_fail_0.5, mean_err]
    """
    if strict:
        fail_02 = 1.0 - int((dist_error <= 0.2).all().item())
        fail_05 = 1.0 - int((dist_error <= 0.5).all().item())
    else:
        fail_02 = int((mean_err_traj > 0.2).item())
        fail_05 = int((mean_err_traj > 0.5).item())
    keyframes = mask.sum()
    kf_fail_02 = (dist_error > 0.2).sum() / keyframes
    kf_fail_05 = (dist_error > 0.5).sum() / keyframes
    mean_err = dist_error.sum() / keyframes
    return torch.tensor([fail_02, fail_05, kf_fail_02, kf_fail_05, mean_err])
def control_l2(motion: torch.Tensor, hint: torch.Tensor, hint_mask: torch.Tensor) -> torch.Tensor:
    """L2 distance between motion and control hint, masked to hinted entries.

    Params:
    -- motion: generated joint positions
    -- hint: control-signal target positions (same shape as motion)
    -- hint_mask: mask zeroing out entries without a hint
    Returns:
    -- per-element L2 norm over the last dimension
    """
    masked_delta = (motion - hint) * hint_mask
    return torch.norm(masked_delta, p=2, dim=-1)