# Snapshot of commit eb339cb ("[Init]") by wxDai.
import numpy as np
import scipy.linalg
from scipy.ndimage import uniform_filter1d
import torch
from torch import linalg
# Motion Reconstruction
def calculate_mpjpe(gt_joints: torch.Tensor, pred_joints: torch.Tensor, align_root: bool = True) -> torch.Tensor:
    """Mean Per-Joint Position Error for each pose.

    Params:
    -- gt_joints: num_poses x num_joints x 3 ground-truth joint positions
    -- pred_joints: num_poses x num_joints x 3 predictions
       (obtained from recover_from_ric())
    -- align_root: if True, translate both skeletons so joint 0 (the root /
       pelvis) sits at the origin before measuring error
    Returns:
    -- num_poses tensor: per-pose mean joint error
    """
    assert gt_joints.shape == pred_joints.shape, \
        f"GT shape: {gt_joints.shape}, pred shape: {pred_joints.shape}"
    if align_root:
        # Subtract each pose's root joint (kept as a 1-joint slice so it broadcasts).
        gt_joints = gt_joints - gt_joints[:, [0]]
        pred_joints = pred_joints - pred_joints[:, [0]]
    per_joint_err = torch.linalg.norm(pred_joints - gt_joints, dim=-1)  # num_poses x num_joints
    return per_joint_err.mean(dim=-1)  # num_poses
# Text-to-Motion
# (X - X_train)*(X - X_train) = -2X*X_train + X*X + X_train*X_train
def euclidean_distance_matrix(matrix1: torch.Tensor, matrix2: torch.Tensor) -> torch.Tensor:
    """Pairwise Euclidean distances via the expansion
    (x - y).(x - y) = x.x - 2*x.y + y.y.

    Params:
    -- matrix1: N1 x D
    -- matrix2: N2 x D
    Returns:
    -- dists: N1 x N2
       dists[i, j] == distance(matrix1[i], matrix2[j])
    """
    assert matrix1.shape[1] == matrix2.shape[1]
    d1 = -2 * torch.mm(matrix1, matrix2.T)  # shape (N1, N2)
    d2 = torch.sum(torch.square(matrix1), dim=1, keepdim=True)  # shape (N1, 1)
    d3 = torch.sum(torch.square(matrix2), dim=1)  # shape (N2,)
    sq_dists = d1 + d2 + d3  # broadcasting
    # Round-off can leave tiny negative values (e.g. a point against itself),
    # which sqrt would turn into NaN -- clamp them to zero first.
    dists = torch.sqrt(torch.clamp(sq_dists, min=0.0))
    return dists
def euclidean_distance_matrix_np(matrix1: np.ndarray, matrix2: np.ndarray) -> np.ndarray:
    """Pairwise Euclidean distances via the expansion
    (x - y).(x - y) = x.x - 2*x.y + y.y.

    Params:
    -- matrix1: N1 x D
    -- matrix2: N2 x D
    Returns:
    -- dists: N1 x N2
       dists[i, j] == distance(matrix1[i], matrix2[j])
    """
    assert matrix1.shape[1] == matrix2.shape[1]
    d1 = -2 * np.dot(matrix1, matrix2.T)  # shape (N1, N2)
    d2 = np.sum(np.square(matrix1), axis=1, keepdims=True)  # shape (N1, 1)
    d3 = np.sum(np.square(matrix2), axis=1)  # shape (N2,)
    sq_dists = d1 + d2 + d3  # broadcasting
    # Round-off can leave tiny negative values (e.g. a point against itself),
    # which sqrt would turn into NaN -- clamp them to zero first.
    dists = np.sqrt(np.maximum(sq_dists, 0.0))
    return dists
def calculate_top_k(mat: torch.Tensor, top_k: int) -> torch.Tensor:
    """Cumulative top-k hit matrix for retrieval evaluation.

    Params:
    -- mat: size x size matrix of ranked indices; row i lists candidate
       indices for query i in rank order
    -- top_k: number of ranks to evaluate
    Returns:
    -- size x top_k bool tensor; entry [i, k] is True iff the correct
       index i appears among the first k+1 ranks of row i
    """
    size = mat.shape[0]
    # Row i of the target matrix is filled with the value i.
    target = torch.arange(size, device=mat.device).unsqueeze(1).repeat_interleave(size, 1)
    hits = mat == target
    # Cumulative logical OR over the first top_k ranks: a positive running
    # count means the correct index was seen at some earlier-or-equal rank.
    top_k_mat = hits[:, :top_k].cumsum(dim=1) > 0
    return top_k_mat
def calculate_activation_statistics(activations: torch.Tensor) -> tuple:
    """Gaussian statistics (mean, covariance) of feature activations.

    Params:
    -- activations: num_samples x dim_feat torch tensor (moved to CPU here)
    Returns:
    -- mu: dim_feat
    -- sigma: dim_feat x dim_feat
    """
    feats = activations.cpu().numpy()
    mu = np.mean(feats, axis=0)
    # rowvar=False: rows are samples, columns are features.
    sigma = np.cov(feats, rowvar=False)
    return mu, sigma
def calculate_activation_statistics_np(activations: np.ndarray) -> tuple:
    """Gaussian statistics (mean, covariance) of feature activations.

    Params:
    -- activations: num_samples x dim_feat
    Returns:
    -- mu: dim_feat
    -- cov: dim_feat x dim_feat
    """
    # rowvar=False: rows are samples, columns are features.
    return np.mean(activations, axis=0), np.cov(activations, rowvar=False)
def calculate_frechet_distance_np(
        mu1: np.ndarray,
        sigma1: np.ndarray,
        mu2: np.ndarray,
        sigma2: np.ndarray,
        eps: float = 1e-6) -> float:
    """Numpy implementation of the Frechet Distance (FID).

    For Gaussians X_1 ~ N(mu_1, C_1) and X_2 ~ N(mu_2, C_2):
        d^2 = ||mu_1 - mu_2||^2 + Tr(C_1 + C_2 - 2*sqrt(C_1*C_2)).
    Stable version by Dougal J. Sutherland.

    Params:
    -- mu1: sample mean of activations for generated samples
    -- sigma1: covariance of activations for generated samples
    -- mu2: sample mean of activations on the reference data set
    -- sigma2: covariance of activations on the reference data set
    -- eps: diagonal jitter used when the covariance product is near-singular
    Returns:
    -- the (squared) Frechet distance as a float
    """
    mu1 = np.atleast_1d(mu1)
    mu2 = np.atleast_1d(mu2)
    sigma1 = np.atleast_2d(sigma1)
    sigma2 = np.atleast_2d(sigma2)
    assert mu1.shape == mu2.shape, \
        "Training and test mean vectors have different lengths"
    assert sigma1.shape == sigma2.shape, \
        "Training and test covariances have different dimensions"
    mean_diff = mu1 - mu2
    # sqrtm of a near-singular product can contain inf/nan; retry with jitter.
    covmean, _ = scipy.linalg.sqrtm(sigma1.dot(sigma2), disp=False)
    if not np.isfinite(covmean).all():
        print(("fid calculation produces singular product; "
               "adding %s to diagonal of cov estimates") % eps)
        offset = np.eye(sigma1.shape[0]) * eps
        covmean = scipy.linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset))
    # Numerical error can introduce a slight imaginary component; keep the
    # real part only if the imaginary part is negligible.
    if np.iscomplexobj(covmean):
        if not np.allclose(np.diagonal(covmean).imag, 0, atol=1e-3):
            m = np.max(np.abs(covmean.imag))
            raise ValueError("Imaginary component {}".format(m))
        covmean = covmean.real
    return (mean_diff.dot(mean_diff) + np.trace(sigma1) + np.trace(sigma2)
            - 2 * np.trace(covmean))
def calculate_diversity(activation: torch.Tensor, diversity_times: int) -> float:
    """Average pairwise distance between two random subsets of activations.

    Params:
    -- activation: num_samples x dim_feat
    -- diversity_times: number of random pairs to sample (<= num_samples)
    Returns:
    -- mean L2 distance over the sampled pairs (0-dim torch.Tensor; note the
       annotation says float but the tensor is returned, as before)
    """
    assert len(activation.shape) == 2
    # >= (not >) matches calculate_diversity_np: sampling without replacement
    # only requires diversity_times <= num_samples.
    assert activation.shape[0] >= diversity_times
    num_samples = activation.shape[0]
    first_indices = np.random.choice(num_samples,
                                     diversity_times,
                                     replace=False)
    second_indices = np.random.choice(num_samples,
                                      diversity_times,
                                      replace=False)
    # Bug fix: `linalg` here is torch.linalg, whose norm() takes `dim=` --
    # the original `axis=` keyword raised TypeError at runtime.
    dist = torch.linalg.norm(activation[first_indices] - activation[second_indices],
                             dim=1)
    return dist.mean()
def calculate_diversity_np(activation: np.ndarray, diversity_times: int) -> float:
    """Average pairwise distance between two random subsets of activations.

    Params:
    -- activation: num_samples x dim_feat
    -- diversity_times: number of random pairs to sample (<= num_samples)
    Returns:
    -- mean L2 distance over the sampled pairs
    """
    assert len(activation.shape) == 2
    assert activation.shape[0] >= diversity_times
    total = activation.shape[0]
    idx_a = np.random.choice(total, diversity_times, replace=False)
    idx_b = np.random.choice(total, diversity_times, replace=False)
    pair_dists = scipy.linalg.norm(activation[idx_a] - activation[idx_b], axis=1)
    return pair_dists.mean()
def calculate_multimodality_np(activation: np.ndarray, multimodality_times: int) -> float:
    """Average distance between random pairs of generations per condition.

    Params:
    -- activation: num_conditions x num_per_condition x dim_feat
    -- multimodality_times: number of random pairs per condition
       (strictly less than num_per_condition)
    Returns:
    -- mean L2 distance over all sampled pairs and conditions
    """
    assert len(activation.shape) == 3
    assert activation.shape[1] > multimodality_times
    per_condition = activation.shape[1]
    pick_a = np.random.choice(per_condition, multimodality_times, replace=False)
    pick_b = np.random.choice(per_condition, multimodality_times, replace=False)
    pair_dists = scipy.linalg.norm(activation[:, pick_a] - activation[:, pick_b],
                                   axis=2)
    return pair_dists.mean()
# Motion Control
def calculate_skating_ratio(motions: torch.Tensor, dataset_name: str) -> tuple:
    """Estimate foot skating (feet sliding while in ground contact).

    Params:
    -- motions: [bsz, fs, 22 or 21, 3] joint positions; y is up, so the feet
       slide in the XZ plane
    -- dataset_name: 'humanml3d' or 'kit' (selects the foot joint indices)
    Returns:
    -- skating_ratio: [bsz] fraction of frame transitions where either foot skates
    -- skate_vel: [bsz, fs-1, 2] smoothed foot speed masked to contact frames
    """
    thresh_height = 0.05  # a foot below this height counts as ground contact
    fps = 20.0  # assumes 20-fps motion clips -- TODO confirm for other data
    thresh_vel = 0.50  # contact speed (per second) above this counts as skating
    avg_window = 5 # frames
    # XZ plane, y up
    # 10 left, 11 right foot. (HumanML3D)
    # 15 left, 20 right foot. (KIT)
    # motions [bsz, fs, 22 or 21, 3]
    if dataset_name == 'humanml3d':
        foot_idx = [10, 11]
    elif dataset_name == 'kit':
        foot_idx = [15, 20]
    else:
        raise ValueError(f'Invalid Dataset: {dataset_name}')
    verts_feet = motions[:, :, foot_idx, :].detach().cpu().numpy() # [bsz, fs, 2, 3]
    # Horizontal (XZ) speed of each foot between consecutive frames.
    verts_feet_plane_vel = np.linalg.norm(verts_feet[:, 1:, :, [0, 2]] -
                                          verts_feet[:, :-1, :, [0, 2]], axis=-1) * fps # [bsz, fs-1, 2]
    # Smooth the speed over a short window along time to suppress jitter.
    vel_avg = uniform_filter1d(verts_feet_plane_vel, axis=1, size=avg_window, mode='constant', origin=0)
    verts_feet_height = verts_feet[:, :, :, 1] # [bsz, fs, 2]
    # If feet touch ground in adjacent frames
    feet_contact = np.logical_and((verts_feet_height[:, :-1, :] < thresh_height),
                                  (verts_feet_height[:, 1:, :] < thresh_height)) # [bs, fs-1, 2]
    # skate velocity: smoothed speed, zeroed outside contact frames
    skate_vel = feet_contact * vel_avg
    # Skating: in contact while both the raw and the smoothed speed exceed the threshold.
    skating = np.logical_and(feet_contact, (verts_feet_plane_vel > thresh_vel))
    skating = np.logical_and(skating, (vel_avg > thresh_vel))
    # A frame transition skates if either foot slides.
    skating = np.logical_or(skating[:, :, 0], skating[:, :, 1]) # [bs, fs-1]
    skating_ratio = np.sum(skating, axis=1) / skating.shape[1]
    return skating_ratio, skate_vel
def calculate_trajectory_error(dist_error: torch.Tensor, mean_err_traj: torch.Tensor,
                               mask: torch.Tensor, strict: bool = True) -> torch.Tensor:
    """Trajectory-following error summary for controlled motion.

    Params:
    -- dist_error: per-keyframe distance errors
    -- mean_err_traj: mean keyframe error of the trajectory (used when not strict)
    -- mask: keyframe mask; its sum normalizes the per-keyframe statistics
    -- strict: if True a trajectory fails when ANY keyframe exceeds the
       threshold, otherwise when the MEAN error exceeds it
    Returns:
    -- tensor [traj_fail_0.2, traj_fail_0.5, kf_fail_0.2, kf_fail_0.5, mean_err]
    """
    if strict:
        fail_02 = 1.0 - int((dist_error <= 0.2).all().item())
        fail_05 = 1.0 - int((dist_error <= 0.5).all().item())
    else:
        fail_02 = int((mean_err_traj > 0.2).item())
        fail_05 = int((mean_err_traj > 0.5).item())
    keyframes = mask.sum()
    kf_fail_02 = (dist_error > 0.2).sum() / keyframes
    kf_fail_05 = (dist_error > 0.5).sum() / keyframes
    mean_err = dist_error.sum() / keyframes
    return torch.tensor([fail_02, fail_05, kf_fail_02, kf_fail_05, mean_err])
def control_l2(motion: torch.Tensor, hint: torch.Tensor, hint_mask: torch.Tensor) -> torch.Tensor:
    """L2 distance between motion and control hint, masked to hinted entries.

    Params:
    -- motion: generated joint positions
    -- hint: control-signal target positions (same shape as motion)
    -- hint_mask: mask zeroing out entries without a hint
    Returns:
    -- per-element L2 norm over the last dimension
    """
    masked_delta = (motion - hint) * hint_mask
    return torch.norm(masked_delta, p=2, dim=-1)