|
from typing import Dict, Mapping, Optional |
|
import math |
|
import torch |
|
import torch.nn as nn |
|
from torch_cluster import radius, radius_graph |
|
from torch_geometric.data import HeteroData, Batch |
|
from torch_geometric.utils import dense_to_sparse, subgraph |
|
|
|
from dev.modules.layers import * |
|
from dev.modules.map_decoder import discretize_neighboring |
|
from dev.utils.geometry import angle_between_2d_vectors, wrap_angle |
|
from dev.utils.weight_init import weight_init |
|
|
|
|
|
def cal_polygon_contour(x, y, theta, width, length):
    """Return the four corner points of an oriented rectangle.

    The rectangle is centred on ``(x, y)``; ``length`` is measured along the
    direction ``theta`` (radians, as consumed by ``math.cos``/``math.sin``)
    and ``width`` perpendicular to it.

    Returns:
        A list of four ``(x, y)`` tuples ordered
        left-front, right-front, right-back, left-back.
    """
    cos_t = math.cos(theta)
    sin_t = math.sin(theta)

    # Half-extent offsets along the heading axis and the lateral axis.
    along_x = 0.5 * length * cos_t
    along_y = 0.5 * length * sin_t
    side_x = 0.5 * width * sin_t
    side_y = 0.5 * width * cos_t

    return [
        (x + along_x - side_x, y + along_y + side_y),  # left front
        (x + along_x + side_x, y + along_y - side_y),  # right front
        (x - along_x + side_x, y - along_y - side_y),  # right back
        (x - along_x - side_x, y - along_y + side_y),  # left back
    ]
|
|
|
|
|
class SMARTAgentDecoder(nn.Module): |
|
|
|
def __init__(self,
             dataset: str,
             input_dim: int,
             hidden_dim: int,
             num_historical_steps: int,
             num_interaction_steps: int,
             time_span: Optional[int],
             pl2a_radius: float,
             pl2seed_radius: float,
             a2a_radius: float,
             num_freq_bands: int,
             num_layers: int,
             num_heads: int,
             head_dim: int,
             dropout: float,
             token_data: Dict,
             token_size: int,
             special_token_index: Optional[list] = None,
             predict_motion: bool = False,
             predict_state: bool = False,
             predict_map: bool = False,
             state_token: Optional[Dict[str, int]] = None,
             seed_size: int = 5) -> None:
    """Create the embeddings, attention stacks and prediction heads.

    Args:
        dataset: Dataset identifier; stored unchanged.
        input_dim: Size of the last axis of agent position tensors (used when
            flattening positions and building motion vectors).
        hidden_dim: Width of every embedding and attention layer.
        num_historical_steps: Number of historical steps; also the fallback
            for ``time_span``.
        num_interaction_steps: Stored unchanged.
        time_span: Temporal attention span; ``None`` selects
            ``num_historical_steps``.
        pl2a_radius: Radius for map-token -> agent edges.
        pl2seed_radius: Radius for edges that end at the seed node.
        a2a_radius: Radius for agent -> agent edges.
        num_freq_bands: Forwarded to every ``FourierEmbedding``.
        num_layers: Number of (temporal, map->agent, agent->agent) attention
            triples.
        num_heads, head_dim, dropout: Forwarded to every ``AttentionLayer``.
        token_data: Must provide the keys ``'token'``, ``'traj'`` and
            ``'token_all'``.
        token_size: Output size of the motion-token head.
        special_token_index: Stored unchanged; ``None`` becomes a fresh empty
            list so instances never share one default list.
        predict_motion, predict_state, predict_map: Feature switches.
            ``predict_state`` additionally creates the state embedding and the
            state / seed heads and widens the fusion input from 2 to 3 parts.
        state_token: Maps the state names ``'invalid'``, ``'valid'``,
            ``'enter'`` and ``'exit'`` to integer ids. Required.
        seed_size: Number of learned seed slots.

    Raises:
        ValueError: If ``state_token`` is ``None``.
    """
    if state_token is None:
        # The mapping is dereferenced unconditionally below; fail early with
        # a readable message instead of an AttributeError on ``None.keys()``.
        raise ValueError(
            "state_token is required and must map 'invalid', 'valid', "
            "'enter' and 'exit' to integer ids")

    super(SMARTAgentDecoder, self).__init__()

    # ---- plain configuration -------------------------------------------
    self.dataset = dataset
    self.input_dim = input_dim
    self.hidden_dim = hidden_dim
    self.num_historical_steps = num_historical_steps
    self.num_interaction_steps = num_interaction_steps
    self.time_span = time_span if time_span is not None else num_historical_steps
    self.pl2a_radius = pl2a_radius
    self.pl2seed_radius = pl2seed_radius
    self.a2a_radius = a2a_radius
    self.num_freq_bands = num_freq_bands
    self.num_layers = num_layers
    self.num_heads = num_heads
    self.head_dim = head_dim
    self.dropout = dropout
    # A literal ``[]`` default would be one list shared by every instance.
    self.special_token_index = [] if special_token_index is None else special_token_index
    self.predict_motion = predict_motion
    self.predict_state = predict_state
    self.predict_map = predict_map

    # ---- state vocabulary ------------------------------------------------
    self.state_type = list(state_token.keys())
    self.state_token = state_token
    self.invalid_state = int(state_token['invalid'])
    self.valid_state = int(state_token['valid'])
    self.enter_state = int(state_token['enter'])
    self.exit_state = int(state_token['exit'])

    # Label sets of the seed-state head and the per-agent state head.
    self.seed_state_type = ['invalid', 'enter']
    self.valid_state_type = ['invalid', 'valid', 'exit']

    # Number of continuous inputs of each Fourier / MLP embedding.
    input_dim_x_a = 2
    input_dim_r_t = 4
    input_dim_r_pt2a = 3
    input_dim_r_a2a = 3
    input_dim_token = 8

    self.seed_size = seed_size

    # ---- categorical embeddings ------------------------------------------
    self.all_agent_type = ['veh', 'ped', 'cyc', 'background', 'invalid', 'seed']
    self.seed_agent_type = ['veh', 'ped', 'cyc', 'seed']
    self.type_a_emb = nn.Embedding(len(self.all_agent_type), hidden_dim)
    self.shape_emb = MLPLayer(3, hidden_dim, hidden_dim)
    if self.predict_state:
        self.state_a_emb = nn.Embedding(len(self.state_type), hidden_dim)
    # Placeholder values written into features of invalid / entering / exited
    # steps. Set unconditionally because build_vector_a and the edge builders
    # read them regardless of ``predict_state``.
    self.invalid_shape_value = .1
    self.motion_gap = 1.
    self.heading_gap = 1.

    # ---- continuous embeddings -------------------------------------------
    self.x_a_emb = FourierEmbedding(input_dim=input_dim_x_a, hidden_dim=hidden_dim,
                                    num_freq_bands=num_freq_bands)
    self.r_t_emb = FourierEmbedding(input_dim=input_dim_r_t, hidden_dim=hidden_dim,
                                    num_freq_bands=num_freq_bands)
    self.r_pt2a_emb = FourierEmbedding(input_dim=input_dim_r_pt2a, hidden_dim=hidden_dim,
                                       num_freq_bands=num_freq_bands)
    self.r_a2a_emb = FourierEmbedding(input_dim=input_dim_r_a2a, hidden_dim=hidden_dim,
                                      num_freq_bands=num_freq_bands)
    self.token_emb_veh = MLPEmbedding(input_dim=input_dim_token, hidden_dim=hidden_dim)
    self.token_emb_ped = MLPEmbedding(input_dim=input_dim_token, hidden_dim=hidden_dim)
    self.token_emb_cyc = MLPEmbedding(input_dim=input_dim_token, hidden_dim=hidden_dim)
    # Single-row embeddings appended to each token table as the "no token"
    # and "begin of sequence" entries.
    self.no_token_emb = nn.Embedding(1, hidden_dim)
    self.bos_token_emb = nn.Embedding(1, hidden_dim)

    self.token_emb_offset = MLPEmbedding(input_dim=2, hidden_dim=hidden_dim)

    # Fusion consumes token + motion features, plus the state embedding when
    # state prediction is enabled.
    num_inputs = 3 if self.predict_state else 2
    self.fusion_emb = MLPEmbedding(input_dim=self.hidden_dim * num_inputs, hidden_dim=self.hidden_dim)

    # ---- attention stacks ------------------------------------------------
    self.t_attn_layers = nn.ModuleList(
        [AttentionLayer(hidden_dim=hidden_dim, num_heads=num_heads, head_dim=head_dim, dropout=dropout,
                        bipartite=False, has_pos_emb=True) for _ in range(num_layers)]
    )
    self.pt2a_attn_layers = nn.ModuleList(
        [AttentionLayer(hidden_dim=hidden_dim, num_heads=num_heads, head_dim=head_dim, dropout=dropout,
                        bipartite=True, has_pos_emb=True) for _ in range(num_layers)]
    )
    self.a2a_attn_layers = nn.ModuleList(
        [AttentionLayer(hidden_dim=hidden_dim, num_heads=num_heads, head_dim=head_dim, dropout=dropout,
                        bipartite=False, has_pos_emb=True) for _ in range(num_layers)]
    )
    self.token_size = token_size

    # ---- prediction heads ------------------------------------------------
    self.token_predict_head = MLPLayer(input_dim=hidden_dim, hidden_dim=hidden_dim,
                                       output_dim=self.token_size)

    if self.predict_state:
        self.seed_feature = nn.Embedding(self.seed_size, self.hidden_dim)

        self.state_predict_head = MLPLayer(input_dim=hidden_dim, hidden_dim=hidden_dim,
                                           output_dim=len(self.valid_state_type))

        self.seed_head = MLPLayer(input_dim=hidden_dim, hidden_dim=hidden_dim,
                                  output_dim=hidden_dim)

        self.seed_state_predict_head = MLPLayer(input_dim=hidden_dim, hidden_dim=hidden_dim,
                                                output_dim=len(self.seed_state_type))
        self.seed_type_predict_head = MLPLayer(input_dim=hidden_dim, hidden_dim=hidden_dim,
                                               output_dim=len(self.seed_agent_type))

    # ---- token vocabulary (indexed by 'veh' / 'ped' / 'cyc' elsewhere) ----
    self.trajectory_token = token_data['token']
    self.trajectory_token_traj = token_data['traj']
    self.trajectory_token_all = token_data['token_all']
    self.apply(weight_init)

    # ---- fixed runtime settings -------------------------------------------
    self.shift = 5  # raw steps per token step (time_span and step indices are divided by it)
    self.beam_size = 5
    self.hist_mask = True  # enables random history masking while training
    self.temporal_attn_to_invalid = True  # keep invalid steps in the temporal mask
    self.temporal_attn_seed = False  # seed row takes no part in temporal attention
|
|
|
|
|
|
|
|
|
def transform_rel(self, token_traj, prev_pos, prev_heading=None):
    """Rotate token trajectories by a per-step heading and offset them.

    Args:
        token_traj: Tensor unpacked as
            ``(num_agent, num_step, traj_num, traj_dim)``; it is viewed as
            ``(-1, traj_num, 2)`` for the batched matmul.
        prev_pos: 4-D tensor whose third axis holds past positions; its last
            entry is the translation that gets added.
        prev_heading: Optional ``(num_agent, num_step)`` angles. When omitted
            it is the ``arctan2`` of the last displacement in ``prev_pos``.

    Returns:
        Tensor shaped like ``token_traj``.
    """
    if prev_heading is None:
        # Direction of the most recent displacement along the third axis.
        last_disp = prev_pos[:, :, -1, :] - prev_pos[:, :, -2, :]
        prev_heading = torch.arctan2(last_disp[:, :, 1], last_disp[:, :, 0])

    num_agent, num_step, traj_num, traj_dim = token_traj.shape
    cos_h = prev_heading.cos()
    sin_h = prev_heading.sin()

    # One 2x2 matrix [[cos, -sin], [sin, cos]] per (agent, step); the points
    # are multiplied from the left as row vectors.
    rot_mat = torch.zeros((num_agent, num_step, 2, 2), device=prev_heading.device)
    rot_mat[:, :, 0, 0] = cos_h
    rot_mat[:, :, 0, 1] = -sin_h
    rot_mat[:, :, 1, 0] = sin_h
    rot_mat[:, :, 1, 1] = cos_h

    flat_points = token_traj.view(-1, traj_num, 2)
    flat_rot = rot_mat.view(-1, 2, 2)
    rotated = torch.bmm(flat_points, flat_rot).view(num_agent, num_step, traj_num, traj_dim)

    # Translate by the latest known position (broadcast over traj_num).
    return rotated + prev_pos[:, :, -1:, :]
|
|
|
def agent_token_embedding(self, data, agent_token_index, agent_state, pos_a, head_a, inference=False,
                          filter_mask=None, av_index=None):
    """Embed every agent step and append one extra "seed" row.

    Each (agent, step) feature fuses the motion-token embedding, a Fourier
    embedding of (speed, heading-to-motion angle) conditioned on agent type
    and shape, and the state embedding. A seed feature built from the motion
    and heading vectors of row ``av_index`` is concatenated as the last row.

    Side effect: the per-type token tables (token embeddings followed by one
    BOS row and one no-token row) are stored on
    ``self.agent_token_emb_veh`` / ``_ped`` / ``_cyc``.

    Args:
        data: Batch providing ``data['agent']['type']`` and
            ``data['agent']['shape']``.
        agent_token_index: Per-step indices into the token tables.
            # presumably -2 / -1 address the appended BOS / no-token rows — confirm
        agent_state: Per-step state ids, ``(num_agent, num_step)``.
        pos_a: ``(num_agent, num_step, dim)`` positions.
        head_a: ``(num_agent, num_step)`` headings.
        inference: When true, also return the token trajectories, the raw
            token embeddings and the categorical embeddings.
        filter_mask: Boolean agent selector applied to ``data``; defaults to
            all agents.
        av_index: Row whose motion/heading vectors seed the extra row.

    Returns:
        ``(feat_a, head_vector_a)`` or, with ``inference=True``,
        ``(feat_a, head_vector_a, agent_token_traj_all, agent_token_emb,
        categorical_embs)``.
    """
    if filter_mask is None:
        # Any single column of agent_state gives a per-agent 1-D template.
        filter_mask = torch.ones_like(agent_state[:, 2], dtype=torch.bool)

    device = pos_a.device
    num_agent, num_step, _ = pos_a.shape

    agent_type = data['agent']['type'][filter_mask]
    veh_mask = (agent_type == 0)
    ped_mask = (agent_type == 1)
    cyc_mask = (agent_type == 2)

    motion_vector_a, head_vector_a = self.build_vector_a(pos_a, head_a, agent_state)

    def _to_float_tensor(array):
        # numpy table -> float tensor on the working device
        return torch.from_numpy(array).clone().to(device).to(torch.float)

    zero_idx = torch.zeros(1, device=device).long()

    # --- per-type token tables ---------------------------------------------
    trajectory_token_veh = _to_float_tensor(self.trajectory_token['veh'])
    trajectory_token_ped = _to_float_tensor(self.trajectory_token['ped'])
    trajectory_token_cyc = _to_float_tensor(self.trajectory_token['cyc'])

    table_veh = self.token_emb_veh(trajectory_token_veh.view(trajectory_token_veh.shape[0], -1))
    table_ped = self.token_emb_ped(trajectory_token_ped.view(trajectory_token_ped.shape[0], -1))
    table_cyc = self.token_emb_cyc(trajectory_token_cyc.view(trajectory_token_cyc.shape[0], -1))

    # Append the BOS row ...
    table_veh = torch.cat([table_veh, self.bos_token_emb(zero_idx)])
    table_ped = torch.cat([table_ped, self.bos_token_emb(zero_idx)])
    table_cyc = torch.cat([table_cyc, self.bos_token_emb(zero_idx)])

    # ... and then the no-token row, so it ends up last in each table.
    self.agent_token_emb_veh = torch.cat([table_veh, self.no_token_emb(zero_idx)])
    self.agent_token_emb_ped = torch.cat([table_ped, self.no_token_emb(zero_idx)])
    self.agent_token_emb_cyc = torch.cat([table_cyc, self.no_token_emb(zero_idx)])

    if inference:
        # Per-agent token trajectories: the first `shift` entries of the full
        # token trajectory followed by the token contour itself.
        agent_token_traj_all = torch.zeros((num_agent, self.token_size, self.shift + 1, 4, 2), device=device)
        trajectory_token_all_veh = _to_float_tensor(self.trajectory_token_all['veh'])
        trajectory_token_all_ped = _to_float_tensor(self.trajectory_token_all['ped'])
        trajectory_token_all_cyc = _to_float_tensor(self.trajectory_token_all['cyc'])
        agent_token_traj_all[veh_mask] = torch.cat(
            [trajectory_token_all_veh[:, :self.shift], trajectory_token_veh[:, None, ...]], dim=1)
        agent_token_traj_all[ped_mask] = torch.cat(
            [trajectory_token_all_ped[:, :self.shift], trajectory_token_ped[:, None, ...]], dim=1)
        agent_token_traj_all[cyc_mask] = torch.cat(
            [trajectory_token_all_cyc[:, :self.shift], trajectory_token_cyc[:, None, ...]], dim=1)

    # --- token embedding looked up per agent type --------------------------
    agent_token_emb = torch.zeros((num_agent, num_step, self.hidden_dim), device=device)
    agent_token_emb[veh_mask] = self.agent_token_emb_veh[agent_token_index[veh_mask]]
    agent_token_emb[ped_mask] = self.agent_token_emb_ped[agent_token_index[ped_mask]]
    agent_token_emb[cyc_mask] = self.agent_token_emb_cyc[agent_token_index[cyc_mask]]

    # --- categorical inputs; invalid steps get placeholder type / shape ----
    is_invalid = (agent_state == self.invalid_state) & (agent_state != self.enter_state)
    invalid_flat = is_invalid.reshape(-1)
    agent_types = data['agent']['type'][filter_mask].long().repeat_interleave(repeats=num_step, dim=0)
    agent_types[invalid_flat] = self.all_agent_type.index('invalid')
    agent_shapes = data['agent']['shape'][filter_mask, self.num_historical_steps - 1, :].repeat_interleave(
        repeats=num_step, dim=0)
    agent_shapes[invalid_flat] = self.invalid_shape_value

    categorical_embs = [self.type_a_emb(agent_types), self.shape_emb(agent_shapes)]

    # --- continuous inputs: displacement norm and heading-to-motion angle --
    speed = torch.norm(motion_vector_a[:, :, :2], p=2, dim=-1)
    heading_to_motion = angle_between_2d_vectors(ctr_vector=head_vector_a,
                                                 nbr_vector=motion_vector_a[:, :, :2])
    feature_a = torch.stack([speed, heading_to_motion], dim=-1)

    x_a = self.x_a_emb(continuous_inputs=feature_a.view(-1, feature_a.size(-1)),
                       categorical_embs=categorical_embs)
    x_a = x_a.view(-1, num_step, self.hidden_dim)

    s_a = self.state_a_emb(agent_state.reshape(-1).long()).reshape(num_agent, num_step, self.hidden_dim)
    feat_a = self.fusion_emb(torch.cat((agent_token_emb, x_a, s_a), dim=-1))

    # --- seed row, driven by the motion / heading of row `av_index` --------
    feat_seed = self.build_invalid_agent_feature(
        num_step, device, type_index=self.all_agent_type.index('seed'),
        motion_vector=motion_vector_a[av_index: av_index + 1],
        head_vector=head_vector_a[av_index: av_index + 1])

    feat_a = torch.cat([feat_a, feat_seed], dim=0)

    if inference:
        return feat_a, head_vector_a, agent_token_traj_all, agent_token_emb, categorical_embs
    return feat_a, head_vector_a
|
|
|
def build_vector_a(self, pos_a, head_a, state_a): |
|
num_agent = pos_a.shape[0] |
|
|
|
motion_vector_a = torch.cat([pos_a.new_zeros(num_agent, 1, self.input_dim), |
|
pos_a[:, 1:] - pos_a[:, :-1]], dim=1) |
|
|
|
|
|
is_bos = state_a == self.enter_state |
|
motion_vector_a[is_bos] = self.motion_gap |
|
|
|
is_last_eos = state_a.roll(shifts=1, dims=1) == self.exit_state |
|
is_last_eos[:, 0] = False |
|
motion_vector_a[is_last_eos] = -self.motion_gap |
|
|
|
head_vector_a = torch.stack([head_a.cos(), head_a.sin()], dim=-1) |
|
|
|
return motion_vector_a, head_vector_a |
|
|
|
def build_invalid_agent_feature(self, num_step, device, motion_vector=None, head_vector=None, type_index=None, shape_value=None):
    """Build the fused feature of one placeholder (invalid or seed) agent.

    The token part is the no-token embedding and the state part is the
    embedding of ``invalid_state``, both repeated over ``num_step``.

    Args:
        num_step: Number of steps to produce.
        device: Device for every tensor created here.
        motion_vector: Optional ``(1, num_step, >=2)`` motion vectors.
        head_vector: Optional ``(1, num_step, 2)`` heading vectors. If either
            vector is missing, zero motion and heading angle 0 are used.
        type_index: Index into ``all_agent_type``; defaults to ``'invalid'``.
        shape_value: ``(1, 3)`` shape input; defaults to
            ``invalid_shape_value`` in every slot.

    Returns:
        Tensor of shape ``(1, num_step, hidden_dim)``.
    """
    zero_idx = torch.zeros(1, device=device).long()
    invalid_agent_token_emb = self.no_token_emb(zero_idx)[:, None].repeat(1, num_step, 1)

    if motion_vector is None or head_vector is None:
        motion_vector = torch.zeros((1, num_step, 2), device=device)
        zero_angle = torch.zeros(1, device=device)
        unit_heading = torch.stack([torch.cos(zero_angle), torch.sin(zero_angle)], dim=-1)
        head_vector = unit_heading[:, None, :].repeat(1, num_step, 1)

    # Same two continuous inputs as for real agents.
    speed = torch.norm(motion_vector[:, :, :2], p=2, dim=-1)
    heading_to_motion = angle_between_2d_vectors(ctr_vector=head_vector,
                                                 nbr_vector=motion_vector[:, :, :2])
    feature_ina = torch.stack([speed, heading_to_motion], dim=-1)

    if type_index is None:
        type_index = self.all_agent_type.index('invalid')
    if shape_value is None:
        shape_value = torch.full((1, 3), self.invalid_shape_value, device=device)

    categorical_embs_ina = [self.type_a_emb(torch.tensor([type_index], device=device)),
                            self.shape_emb(shape_value)]
    x_ina = self.x_a_emb(continuous_inputs=feature_ina.view(-1, feature_ina.size(-1)),
                         categorical_embs=categorical_embs_ina)
    x_ina = x_ina.view(-1, num_step, self.hidden_dim)

    invalid_id = torch.tensor([self.invalid_state], device=device)
    s_ina = self.state_a_emb(invalid_id)[:, None].repeat(1, num_step, 1)

    fused_input = torch.cat((invalid_agent_token_emb, x_ina, s_ina), dim=-1)
    return self.fusion_emb(fused_input)
|
|
|
def build_temporal_edge(self, pos_a, head_a, head_vector_a, state_a, mask, inference_mask=None, av_index=None):
    """Build per-agent temporal edges and their relative embeddings.

    A copy of row ``av_index`` is appended to every per-agent tensor as the
    seed row. Edges join two steps of the same row when both pass the mask
    and the target step lies 1 .. ``time_span / shift`` steps after the
    source step.

    Note: ``inference_mask`` is modified in place (its last row is cleared).

    Args:
        pos_a: ``(num_agent, num_step, input_dim)`` positions.
        head_a: ``(num_agent, num_step)`` headings.
        head_vector_a: ``(num_agent, num_step, 2)`` heading unit vectors.
        state_a: ``(num_agent, num_step)`` state ids.
        mask: ``(num_agent, num_step)`` validity mask (cloned, not modified).
        inference_mask: Optional mask of the steps allowed as edge targets.
            # assumes it already has num_agent + 1 rows — TODO confirm
        av_index: Row copied to form the seed row.

    Returns:
        ``(edge_index_t, r_t)``: a ``(2, E)`` index tensor into the flattened
        (row-major) step grid and the embedded edge features.
    """
    num_agent = pos_a.shape[0]
    hist_mask = mask.clone()

    if not self.temporal_attn_to_invalid:
        hist_mask[state_a == self.invalid_state] = False

    def _append_seed_row(tensor):
        # Seed row = copy of the AV row, appended last.
        return torch.cat([tensor, tensor[av_index][None]], dim=0)

    pos_a = _append_seed_row(pos_a)
    head_a = _append_seed_row(head_a)
    state_a = _append_seed_row(state_a)
    head_vector_a = _append_seed_row(head_vector_a)
    hist_mask = torch.cat([hist_mask, torch.ones_like(hist_mask[0:1])], dim=0).bool()
    if not self.temporal_attn_seed:
        # Keep the seed row out of temporal attention entirely.
        hist_mask[-1:] = False
        if inference_mask is not None:
            inference_mask[-1:] = False

    pos_t = pos_a.reshape(-1, self.input_dim)
    head_t = head_a.reshape(-1)
    head_vector_t = head_vector_a.reshape(-1, 2)

    # Hide history that lies more than one temporal window before the first
    # entering step of each row (the seed row never counts as entering).
    is_bos = state_a == self.enter_state
    is_bos[-1] = False
    bos_index = torch.where(is_bos.any(dim=1), torch.argmax(is_bos.long(), dim=1), torch.tensor(0))
    motion_predict_start_index = torch.clamp(bos_index - self.time_span / self.shift + 1, min=0)
    step_ids = torch.arange(hist_mask.shape[1]).expand(hist_mask.shape[0], -1).to(hist_mask.device)
    motion_predict_mask = step_ids >= motion_predict_start_index[:, None]
    hist_mask[~motion_predict_mask] = False

    if self.hist_mask and self.training:
        # Training only: drop 10 random (possibly repeated) steps per agent.
        hist_mask[
            torch.arange(mask.shape[0]).unsqueeze(1), torch.randint(0, mask.shape[1], (num_agent, 10))] = False
        mask_t = hist_mask.unsqueeze(2) & hist_mask.unsqueeze(1)
    elif inference_mask is not None:
        mask_t = hist_mask.unsqueeze(2) & inference_mask.unsqueeze(1)
    else:
        mask_t = hist_mask.unsqueeze(2) & hist_mask.unsqueeze(1)

    edge_index_t = dense_to_sparse(mask_t)[0]
    # Keep forward-in-time edges that stay inside the temporal window.
    step_gap = edge_index_t[1] - edge_index_t[0]
    in_window = (step_gap > 0) & (step_gap <= self.time_span / self.shift)
    edge_index_t = edge_index_t[:, in_window]

    src, dst = edge_index_t[0], edge_index_t[1]
    rel_pos_t = pos_t[src] - pos_t[dst]
    rel_head_t = wrap_angle(head_t[src] - head_t[dst])

    # Edges touching an invalid step carry placeholder geometry; the
    # target-side value is written last and therefore wins.
    is_invalid_t = (state_a == self.invalid_state).reshape(-1)
    rel_pos_t[is_invalid_t[src]] = -self.motion_gap
    rel_pos_t[is_invalid_t[dst]] = self.motion_gap
    rel_head_t[is_invalid_t[src]] = -self.heading_gap
    rel_head_t[is_invalid_t[dst]] = self.heading_gap

    r_t = torch.stack(
        [torch.norm(rel_pos_t[:, :2], p=2, dim=-1),
         angle_between_2d_vectors(ctr_vector=head_vector_t[dst], nbr_vector=rel_pos_t[:, :2]),
         rel_head_t,
         src - dst], dim=-1)
    r_t = self.r_t_emb(continuous_inputs=r_t, categorical_embs=None)

    return edge_index_t, r_t
|
|
|
def build_interaction_edge(self, pos_a, head_a, head_vector_a, state_a, batch_s, mask_a, inference_mask=None, av_index=None):
    """Build agent-to-agent edges within each step, plus agent-to-seed edges.

    A copy of row ``av_index`` is appended as the seed row and everything is
    flattened step-major, so the seed of step ``t`` sits at flat index
    ``(t + 1) * (num_agent + 1) - 1``.

    Args:
        pos_a: ``(num_agent, num_step, input_dim)`` positions.
        head_a: ``(num_agent, num_step)`` headings.
        head_vector_a: ``(num_agent, num_step, 2)`` heading unit vectors.
        state_a: ``(num_agent, num_step)`` state ids.
        batch_s: Step id of every flattened node.
        mask_a: Mask over (row, step), seed row included.
        inference_mask: Optional extra mask AND-ed onto ``mask_a``.
        av_index: Row copied to form the seed row.

    Returns:
        ``(edge_index_a2a, r_a2a)``: edges between flattened nodes and their
        embedded relative features.
    """
    num_agent, num_step, _ = pos_a.shape

    def _append_seed_row(tensor):
        return torch.cat([tensor, tensor[av_index][None]], dim=0)

    pos_a = _append_seed_row(pos_a)
    head_a = _append_seed_row(head_a)
    state_a = _append_seed_row(state_a)
    head_vector_a = _append_seed_row(head_vector_a)

    # Step-major flattening: all rows of step 0, then step 1, ...
    pos_s = pos_a.transpose(0, 1).reshape(-1, self.input_dim)
    head_s = head_a.transpose(0, 1).reshape(-1)
    head_vector_s = head_vector_a.transpose(0, 1).reshape(-1, 2)
    if inference_mask is not None:
        mask_a = mask_a & inference_mask
    mask_s = mask_a.transpose(0, 1).reshape(-1)

    # --- agent -> seed edges -----------------------------------------------
    # Row 0 indexes the flattened nodes, row 1 the step of the seed position
    # (this is how the two masks below are applied).
    mask_seed = state_a[av_index] != self.invalid_state
    pos_seed = pos_a[av_index]
    edge_index_seed2a = radius(x=pos_seed[:, :2], y=pos_s[:, :2], r=self.pl2seed_radius,
                               batch_x=torch.arange(num_step).to(pos_s.device), batch_y=batch_s,
                               max_num_neighbors=300)
    keep_seed = mask_s[edge_index_seed2a[0]] & mask_seed[edge_index_seed2a[1]]
    edge_index_seed2a = edge_index_seed2a[:, keep_seed]

    # Step id -> flat index of the seed node (last node of each step block).
    edge_index_seed2a[1, :] = (edge_index_seed2a[1, :] + 1) * (num_agent + 1) - 1

    # --- agent <-> agent edges within a step --------------------------------
    edge_index_a2a = radius_graph(x=pos_s[:, :2], r=self.a2a_radius, batch=batch_s, loop=False,
                                  max_num_neighbors=300)
    edge_index_a2a = subgraph(subset=mask_s, edge_index=edge_index_a2a)[0]

    edge_index_a2a = torch.cat([edge_index_a2a, edge_index_seed2a], dim=-1)

    src, dst = edge_index_a2a[0], edge_index_a2a[1]
    rel_pos_a2a = pos_s[src] - pos_s[dst]
    rel_head_a2a = wrap_angle(head_s[src] - head_s[dst])

    # Placeholder geometry for edges touching an invalid node; the
    # target-side value is written last and therefore wins.
    is_invalid_s = (state_a == self.invalid_state).transpose(0, 1).reshape(-1)
    rel_pos_a2a[is_invalid_s[src]] = -self.motion_gap
    rel_pos_a2a[is_invalid_s[dst]] = self.motion_gap
    rel_head_a2a[is_invalid_s[src]] = -self.heading_gap
    rel_head_a2a[is_invalid_s[dst]] = self.heading_gap

    r_a2a = torch.stack(
        [torch.norm(rel_pos_a2a[:, :2], p=2, dim=-1),
         angle_between_2d_vectors(ctr_vector=head_vector_s[dst], nbr_vector=rel_pos_a2a[:, :2]),
         rel_head_a2a], dim=-1)
    r_a2a = self.r_a2a_emb(continuous_inputs=r_a2a, categorical_embs=None)

    return edge_index_a2a, r_a2a
|
|
|
def build_map2agent_edge(self, data, num_step, pos_a, head_a, head_vector_a, state_a, batch_s, batch_pl,
                         mask, inference_mask=None, av_index=None):
    """Build map-token -> agent edges (and map-token -> seed edges).

    Map tokens are replicated once per step. A copy of row ``av_index`` is
    appended as the seed row and agents are flattened step-major, so the seed
    of step ``t`` sits at flat index ``(t + 1) * (num_agent + 1) - 1``.

    Args:
        data: Batch providing ``data['pt_token']['position']`` and
            ``data['pt_token']['orientation']``.
        num_step: Ignored; it is overwritten from ``pos_a.shape``.
        pos_a: ``(num_agent, num_step, input_dim)`` positions.
        head_a: ``(num_agent, num_step)`` headings.
        head_vector_a: ``(num_agent, num_step, 2)`` heading unit vectors.
        state_a: ``(num_agent, num_step)`` state ids.
        batch_s: Step id of every flattened agent node.
        batch_pl: Step id of every replicated map token.
        mask: Mask over (row, step), seed row included.
        inference_mask: Optional extra mask AND-ed onto ``mask``.
        av_index: Row copied to form the seed row.

    Returns:
        ``(edge_index_pl2a, r_pl2a)``: row 0 indexes replicated map tokens,
        row 1 flattened agent nodes; plus the embedded relative features.
    """
    num_agent, num_step, _ = pos_a.shape

    mask_pl2a = mask.clone()
    if inference_mask is not None:
        mask_pl2a = mask_pl2a & inference_mask
    mask_pl2a = mask_pl2a.transpose(0, 1).reshape(-1)

    def _append_seed_row(tensor):
        return torch.cat([tensor, tensor[av_index][None]], dim=0)

    pos_a = _append_seed_row(pos_a)
    state_a = _append_seed_row(state_a)
    head_a = _append_seed_row(head_a)
    head_vector_a = _append_seed_row(head_vector_a)

    # Step-major flattening of the agent side.
    pos_s = pos_a.transpose(0, 1).reshape(-1, self.input_dim)
    head_s = head_a.transpose(0, 1).reshape(-1)
    head_vector_s = head_vector_a.transpose(0, 1).reshape(-1, 2)

    # One copy of the map tokens per step.
    ori_pos_pl = data['pt_token']['position'][:, :self.input_dim].contiguous()
    ori_orient_pl = data['pt_token']['orientation'].contiguous()
    pos_pl = ori_pos_pl.repeat(num_step, 1)
    orient_pl = ori_orient_pl.repeat(num_step)

    # --- map token -> seed edges ---------------------------------------------
    mask_seed = state_a[av_index] != self.invalid_state
    pos_seed = pos_a[av_index]
    edge_index_pl2seed = radius(x=pos_seed[:, :2], y=pos_pl[:, :2], r=self.pl2seed_radius,
                                batch_x=torch.arange(num_step).to(pos_s.device), batch_y=batch_pl,
                                max_num_neighbors=600)
    edge_index_pl2seed = edge_index_pl2seed[:, mask_seed[edge_index_pl2seed[1]]]

    # Step id -> flat index of the seed node (last node of each step block).
    edge_index_pl2seed[1, :] = (edge_index_pl2seed[1, :] + 1) * (num_agent + 1) - 1

    # --- map token -> agent edges --------------------------------------------
    edge_index_pl2a = radius(x=pos_s[:, :2], y=pos_pl[:, :2], r=self.pl2a_radius,
                             batch_x=batch_s, batch_y=batch_pl, max_num_neighbors=300)
    edge_index_pl2a = edge_index_pl2a[:, mask_pl2a[edge_index_pl2a[1]]]

    edge_index_pl2a = torch.cat([edge_index_pl2a, edge_index_pl2seed], dim=-1)

    src_pl, dst_a = edge_index_pl2a[0], edge_index_pl2a[1]
    rel_pos_pl2a = pos_pl[src_pl] - pos_s[dst_a]
    rel_orient_pl2a = wrap_angle(orient_pl[src_pl] - head_s[dst_a])

    # Placeholder geometry for edges that end at an invalid agent node.
    is_invalid_s = (state_a == self.invalid_state).transpose(0, 1).reshape(-1)
    rel_pos_pl2a[is_invalid_s[dst_a]] = self.motion_gap
    rel_orient_pl2a[is_invalid_s[dst_a]] = self.heading_gap

    r_pl2a = torch.stack(
        [torch.norm(rel_pos_pl2a[:, :2], p=2, dim=-1),
         angle_between_2d_vectors(ctr_vector=head_vector_s[dst_a], nbr_vector=rel_pos_pl2a[:, :2]),
         rel_orient_pl2a], dim=-1)
    r_pl2a = self.r_pt2a_emb(continuous_inputs=r_pl2a, categorical_embs=None)

    return edge_index_pl2a, r_pl2a
|
|
|
def get_inputs(self, data: HeteroData) -> Dict[str, torch.Tensor]: |
|
|
|
pos_a = data['agent']['token_pos'] |
|
head_a = data['agent']['token_heading'] |
|
agent_category = data['agent']['category'] |
|
agent_token_index = data['agent']['token_idx'] |
|
agent_state_index = data['agent']['state_idx'] |
|
mask = data['agent']['raw_agent_valid_mask'].clone() |
|
|
|
|
|
if not self.predict_state: |
|
agent_state_index = None |
|
|
|
next_token_index_gt = agent_token_index.roll(shifts=-1, dims=1) |
|
next_state_index_gt = agent_state_index.roll(shifts=-1, dims=1) |
|
|
|
if self.predict_state: |
|
next_token_eval_mask = mask.clone() |
|
next_token_eval_mask = next_token_eval_mask * next_token_eval_mask.roll(shifts=1, dims=1) |
|
bos_token_index = torch.nonzero(agent_state_index == 2) |
|
eos_token_index = torch.nonzero(agent_state_index == 3) |
|
next_token_eval_mask[bos_token_index[:, 0], bos_token_index[:, 1]] = 1 |
|
for eos_token_index_ in eos_token_index: |
|
if not next_token_eval_mask[eos_token_index_[0], eos_token_index_[1]]: |
|
next_token_eval_mask[eos_token_index_[0], eos_token_index_[1]:] = 0 |
|
next_token_eval_mask = next_token_eval_mask.roll(shifts=-1, dims=1) |
|
|
|
|
|
if next_token_index_gt[next_token_eval_mask].min() < 0: |
|
raise RuntimeError() |
|
|
|
next_token_eval_mask[:, -1] = False |
|
|
|
return {'token_pos': pos_a, |
|
'token_heading': head_a, |
|
'agent_category': agent_category, |
|
'next_token_idx_gt': next_token_index_gt, |
|
'next_state_idx_gt': next_state_index_gt, |
|
'next_token_eval_mask': next_token_eval_mask, |
|
'raw_agent_valid_mask': data['agent']['raw_agent_valid_mask'], |
|
} |
|
|
|
def forward(self,
            data: HeteroData,
            map_enc: Mapping[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
    """Training-time pass: encode agents and predict next tokens and states.

    Agent-step features (with one seed row appended) go through
    ``num_layers`` rounds of temporal, map->agent and agent->agent attention.
    The token and state heads read every row except the seed row; the seed
    heads read only the seed row combined with the learned seed slots.

    NOTE(review): the state and seed heads are used unconditionally, so this
    method presumably requires ``predict_state=True`` — confirm.

    Args:
        data: Batch providing under ``data['agent']``: ``token_pos``,
            ``token_heading``, ``category``, ``token_idx``, ``state_idx``,
            ``type``, ``av_index``, ``raw_agent_valid_mask`` and
            ``num_nodes``; and ``data['pt_token']['num_nodes']``.
        map_enc: Must contain ``'x_pt'``, the map-token features.

    Returns:
        Dict with the final features (``x_a``) and, for the token, state and
        seed heads, the predicted indices, raw head outputs, ground-truth
        indices and boolean evaluation masks.

    Raises:
        RuntimeError: If more agents enter at one step than ``seed_size``, or
            if an evaluated step has a negative ground-truth token index.
    """
    pos_a = data['agent']['token_pos'].clone()
    head_a = data['agent']['token_heading'].clone()
    _, num_step, _ = pos_a.shape
    agent_category = data['agent']['category'].clone()
    agent_token_index = data['agent']['token_idx'].clone()
    agent_state_index = data['agent']['state_idx'].clone()
    agent_type_index = data['agent']['type'].clone()

    device = pos_a.device

    # Every agent entering at a given step needs its own seed slot.
    seed_step_mask = agent_state_index[:, 1:] == self.enter_state
    if torch.any(seed_step_mask.sum(dim=0) > self.seed_size):
        raise RuntimeError(f"Seed size {self.seed_size} is too small.")

    av_index = int(data['agent']['av_index'])

    if not self.predict_state:
        agent_state_index = None

    feat_a, head_vector_a = self.agent_token_embedding(data, agent_token_index, agent_state_index,
                                                       pos_a, head_a, av_index=av_index)

    # ---- attention masks -----------------------------------------------------
    mask = data['agent']['raw_agent_valid_mask'].clone()
    temporal_mask = mask.clone()
    interact_mask = mask.clone()
    if self.predict_state:
        is_bos = agent_state_index == self.enter_state
        is_eos = agent_state_index == self.exit_state
        bos_index = torch.where(is_bos.any(dim=1), torch.argmax(is_bos.long(), dim=1), torch.tensor(0))
        eos_index = torch.where(is_eos.any(dim=1), torch.argmax(is_eos.long(), dim=1),
                                torch.tensor(num_step - 1))

        # Temporal mask: everything is visible except steps strictly after the
        # first enter up to the first exit, which follow the raw validity.
        temporal_mask = torch.ones_like(mask)
        motion_mask = torch.arange(mask.shape[1]).expand(mask.shape[0], -1).to(device)
        motion_mask = (motion_mask > bos_index[:, None]) & (motion_mask <= eos_index[:, None])
        temporal_mask[motion_mask] = mask[motion_mask]

        # Entering steps take part in interaction; add an always-on seed row.
        interact_mask[agent_state_index == self.enter_state] = True
        interact_mask = torch.cat([interact_mask, torch.ones_like(interact_mask[:1])]).bool()

    # ---- graph construction --------------------------------------------------
    edge_index_t, r_t = self.build_temporal_edge(pos_a, head_a, head_vector_a, agent_state_index,
                                                 temporal_mask, av_index=av_index)

    # Step id per flattened node: agents (+1 seed) and map tokens.
    batch_s = torch.arange(num_step, device=device).repeat_interleave(data['agent']['num_nodes'] + 1)
    batch_pl = torch.arange(num_step, device=device).repeat_interleave(data['pt_token']['num_nodes'])

    edge_index_a2a, r_a2a = self.build_interaction_edge(pos_a, head_a, head_vector_a, agent_state_index,
                                                        batch_s, interact_mask, av_index=av_index)

    # Map edges only for rows of category 3; the seed row is given category 3.
    agent_category = torch.cat([agent_category, torch.full(agent_category[-1:].shape, 3, device=device)])
    interact_mask[agent_category != 3] = False
    edge_index_pl2a, r_pl2a = self.build_map2agent_edge(data, num_step, pos_a, head_a, head_vector_a,
                                                        agent_state_index, batch_s, batch_pl,
                                                        interact_mask, av_index=av_index)

    # ---- attention stack -----------------------------------------------------
    # Map features replicated per step, step-major; identical for every layer,
    # so built once outside the loop.
    x_pt_s = map_enc['x_pt'].repeat_interleave(repeats=num_step, dim=0).reshape(
        -1, num_step, self.hidden_dim).transpose(0, 1).reshape(-1, self.hidden_dim)

    for t_attn, pt2a_attn, a2a_attn in zip(self.t_attn_layers, self.pt2a_attn_layers, self.a2a_attn_layers):
        # Temporal attention works on the agent-major flattening ...
        feat_a = feat_a.reshape(-1, self.hidden_dim)
        feat_a = t_attn(feat_a, r_t, edge_index_t)
        # ... map and interaction attention on the step-major flattening.
        feat_a = feat_a.reshape(-1, num_step, self.hidden_dim).transpose(0, 1).reshape(-1, self.hidden_dim)
        feat_a = pt2a_attn((x_pt_s, feat_a), r_pl2a, edge_index_pl2a)
        feat_a = a2a_attn(feat_a, r_a2a, edge_index_a2a)
        feat_a = feat_a.reshape(num_step, -1, self.hidden_dim).transpose(0, 1)

    # ---- heads (the last row of feat_a is the seed row) -----------------------
    next_token_prob = self.token_predict_head(feat_a[:-1])
    next_token_prob_softmax = torch.softmax(next_token_prob, dim=-1)
    _, next_token_idx = torch.topk(next_token_prob_softmax, k=10, dim=-1)
    next_token_index_gt = agent_token_index.roll(shifts=-1, dims=1)

    next_state_prob = self.state_predict_head(feat_a[:-1])
    next_state_idx = next_state_prob.softmax(dim=-1).argmax(dim=-1, keepdim=True)
    next_state_index_gt = agent_state_index.roll(shifts=-1, dims=1)

    # One feature per (seed slot, step): seed-row feature + slot embedding.
    feat_seed = self.seed_head(feat_a[-1:]) + self.seed_feature.weight[:, None]
    next_state_prob_seed = self.seed_state_predict_head(feat_seed)
    next_state_idx_seed = next_state_prob_seed.softmax(dim=-1).argmax(dim=-1, keepdim=True)

    next_type_prob_seed = self.seed_type_predict_head(feat_seed)
    next_type_idx_seed = next_type_prob_seed.softmax(dim=-1).argmax(dim=-1, keepdim=True)

    next_type_index_gt = agent_type_index[:, None].expand(-1, num_step).roll(shifts=-1, dims=1)

    # ---- evaluation masks ------------------------------------------------------
    bos_steps = torch.nonzero(agent_state_index == self.enter_state).tolist()
    eos_token_index = torch.nonzero(agent_state_index == self.exit_state)

    # Base rule for both masks: step, predecessor and successor all valid.
    next_token_eval_mask = mask.clone()
    next_token_eval_mask = (next_token_eval_mask * next_token_eval_mask.roll(shifts=-1, dims=1)
                            * next_token_eval_mask.roll(shifts=1, dims=1))
    next_state_eval_mask = mask.clone()
    next_state_eval_mask = (next_state_eval_mask * next_state_eval_mask.roll(shifts=-1, dims=1)
                            * next_state_eval_mask.roll(shifts=1, dims=1))

    for agent_idx, step_idx in bos_steps:
        # State targets before the entering step are not evaluated.
        next_state_eval_mask[agent_idx, :step_idx] = 0
        # The entering step itself is always evaluated ...
        next_token_eval_mask[agent_idx, step_idx] = 1
        next_state_eval_mask[agent_idx, step_idx] = 1
        # ... and the following step iff the step after that is valid. Without
        # the bounds check an enter at the second-to-last step assigned an
        # empty slice to a length-1 slice and raised; that column is the last
        # one and is zeroed below anyway.
        if step_idx + 2 < num_step:
            next_token_eval_mask[agent_idx, step_idx + 1] = mask[agent_idx, step_idx + 2]
            next_state_eval_mask[agent_idx, step_idx + 1] = mask[agent_idx, step_idx + 2]

    # No next token after an exit step.
    next_token_eval_mask[eos_token_index[:, 0], eos_token_index[:, 1]] = 0

    for agent_idx, step_idx in eos_token_index.tolist():
        # After an exit the state target stays defined for every later step.
        next_state_eval_mask[agent_idx, step_idx + 1:] = 1
        # The exit step is evaluated iff the step before it was valid; an exit
        # at step 0 has no predecessor (the old slice form raised there).
        if step_idx > 0:
            next_state_eval_mask[agent_idx, step_idx] = mask[agent_idx, step_idx - 1]

    # ---- seed ground truth -------------------------------------------------------
    # Agents entering at step t + 1 are the seed targets of step t.
    next_bos_token_index = torch.nonzero(next_state_index_gt == self.enter_state)
    next_bos_token_index = next_bos_token_index[next_bos_token_index[:, 1] < num_step - 1]

    gt_device = next_state_index_gt.device
    next_state_index_gt_seed = torch.full((self.seed_size, num_step), self.seed_state_type.index('invalid'),
                                          device=gt_device)
    next_type_index_gt_seed = torch.full((self.seed_size, num_step), self.seed_agent_type.index('seed'),
                                         device=gt_device)
    next_eval_mask_seed = torch.ones_like(next_state_index_gt_seed)

    # Fill the seed slots of each step in order of appearance.
    used_slots = [0] * num_step
    enter_label = self.seed_state_type.index('enter')
    for agent_idx, step_idx in next_bos_token_index.tolist():
        slot = used_slots[step_idx]
        if slot < self.seed_size:
            next_state_index_gt_seed[slot, step_idx] = enter_label
            next_type_index_gt_seed[slot, step_idx] = next_type_index_gt[agent_idx, step_idx]
            used_slots[step_idx] = slot + 1

    # The last step has no successor to predict.
    next_token_eval_mask[:, -1] = 0
    next_state_eval_mask[:, -1] = 0
    next_eval_mask_seed[:, -1] = 0

    if (next_token_index_gt[next_token_eval_mask] < 0).any():
        raise RuntimeError("Negative ground-truth token index inside next_token_eval_mask.")

    # Re-label 'exit' with its index in the state head's label set.
    next_state_index_gt[next_state_index_gt == self.exit_state] = self.valid_state_type.index('exit')

    return {'x_a': feat_a,

            'next_token_idx': next_token_idx,
            'next_token_prob': next_token_prob,
            'next_token_idx_gt': next_token_index_gt,
            'next_token_eval_mask': next_token_eval_mask.bool(),

            'next_state_idx': next_state_idx,
            'next_state_prob': next_state_prob,
            'next_state_idx_gt': next_state_index_gt,
            'next_state_eval_mask': next_state_eval_mask.bool(),

            'next_state_idx_seed': next_state_idx_seed,
            'next_state_prob_seed': next_state_prob_seed,
            'next_state_idx_gt_seed': next_state_index_gt_seed,
            'next_type_idx_seed': next_type_idx_seed,
            'next_type_prob_seed': next_type_prob_seed,
            'next_type_idx_gt_seed': next_type_index_gt_seed,
            'next_eval_mask_seed': next_eval_mask_seed.bool(),
            }
|
|
|
def inference(self,
              data: HeteroData,
              map_enc: Mapping[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
    """Autoregressively roll out motion tokens, agent states and newly entering agents.

    The agents whose state at the first predicted token step is valid or exit are
    embedded once. Then, for every future token step, the temporal, map-to-agent
    and agent-to-agent attention stacks are run, one motion token is sampled per
    agent from the top-``beam_size`` candidates, a state is predicted per agent,
    and the seed heads decide how many agents enter the scene. The pose, shape and
    type of entering agents are read from ``data`` (ground truth), not predicted.

    Args:
        data: Scene graph. The method reads ``data['agent']`` fields ``state_idx``,
            ``valid_mask``, ``raw_agent_valid_mask``, ``token_pos``,
            ``token_heading``, ``token_idx``, ``type``, ``shape``, ``position``,
            ``heading``, ``av_index`` and ``data['pt_token']['num_nodes']``
            (``data`` is also forwarded to the embedding / edge builders).
        map_enc: Map encoder output; only ``map_enc['x_pt']`` is read here.

    Returns:
        A dict with the keys ``av_index``, ``valid_mask``, ``pos_a``, ``head_a``,
        ``gt_traj``, ``pred_traj``, ``pred_head``, ``pred_type``, ``pred_state``,
        ``next_token_idx``, ``next_token_idx_gt``, ``next_state_idx``,
        ``next_state_idx_gt``, ``next_token_eval_mask`` and
        ``next_state_eval_mask``. Rows cover the initial agents followed by the
        agents inserted during the roll-out, in insertion order.

    Raises:
        RuntimeError: If, at any step from the first predicted token step on, more
            agents carry the enter state than ``self.seed_size``.

    Side effects:
        Sets ``self.num_recurrent_steps_val`` to the number of raw future steps.
    """
    shift = self.shift
    # Token-step index of the first step that has to be generated.
    first_future_idx = (self.num_historical_steps - 1) // shift

    start_state_idx = data['agent']['state_idx'][:, first_future_idx]
    filter_mask = (start_state_idx == self.valid_state) | (start_state_idx == self.exit_state)
    seed_step_mask = data['agent']['state_idx'][:, first_future_idx:] == self.enter_state
    seed_agent_index_per_step = [torch.nonzero(seed_step_mask[:, step]).squeeze(dim=-1)
                                 for step in range(seed_step_mask.shape[1])]
    if torch.any(seed_step_mask.sum(dim=0) > self.seed_size):
        raise RuntimeError(f"Seed size {self.seed_size} is too small.")

    eval_mask = data['agent']['valid_mask'][filter_mask, self.num_historical_steps - 1]
    if self.predict_state:
        # Validity is predicted by the state head, so every kept agent is evaluated.
        eval_mask = torch.ones_like(eval_mask).bool()

    pos_a = data['agent']['token_pos'][filter_mask].clone()
    state_a = data['agent']['state_idx'][filter_mask].clone()
    head_a = data['agent']['token_heading'][filter_mask].clone()
    gt_traj = data['agent']['position'][filter_mask, self.num_historical_steps:, :self.input_dim].contiguous()
    num_agent, num_step, _ = pos_a.shape

    # Re-base the ego index onto the filtered agent list.
    av_index = int(data['agent']['av_index'])
    av_index -= (~filter_mask[:av_index]).sum()

    # Blank out everything that has to be generated.
    pos_a[:, first_future_idx:] = 0
    state_a[:, first_future_idx:] = 0
    head_a[:, first_future_idx:] = 0

    agent_valid_mask = data['agent']['raw_agent_valid_mask'][filter_mask].clone()
    agent_valid_mask[:, first_future_idx:] = True
    agent_valid_mask[~eval_mask] = False
    agent_token_index = data['agent']['token_idx'][filter_mask]
    agent_state_index = data['agent']['state_idx'][filter_mask]

    feat_a, head_vector_a, agent_token_traj_all, agent_token_emb, categorical_embs = self.agent_token_embedding(
        data,
        agent_token_index,
        agent_state_index,
        pos_a,
        head_a,
        inference=True,
        filter_mask=filter_mask,
        av_index=av_index,
    )
    # The embedding appends one extra row for the seed (agent-insertion) query.
    feat_seed = feat_a[-1:]
    feat_a = feat_a[:-1]

    agent_type = data["agent"]["type"][filter_mask]
    veh_mask = agent_type == 0
    cyc_mask = agent_type == 2
    ped_mask = agent_type == 1

    self.num_recurrent_steps_val = data["agent"]['position'].shape[1] - self.num_historical_steps
    pred_traj = torch.zeros(pos_a.shape[0], self.num_recurrent_steps_val, 2, device=feat_a.device)
    pred_head = torch.zeros(pos_a.shape[0], self.num_recurrent_steps_val, device=feat_a.device)
    pred_type = agent_type.clone()
    pred_state = torch.zeros(pos_a.shape[0], self.num_recurrent_steps_val, device=feat_a.device)
    next_token_idx_list = []
    next_state_idx_list = []
    # Per-layer feature caches (keyed by the layer that consumes them) so that
    # later steps only refresh the newest time slot.
    feat_a_t_dict = {}
    feat_sa_t_dict = {}

    mask = agent_valid_mask.clone()
    temporal_mask = mask.clone()
    interact_mask = mask.clone()
    if self.predict_state:
        is_bos = agent_state_index == self.enter_state
        is_eos = agent_state_index == self.exit_state
        first_bos = torch.argmax(is_bos.long(), dim=1)
        first_eos = torch.argmax(is_eos.long(), dim=1)
        # Fallbacks are created on the same device as the inputs (a bare
        # torch.tensor(...) would live on the CPU).
        bos_index = torch.where(is_bos.any(dim=1), first_bos, torch.zeros_like(first_bos))
        eos_index = torch.where(is_eos.any(dim=1), first_eos, torch.full_like(first_eos, num_step - 1))

        temporal_mask = torch.ones_like(mask)
        motion_mask = torch.arange(mask.shape[1], device=mask.device).expand(mask.shape[0], mask.shape[1])
        motion_mask = (motion_mask > bos_index[:, None]) & (motion_mask <= eos_index[:, None])
        motion_mask[:, self.num_historical_steps // shift:] = False
        temporal_mask[motion_mask] = mask[motion_mask]

        interact_mask = torch.ones_like(mask)
        non_motion_mask = ~motion_mask
        non_motion_mask[:, self.num_historical_steps // shift:] = False
        interact_mask[non_motion_mask] = False
        interact_mask[agent_state_index == self.enter_state] = True

        temporal_mask[:, first_future_idx:] = True
        interact_mask[:, first_future_idx:] = True

    # Loop-invariant: map token features laid out step-major to match the
    # step-major agent features fed to the map-to-agent attention.
    x_pt_per_step = map_enc['x_pt'].repeat_interleave(repeats=num_step, dim=0).reshape(
        -1, num_step, self.hidden_dim).transpose(0, 1).reshape(-1, self.hidden_dim)

    # Loop-invariant: per-type token trajectory tables for inserted agents
    # (seed type index 0 / 1 / 2 -> vehicle / pedestrian / cyclist).
    seed_token_traj = {}
    for type_index, type_name in ((0, 'veh'), (1, 'ped'), (2, 'cyc')):
        token_end = torch.from_numpy(self.trajectory_token[type_name]).clone().to(pos_a.device).to(torch.float)
        token_all = torch.from_numpy(self.trajectory_token_all[type_name]).clone().to(pos_a.device).to(torch.float)
        seed_token_traj[type_index] = torch.cat([token_all[:, :shift], token_end[:, None, ...]], dim=1)

    for t in range(self.num_recurrent_steps_val // shift):
        prev_idx = first_future_idx - 1 + t  # last step whose pose is known
        cur_idx = first_future_idx + t       # step generated in this iteration
        num_agent = pos_a.shape[0]

        if t == 0:
            # First step: attend over the whole (valid) history.
            inference_mask = temporal_mask.clone()
            inference_mask = torch.cat([inference_mask, torch.ones_like(inference_mask[-1:])])
            inference_mask[:, cur_idx:] = False
        else:
            # Later steps: only a sliding window of recent steps is refreshed.
            inference_mask = torch.zeros_like(temporal_mask)
            inference_mask = torch.cat([inference_mask, torch.zeros_like(inference_mask[-1:])])
            window_start = max(cur_idx - (self.num_interaction_steps // shift), 0)
            inference_mask[:, window_start:cur_idx] = True

        # Temporarily add a row for the seed query; removed again below.
        interact_mask = torch.cat([interact_mask, torch.ones_like(interact_mask[:1])]).bool()

        edge_index_t, r_t = self.build_temporal_edge(pos_a, head_a, head_vector_a, state_a,
                                                     temporal_mask, inference_mask,
                                                     av_index=av_index)

        batch_s = torch.arange(num_step, device=pos_a.device).repeat_interleave(num_agent + 1)
        batch_pl = torch.arange(num_step, device=pos_a.device).repeat_interleave(data['pt_token']['num_nodes'])

        edge_index_a2a, r_a2a = self.build_interaction_edge(pos_a, head_a, head_vector_a, state_a, batch_s,
                                                            interact_mask, inference_mask, av_index=av_index)
        edge_index_pl2a, r_pl2a = self.build_map2agent_edge(data, num_step, pos_a, head_a, head_vector_a,
                                                            state_a, batch_s, batch_pl,
                                                            interact_mask, inference_mask, av_index=av_index)
        interact_mask = interact_mask[:-1]

        for i in range(self.num_layers):
            if (i in feat_a_t_dict) and (i in feat_sa_t_dict):
                feat_a = feat_a_t_dict[i]
                feat_seed = feat_sa_t_dict[i]

            feat_a = torch.cat([feat_a, feat_seed], dim=0)

            feat_a = feat_a.reshape(-1, self.hidden_dim)
            feat_a = self.t_attn_layers[i](feat_a, r_t, edge_index_t)

            # Agent-major -> step-major for the spatial attention layers.
            feat_a = feat_a.reshape(-1, num_step, self.hidden_dim).transpose(0, 1).reshape(-1, self.hidden_dim)
            feat_a = self.pt2a_attn_layers[i]((x_pt_per_step, feat_a), r_pl2a, edge_index_pl2a)

            feat_a = self.a2a_attn_layers[i](feat_a, r_a2a, edge_index_a2a)
            feat_a = feat_a.reshape(num_step, -1, self.hidden_dim).transpose(0, 1)

            feat_seed = feat_a[-1:]
            feat_a = feat_a[:-1]

            if t == 0:
                feat_a_t_dict[i + 1] = feat_a
                feat_sa_t_dict[i + 1] = feat_seed
            else:
                # Refresh only the newest slot of the cached agents ...
                n = feat_a_t_dict[i + 1].shape[0]
                feat_a_t_dict[i + 1][:n, prev_idx] = feat_a[:n, prev_idx]
                # ... and append the rows of agents inserted since the last step.
                if feat_a.shape[0] > n:
                    m = feat_a.shape[0] - n
                    feat_a_t_dict[i + 1] = torch.cat([feat_a_t_dict[i + 1], feat_a[-m:]])
                feat_sa_t_dict[i + 1][:, prev_idx] = feat_seed[:, prev_idx]

        # Motion token candidates (top-k) for every agent.
        next_token_prob = self.token_predict_head(feat_a[:, prev_idx])
        next_token_prob_softmax = torch.softmax(next_token_prob, dim=-1)
        topk_token_prob, next_token_idx = torch.topk(next_token_prob_softmax, k=self.beam_size, dim=-1)

        # Agent state; the head's local 'exit' class is mapped to the global id.
        next_state_prob = self.state_predict_head(feat_a[:, prev_idx])
        next_state_idx = next_state_prob.softmax(dim=-1).argmax(dim=-1)
        next_state_idx[next_state_idx == self.valid_state_type.index('exit')] = self.exit_state

        # Seed heads: one query per seed slot decides whether / which type of agent enters.
        feat_seed = self.seed_head(feat_seed) + self.seed_feature.weight[:, None]
        next_state_prob_seed = self.seed_state_predict_head(feat_seed[:, prev_idx])
        next_state_idx_seed = next_state_prob_seed.softmax(dim=-1).argmax(dim=-1, keepdim=True)
        next_state_idx_seed[next_state_idx_seed == self.seed_state_type.index('enter')] = self.enter_state

        next_type_prob_seed = self.seed_type_predict_head(feat_seed[:, prev_idx])
        next_type_idx_seed = next_type_prob_seed.softmax(dim=-1).argmax(dim=-1, keepdim=True)

        # Look up the candidate token trajectories; the trailing (4, 2) axes are
        # presumably the four box corners in the agent frame — TODO confirm.
        expanded_token_index = next_token_idx[..., None, None, None].expand(-1, -1, shift + 1, 4, 2)
        next_token_traj = torch.gather(agent_token_traj_all, 1, expanded_token_index)

        # Rotate the candidates by the last known heading and translate by the last known position.
        theta = head_a[:, prev_idx]
        cos, sin = theta.cos(), theta.sin()
        rot_mat = torch.zeros((num_agent, 2, 2), device=theta.device)
        rot_mat[:, 0, 0] = cos
        rot_mat[:, 0, 1] = sin
        rot_mat[:, 1, 0] = -sin
        rot_mat[:, 1, 1] = cos
        agent_diff_rel = torch.bmm(
            next_token_traj.view(-1, 4, 2),
            rot_mat[:, None, None, ...].repeat(1, self.beam_size, shift + 1, 1, 1).view(-1, 2, 2),
        ).view(num_agent, self.beam_size, shift + 1, 4, 2)
        agent_pred_rel = agent_diff_rel + pos_a[:, prev_idx, :][:, None, None, None, ...]

        # Sample one candidate per agent, weighted by its top-k probability.
        sample_token_index = torch.multinomial(topk_token_prob, 1).to(agent_pred_rel.device)
        next_token_idx = next_token_idx.gather(dim=1, index=sample_token_index).squeeze(-1)
        agent_pred_rel = agent_pred_rel.gather(
            dim=1,
            index=sample_token_index[..., None, None, None].expand(-1, -1, shift + 1, 4, 2))[:, 0, ...]

        # Raw-step outputs: position = mean over the 4 points, heading = point 0 - point 3.
        raw_slice = slice(t * shift, (t + 1) * shift)
        diff_xy = agent_pred_rel[:, 1:, 0, :] - agent_pred_rel[:, 1:, 3, :]
        pred_traj[:num_agent, raw_slice] = agent_pred_rel[:, 1:, ...].clone().mean(dim=2)
        pred_head[:num_agent, raw_slice] = torch.arctan2(diff_xy[:, :, 1], diff_xy[:, :, 0])
        pred_state[:num_agent, raw_slice] = next_state_idx[:, None].repeat(1, shift)

        # Token-step pose used as the input of the next iteration.
        pos_a[:, cur_idx] = agent_pred_rel[:, -1, ...].clone().mean(dim=1)
        diff_xy = agent_pred_rel[:, -1, 0, :] - agent_pred_rel[:, -1, 3, :]
        theta = torch.arctan2(diff_xy[:, 1], diff_xy[:, 0])
        head_a[:, cur_idx] = theta
        state_a[:, cur_idx] = next_state_idx

        # Agents predicted invalid get no token, a zero pose and 'invalid' embeddings.
        is_invalid = next_state_idx == self.invalid_state

        next_token_idx[is_invalid] = -1
        pos_a[is_invalid, cur_idx] = 0.
        head_a[is_invalid, cur_idx] = 0.

        mask[is_invalid, cur_idx] = False
        interact_mask[is_invalid, cur_idx] = False

        agent_token_emb[is_invalid, cur_idx] = self.no_token_emb(torch.zeros(1, device=pos_a.device).long())

        type_emb = categorical_embs[0].reshape(num_agent, num_step, -1)
        shape_emb = categorical_embs[1].reshape(num_agent, num_step, -1)
        type_emb[is_invalid, cur_idx] = self.type_a_emb(
            torch.tensor(self.all_agent_type.index('invalid'), device=pos_a.device).long())
        shape_emb[is_invalid, cur_idx] = self.shape_emb(
            torch.full((1, 3), self.invalid_shape_value, device=pos_a.device))
        categorical_embs = [type_emb.reshape(-1, self.hidden_dim), shape_emb.reshape(-1, self.hidden_dim)]

        # ---- agent insertion -------------------------------------------------
        # Insert at most as many agents as really enter at this step in `data`
        # and as the seed state head marked with a non-zero state.
        seed_agent_index_cur_step = seed_agent_index_per_step[t]
        new_agent_mask = next_state_idx_seed.bool()
        # Plain int so it can safely be used as a size and as a slice bound.
        num_new_agent = min(len(seed_agent_index_cur_step), int(new_agent_mask.sum()))
        next_state_idx_seed = next_state_idx_seed[new_agent_mask][:num_new_agent]
        next_type_idx_seed = next_type_idx_seed[new_agent_mask][:num_new_agent]
        selected_agent_index_cur_step = seed_agent_index_cur_step[:num_new_agent]
        agent_token_index = torch.cat([agent_token_index,
                                       data['agent']['token_idx'][selected_agent_index_cur_step]])
        agent_state_index = torch.cat([agent_state_index,
                                       data['agent']['state_idx'][selected_agent_index_cur_step]])
        agent_valid_mask = torch.cat([agent_valid_mask,
                                      data['agent']['raw_agent_valid_mask'][selected_agent_index_cur_step]])
        gt_traj = torch.cat([gt_traj,
                             data['agent']['position'][selected_agent_index_cur_step,
                                                       self.num_historical_steps:, :self.input_dim]])

        next_state_idx = torch.cat([next_state_idx, next_state_idx_seed], dim=0).long()
        # New agents have no motion token at their entry step; marked with -2.
        next_token_idx = torch.cat([next_token_idx,
                                    torch.zeros(num_new_agent, device=next_token_idx.device) - 2], dim=0).long()
        mask = torch.cat([mask, torch.ones(num_new_agent, num_step, device=mask.device)], dim=0).bool()
        temporal_mask = torch.cat([temporal_mask,
                                   torch.ones(num_new_agent, num_step, device=temporal_mask.device)], dim=0).bool()
        interact_mask = torch.cat([interact_mask,
                                   torch.ones(num_new_agent, num_step, device=interact_mask.device)], dim=0).bool()

        new_pos_a = torch.zeros(num_new_agent, num_step, 2, device=pos_a.device)
        new_head_a = torch.zeros(num_new_agent, num_step, device=pos_a.device)
        new_state_a = torch.zeros(num_new_agent, num_step, device=state_a.device)
        new_shape_a = torch.full((num_new_agent, num_step, 3), self.invalid_shape_value, device=pos_a.device)
        new_type_a = torch.full((num_new_agent, num_step), self.all_agent_type.index('invalid'), device=pos_a.device)

        if num_new_agent > 0:
            # NOTE(review): 'position' / 'heading' are indexed with the token-step
            # index `cur_idx`, while elsewhere in this method they are sliced at
            # raw-step resolution (num_historical_steps) — confirm this is intended.
            gt_bos_pos_a = data['agent']['position'][selected_agent_index_cur_step, cur_idx]
            new_pos_a[:, cur_idx] = gt_bos_pos_a[:, :2].clone()
            pos_a = torch.cat([pos_a, new_pos_a], dim=0)

            gt_bos_head_a = data['agent']['heading'][selected_agent_index_cur_step, cur_idx]
            new_head_a[:, cur_idx] = gt_bos_head_a.clone()
            head_a = torch.cat([head_a, new_head_a], dim=0)

            gt_bos_shape_a = data['agent']['shape'][selected_agent_index_cur_step, self.num_historical_steps - 1]
            gt_bos_type_a = data['agent']['type'][selected_agent_index_cur_step]
            new_shape_a[:, cur_idx:] = gt_bos_shape_a.clone()[:, None]
            new_type_a[:, cur_idx:] = gt_bos_type_a.clone()[:, None]

            pred_type = torch.cat([pred_type, new_type_a[:, cur_idx]])

            new_state_a[:, cur_idx] = self.enter_state
            state_a = torch.cat([state_a, new_state_a], dim=0)

            # A new agent does not exist before its entry step. This must stay
            # guarded: with zero new agents `-0:` would select every row.
            mask[-num_new_agent:, :cur_idx + 1] = 0
            interact_mask[-num_new_agent:, :cur_idx] = 0

            # Within the entry token the agent stays at its entry pose.
            new_pred_traj = torch.zeros(num_new_agent, self.num_recurrent_steps_val, 2, device=pos_a.device)
            new_pred_traj[:, raw_slice] = new_pos_a[:, cur_idx][:, None].repeat(1, shift, 1)
            pred_traj = torch.cat([pred_traj, new_pred_traj], dim=0)

            new_pred_head = torch.zeros(num_new_agent, self.num_recurrent_steps_val, device=pos_a.device)
            new_pred_head[:, raw_slice] = new_head_a[:, cur_idx][:, None].repeat(1, shift)
            pred_head = torch.cat([pred_head, new_pred_head], dim=0)

            new_pred_state = torch.zeros(num_new_agent, self.num_recurrent_steps_val, device=pos_a.device)
            new_pred_state[:, raw_slice] = next_state_idx_seed[:, None].repeat(1, shift)
            pred_state = torch.cat([pred_state, new_pred_state], dim=0)

            agent_token_emb = torch.cat([
                agent_token_emb,
                self.no_token_emb(torch.zeros(1, device=pos_a.device).long())[None, :].repeat(
                    num_new_agent, num_step, 1)])
            veh_mask = torch.cat([veh_mask, next_type_idx_seed == self.seed_agent_type.index('veh')])
            ped_mask = torch.cat([ped_mask, next_type_idx_seed == self.seed_agent_type.index('ped')])
            cyc_mask = torch.cat([cyc_mask, next_type_idx_seed == self.seed_agent_type.index('cyc')])

            new_agent_token_traj_all = torch.zeros((num_new_agent, self.token_size, shift + 1, 4, 2),
                                                   device=pos_a.device)
            for type_index, token_traj in seed_token_traj.items():
                new_agent_token_traj_all[next_type_idx_seed == type_index] = token_traj
            agent_token_traj_all = torch.cat([agent_token_traj_all, new_agent_token_traj_all], dim=0)

            new_categorical_embs = [self.type_a_emb(new_type_a.reshape(-1).long()),
                                    self.shape_emb(new_shape_a.reshape(-1, 3))]
            categorical_embs = [torch.cat([categorical_embs[0], new_categorical_embs[0]], dim=0),
                                torch.cat([categorical_embs[1], new_categorical_embs[1]], dim=0)]

        # ---- rebuild the input features for the next step --------------------
        # NOTE(review): next_token_idx holds -1 (invalid) and -2 (just inserted);
        # used as indices below these presumably wrap around to the last table
        # rows and overwrite the no-token embedding set above — confirm intended.
        agent_token_emb[veh_mask, cur_idx] = self.agent_token_emb_veh[next_token_idx[veh_mask]]
        agent_token_emb[ped_mask, cur_idx] = self.agent_token_emb_ped[next_token_idx[ped_mask]]
        agent_token_emb[cyc_mask, cur_idx] = self.agent_token_emb_cyc[next_token_idx[cyc_mask]]

        motion_vector_a, head_vector_a = self.build_vector_a(pos_a, head_a, state_a)

        # Steps beyond the one just generated are still unknown.
        motion_vector_a[:, cur_idx + 1:] = 0.
        head_vector_a[:, cur_idx + 1:] = 0.
        x_a = torch.stack(
            [torch.norm(motion_vector_a[:, :, :2], p=2, dim=-1),
             angle_between_2d_vectors(ctr_vector=head_vector_a, nbr_vector=motion_vector_a[:, :, :2])], dim=-1)

        x_a = self.x_a_emb(continuous_inputs=x_a.view(-1, x_a.size(-1)),
                           categorical_embs=categorical_embs)
        x_a = x_a.view(-1, num_step, self.hidden_dim)

        s_a = self.state_a_emb(state_a.reshape(-1).long()).reshape(
            num_agent + num_new_agent, num_step, self.hidden_dim)
        feat_a = torch.cat((agent_token_emb, x_a, s_a), dim=-1)
        feat_a = self.fusion_emb(feat_a)

        # The seed query is built from the ego agent's motion / heading vectors.
        motion_vector_seed = motion_vector_a[av_index: av_index + 1]
        head_vector_seed = head_vector_a[av_index: av_index + 1]
        feat_seed = self.build_invalid_agent_feature(num_step, pos_a.device,
                                                     type_index=self.all_agent_type.index('seed'),
                                                     motion_vector=motion_vector_seed,
                                                     head_vector=head_vector_seed)

        next_token_idx_list.append(next_token_idx[:, None])
        next_state_idx_list.append(next_state_idx[:, None])

    # Earlier steps saw fewer agents: pad their columns to the final agent count
    # (token -1, state 0) so they can be concatenated.
    num_agent = pos_a.shape[0]
    for i, (token_col, state_col) in enumerate(zip(next_token_idx_list, next_state_idx_list)):
        token_pad = torch.zeros(num_agent - token_col.shape[0], 1, device=token_col.device) - 1
        state_pad = torch.zeros(num_agent - state_col.shape[0], 1, device=state_col.device)
        next_token_idx_list[i] = torch.cat([token_col, token_pad], dim=0).long()
        next_state_idx_list[i] = torch.cat([state_col, state_pad], dim=0).long()

    next_token_eval_mask = agent_valid_mask.clone()
    next_state_eval_mask = agent_valid_mask.clone()
    bos_token_index = torch.nonzero(agent_state_index == self.enter_state)
    eos_token_index = torch.nonzero(agent_state_index == self.exit_state)

    next_token_eval_mask[bos_token_index[:, 0], bos_token_index[:, 1]] = 1

    for bos_token_index_i in bos_token_index:
        next_state_eval_mask[bos_token_index_i[0], :bos_token_index_i[1] + 2] = 1
    for eos_token_index_i in eos_token_index:
        next_state_eval_mask[eos_token_index_i[0], eos_token_index_i[1]:] = 1

    # ---- attach the history of the initial agents ----------------------------
    num_agent = pred_traj.shape[0]
    num_init_agent = int(filter_mask.sum())
    num_hist_raw = self.num_historical_steps - 1

    # NOTE(review): the history block is appended AFTER the predictions, yet the
    # assignments below overwrite the FIRST `num_hist_raw` entries, i.e. the first
    # predicted steps, and leave trailing zeros. Layout kept as-is because callers
    # are not visible here — confirm whether the zeros were meant to be prepended.
    pred_traj = torch.cat([pred_traj,
                           torch.zeros(num_agent, num_hist_raw, *(pred_traj.shape[2:]),
                                       device=pred_traj.device)], dim=1)
    pred_head = torch.cat([pred_head,
                           torch.zeros(num_agent, num_hist_raw, *(pred_head.shape[2:]),
                                       device=pred_head.device)], dim=1)
    pred_state = torch.cat([pred_state,
                            torch.zeros(num_agent, num_hist_raw, *(pred_state.shape[2:]),
                                        device=pred_state.device)], dim=1)

    pred_state[:num_init_agent, :num_hist_raw] = data['agent']['state_idx'][
        filter_mask, :first_future_idx].repeat_interleave(repeats=shift, dim=1)

    historical_token_idx = data['agent']['token_idx'][filter_mask, :first_future_idx]
    historical_token_idx[historical_token_idx < 0] = 0  # gather needs non-negative indices
    historical_token_traj_all = torch.gather(
        agent_token_traj_all, 1,
        historical_token_idx[..., None, None, None].expand(-1, -1, shift + 1, 4, 2))
    # Every historical token is placed relative to the pose at token step 0.
    init_theta = head_a[:num_init_agent, 0]
    cos, sin = init_theta.cos(), init_theta.sin()
    rot_mat = torch.zeros((num_init_agent, 2, 2), device=init_theta.device)
    rot_mat[:, 0, 0] = cos
    rot_mat[:, 0, 1] = sin
    rot_mat[:, 1, 0] = -sin
    rot_mat[:, 1, 1] = cos
    historical_token_traj_all = torch.bmm(
        historical_token_traj_all.view(-1, 4, 2),
        rot_mat[:, None, None, ...].repeat(1, first_future_idx, shift + 1, 1, 1).view(-1, 2, 2),
    ).view(num_init_agent, first_future_idx, shift + 1, 4, 2)
    historical_token_traj_all = historical_token_traj_all + pos_a[:num_init_agent, 0, :][:, None, None, None, ...]
    pred_traj[:num_init_agent, :num_hist_raw] = historical_token_traj_all[:, :, 1:, ...].clone().mean(
        dim=3).reshape(num_init_agent, -1, 2)
    diff_xy = historical_token_traj_all[..., 1:, 0, :] - historical_token_traj_all[..., 1:, 3, :]
    pred_head[:num_init_agent, :num_hist_raw] = torch.arctan2(
        diff_xy[..., 1], diff_xy[..., 0]).reshape(num_init_agent, -1)

    return {
        'av_index': av_index,
        'valid_mask': agent_valid_mask[:, self.num_historical_steps:],
        'pos_a': pos_a[:, first_future_idx:],
        'head_a': head_a[:, first_future_idx:],
        'gt_traj': gt_traj,
        'pred_traj': pred_traj,
        'pred_head': pred_head,
        'pred_type': list(map(lambda i: self.seed_agent_type[i], pred_type.tolist())),
        'pred_state': pred_state,
        'next_token_idx': torch.cat(next_token_idx_list, dim=-1),
        'next_token_idx_gt': agent_token_index,
        'next_state_idx': torch.cat(next_state_idx_list, dim=-1) if len(next_state_idx_list) > 0 else None,
        'next_state_idx_gt': agent_state_index,
        'next_token_eval_mask': next_token_eval_mask,
        'next_state_eval_mask': next_state_eval_mask,
    }
|
|