import os
from dataclasses import dataclass
from typing import Optional, Tuple

from nuplan.common.maps.abstract_map import SemanticMapLayer
from nuplan.common.actor_state.tracked_objects_types import TrackedObjectType
from nuplan.planning.simulation.trajectory.trajectory_sampling import TrajectorySampling

from navsim.agents.transfuser.transfuser_config import TransfuserConfig

NAVSIM_DEVKIT_ROOT = os.environ.get("NAVSIM_DEVKIT_ROOT")

@dataclass
class HydraConfig(TransfuserConfig):
    # trajectory loss weights
    trajectory_imi_weight: float = 1.0
    # weights for the PDM sub-score losses: no at-fault collisions (noc),
    # drivable area (da), driving direction (dd), time-to-collision (ttc),
    # progress, and comfort
    trajectory_pdm_weight = {
        'noc': 3.0,
        'da': 3.0,
        'dd': 3.0,
        'ttc': 2.0,
        'progress': 1.0,
        'comfort': 1.0,
    }
    progress_weight: float = 2.0
    ttc_weight: float = 2.0
    inference_imi_weight: float = 0.1
    inference_da_weight: float = 1.0
    decouple: bool = False
    # trajectory vocabulary
    vocab_size: int = 4096
    vocab_path: Optional[str] = None
    normalize_vocab_pos: bool = False
    num_ego_status: int = 1
    ckpt_path: Optional[str] = None
    sigma: float = 0.5
    use_pers_bev_embed: bool = False
    type: str = 'center'
    rel: bool = False
    use_nerf: bool = False
    extra_traj_layer: bool = False
    use_back_view: bool = False
    extra_tr: bool = False
    vadv2_head_nhead: int = 8
    vadv2_head_nlayers: int = 3
    trajectory_sampling: TrajectorySampling = TrajectorySampling(
        time_horizon=4, interval_length=0.1
    )
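    # At a 4 s horizon sampled every 0.1 s, each sampled trajectory covers
    # 4 / 0.1 = 40 future poses.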
    # img backbone
    use_final_fpn: bool = False
    use_img_pretrained: bool = False
    # image_architecture: str = "vit_large_patch14_dinov2.lvd142m"
    image_architecture: str = "resnet34"
    backbone_type: str = 'resnet'
    vit_ckpt: str = ''
    intern_ckpt: str = ''
    vov_ckpt: str = ''
    eva_ckpt: str = ''
    swin_ckpt: str = ''
    sptr_ckpt: str = ''
    map_ckpt: str = ''
    lr_mult_backbone: float = 1.0
    backbone_wd: float = 0.0
    # lidar backbone
    lidar_architecture: str = "resnet34"
    max_height_lidar: float = 100.0
    pixels_per_meter: float = 4.0
    hist_max_per_pixel: int = 5
    lidar_min_x: float = -32
    lidar_max_x: float = 32
    lidar_min_y: float = -32
    lidar_max_y: float = 32
    lidar_split_height: float = 0.2
    use_ground_plane: bool = False
    # sensor input and feature-grid dimensions
    lidar_seq_len: int = 1
    camera_width: int = 2048
    camera_height: int = 512
    lidar_resolution_width: int = 256
    lidar_resolution_height: int = 256
    # feature-map grid sizes, assuming a stride-32 backbone
    img_vert_anchors: int = camera_height // 32
    img_horz_anchors: int = camera_width // 32
    lidar_vert_anchors: int = lidar_resolution_height // 32
    lidar_horz_anchors: int = lidar_resolution_width // 32
    # GPT-style fusion transformer
    block_exp = 4
    n_layer = 2  # Number of transformer layers used in the vision backbone
    n_head = 4
    n_scale = 4
    embd_pdrop = 0.1
    resid_pdrop = 0.1
    attn_pdrop = 0.1
    # Mean of the normal distribution initialization for linear layers in the GPT
    gpt_linear_layer_init_mean = 0.0
    # Std of the normal distribution initialization for linear layers in the GPT
    gpt_linear_layer_init_std = 0.02
    # Initial weight of the layer norms in the GPT
    gpt_layer_norm_init_weight = 1.0
    perspective_downsample_factor = 1
    transformer_decoder_join = True
    detect_boxes = True
    use_bev_semantic = True
    use_semantic = False
    use_depth = False
    add_features = True
    # Transformer
    tf_d_model: int = 256
    tf_d_ffn: int = 1024
    tf_num_layers: int = 3
    tf_num_head: int = 8
    tf_dropout: float = 0.0
    # detection
    num_bounding_boxes: int = 30
    # loss weights
    agent_class_weight: float = 10.0
    agent_box_weight: float = 1.0
    bev_semantic_weight: float = 10.0
    # BEV mapping
    bev_semantic_classes = {
        1: ("polygon", [SemanticMapLayer.LANE, SemanticMapLayer.INTERSECTION]),  # road
        2: ("polygon", [SemanticMapLayer.WALKWAYS]),  # walkways
        3: ("linestring", [SemanticMapLayer.LANE, SemanticMapLayer.LANE_CONNECTOR]),  # centerline
        4: (
            "box",
            [
                TrackedObjectType.CZONE_SIGN,
                TrackedObjectType.BARRIER,
                TrackedObjectType.TRAFFIC_CONE,
                TrackedObjectType.GENERIC_OBJECT,
            ],
        ),  # static_objects
        5: ("box", [TrackedObjectType.VEHICLE]),  # vehicles
        6: ("box", [TrackedObjectType.PEDESTRIAN]),  # pedestrians
    }
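    # Classes 1-6 above are rendered on top of an implicit background class
    # (index 0), which is presumably why num_bev_classes below is 7.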
    bev_pixel_width: int = lidar_resolution_width
    bev_pixel_height: int = lidar_resolution_height // 2
    bev_pixel_size: float = 1 / pixels_per_meter
    num_bev_classes = 7
    bev_features_channels: int = 64
    bev_down_sample_factor: int = 4
    bev_upsample_factor: int = 2

    @property
    def bev_semantic_frame(self) -> Tuple[int, int]:
        return (self.bev_pixel_height, self.bev_pixel_width)

    @property
    def bev_radius(self) -> float:
        values = [self.lidar_min_x, self.lidar_max_x, self.lidar_min_y, self.lidar_max_y]
        return max([abs(value) for value in values])
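
# Minimal usage sketch: instantiate the config and inspect a few derived
# values. Assumes the nuplan/navsim dependencies imported above are
# installed; the printed numbers follow from the defaults in the class.
if __name__ == "__main__":
    config = HydraConfig()
    print(config.bev_semantic_frame)  # (128, 256) -> (bev_pixel_height, bev_pixel_width)
    print(config.bev_radius)  # 32 -> largest absolute lidar bound, in meters
    # 16 x 64 feature tokens for a 512 x 2048 image with a stride-32 backbone
    print(config.img_vert_anchors, config.img_horz_anchors)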