from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Tuple

from nuplan.common.maps.abstract_map import SemanticMapLayer
from nuplan.common.actor_state.tracked_objects_types import TrackedObjectType
from nuplan.planning.simulation.trajectory.trajectory_sampling import TrajectorySampling

from navsim.agents.transfuser.transfuser_config import TransfuserConfig
import os

NAVSIM_DEVKIT_ROOT = os.environ.get("NAVSIM_DEVKIT_ROOT")


@dataclass
class HydraConfig(TransfuserConfig):

    trajectory_imi_weight: float = 1.0
    # weights for the individual PDM sub-score prediction losses
    trajectory_pdm_weight = {
        'noc': 3.0,
        'da': 3.0,
        'dd': 3.0,
        'ttc': 2.0,
        'progress': 1.0,
        'comfort': 1.0,
    }
    progress_weight: float = 2.0
    ttc_weight: float = 2.0

    # weights used when fusing scores at inference time
    inference_imi_weight: float = 0.1
    inference_da_weight: float = 1.0
    decouple: bool = False

    vocab_size: int = 4096  # number of candidate trajectories in the planning vocabulary
    vocab_path: Optional[str] = None  # path to the pre-computed trajectory vocabulary
    normalize_vocab_pos: bool = False
    num_ego_status: int = 1
    ckpt_path: Optional[str] = None  # optional pretrained checkpoint
    sigma: float = 0.5
    use_pers_bev_embed: bool = False
    type: str = 'center'
    rel: bool = False
    use_nerf: bool = False
    extra_traj_layer: bool = False
    use_back_view: bool = False
    extra_tr: bool = False

    vadv2_head_nhead: int = 8
    vadv2_head_nlayers: int = 3

    trajectory_sampling: TrajectorySampling = TrajectorySampling(
        time_horizon=4, interval_length=0.1
    )

    # img backbone
    use_final_fpn: bool = False
    use_img_pretrained: bool = False
    # image_architecture: str = "vit_large_patch14_dinov2.lvd142m"
    image_architecture: str = "resnet34"
    backbone_type: str = 'resnet'
    vit_ckpt: str = ''
    intern_ckpt: str = ''
    vov_ckpt: str = ''
    eva_ckpt: str = ''
    swin_ckpt: str = ''
    sptr_ckpt: str = ''
    map_ckpt: str = ''
    lr_mult_backbone: float = 1.0
    backbone_wd: float = 0.0

    # lidar backbone
    lidar_architecture: str = "resnet34"
    max_height_lidar: float = 100.0
    pixels_per_meter: float = 4.0
    hist_max_per_pixel: int = 5
    lidar_min_x: float = -32
    lidar_max_x: float = 32
    lidar_min_y: float = -32
    lidar_max_y: float = 32
    lidar_split_height: float = 0.2
    use_ground_plane: bool = False

    # new
    lidar_seq_len: int = 1

    camera_width: int = 2048
    camera_height: int = 512
    lidar_resolution_width: int = 256
    lidar_resolution_height: int = 256

    img_vert_anchors: int = camera_height // 32
    img_horz_anchors: int = camera_width // 32
    lidar_vert_anchors: int = lidar_resolution_height // 32
    lidar_horz_anchors: int = lidar_resolution_width // 32

    block_exp = 4
    n_layer = 2  # Number of transformer layers used in the vision backbone
    n_head = 4
    n_scale = 4
    embd_pdrop = 0.1
    resid_pdrop = 0.1
    attn_pdrop = 0.1
    # Mean of the normal distribution initialization for linear layers in the GPT
    gpt_linear_layer_init_mean = 0.0
    # Std of the normal distribution initialization for linear layers in the GPT
    gpt_linear_layer_init_std = 0.02
    # Initial weight of the layer norms in the gpt.
    gpt_layer_norm_init_weight = 1.0

    perspective_downsample_factor = 1

    transformer_decoder_join = True
    detect_boxes = True
    use_bev_semantic = True
    use_semantic = False
    use_depth = False
    add_features = True

    # Transformer
    tf_d_model: int = 256
    tf_d_ffn: int = 1024
    tf_num_layers: int = 3
    tf_num_head: int = 8
    tf_dropout: float = 0.0

    # detection
    num_bounding_boxes: int = 30

    # loss weights
    agent_class_weight: float = 10.0
    agent_box_weight: float = 1.0
    bev_semantic_weight: float = 10.0

    # BEV mapping
    bev_semantic_classes = {
        1: ("polygon", [SemanticMapLayer.LANE, SemanticMapLayer.INTERSECTION]),  # road
        2: ("polygon", [SemanticMapLayer.WALKWAYS]),  # walkways
        3: ("linestring", [SemanticMapLayer.LANE, SemanticMapLayer.LANE_CONNECTOR]),  # centerline
        4: (
            "box",
            [
                TrackedObjectType.CZONE_SIGN,
                TrackedObjectType.BARRIER,
                TrackedObjectType.TRAFFIC_CONE,
                TrackedObjectType.GENERIC_OBJECT,
            ],
        ),  # static_objects
        5: ("box", [TrackedObjectType.VEHICLE]),  # vehicles
        6: ("box", [TrackedObjectType.PEDESTRIAN]),  # pedestrians
    }

    bev_pixel_width: int = lidar_resolution_width
    bev_pixel_height: int = lidar_resolution_height // 2
    bev_pixel_size: float = 1 / pixels_per_meter

    num_bev_classes = 7
    bev_features_channels: int = 64
    bev_down_sample_factor: int = 4
    bev_upsample_factor: int = 2

    @property
    def bev_semantic_frame(self) -> Tuple[int, int]:
        return (self.bev_pixel_height, self.bev_pixel_width)

    @property
    def bev_radius(self) -> float:
        values = [self.lidar_min_x, self.lidar_max_x, self.lidar_min_y, self.lidar_max_y]
        return max([abs(value) for value in values])
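

# Minimal usage sketch (illustrative only, not part of the training/eval pipelines):
# instantiating the config with its defaults and reading the derived properties.
# The expected values in the comments follow directly from the field definitions above;
# the vocabulary path used in the override example is a hypothetical placeholder.
if __name__ == "__main__":
    cfg = HydraConfig()
    # bev_pixel_height = lidar_resolution_height // 2 = 128, bev_pixel_width = 256
    print(cfg.bev_semantic_frame)  # (128, 256)
    # largest absolute lidar extent among min/max x/y
    print(cfg.bev_radius)  # 32
    # fields can be overridden like any dataclass, e.g. a larger trajectory vocabulary
    cfg_8k = HydraConfig(vocab_size=8192, vocab_path="/path/to/vocab_8192.npy")
    print(cfg_8k.vocab_size)  # 8192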