# AVISM configuration using the "D2SwinTransformer" backbone.
# Settings not listed here are inherited from the base config named in _BASE_.
#
# NOTE(review): the nesting below was reconstructed from a flattened copy of
# this file, following the usual detectron2 / Mask2Former config layout.
# Confirm the key hierarchy against the project's config definitions.
_BASE_: ../Base-AVIS.yaml

MODEL:
  # Checkpoint used to initialise the model weights.
  WEIGHTS: "./pre_models/SwinL_IN.pkl"
  BACKBONE:
    NAME: "D2SwinTransformer"
  SWIN:
    EMBED_DIM: 192
    # One entry per backbone stage.
    DEPTHS: [2, 2, 18, 2]
    NUM_HEADS: [6, 12, 24, 48]
    WINDOW_SIZE: 12
    APE: False
    DROP_PATH_RATE: 0.3
    PATCH_NORM: True
    PRETRAIN_IMG_SIZE: 384
  # Per-channel input normalisation statistics.
  PIXEL_MEAN: [123.675, 116.280, 103.530]
  PIXEL_STD: [58.395, 57.120, 57.375]
  META_ARCHITECTURE: "AVISM"
  MASK_ON: True
  SEM_SEG_HEAD:
    NAME: "MaskFormerHead"
    IGNORE_VALUE: 255
    NUM_CLASSES: 26
    LOSS_WEIGHT: 1.0
    CONVS_DIM: 256
    MASK_DIM: 256
    NORM: "GN"
    # pixel decoder
    PIXEL_DECODER_NAME: "MSDeformAttnPixelDecoder"
    IN_FEATURES: ["res2", "res3", "res4", "res5"]
    DEFORMABLE_TRANSFORMER_ENCODER_IN_FEATURES: ["res3", "res4", "res5"]
    COMMON_STRIDE: 4
    TRANSFORMER_ENC_LAYERS: 6
  MASK_FORMER:
    TRANSFORMER_DECODER_NAME: "AVISMMultiScaleMaskedTransformerDecoder"
    TRANSFORMER_IN_FEATURE: "multi_scale_pixel_decoder"
    DEEP_SUPERVISION: True
    NO_OBJECT_WEIGHT: 0.1
    CLASS_WEIGHT: 2.0
    MASK_WEIGHT: 5.0
    DICE_WEIGHT: 5.0
    HIDDEN_DIM: 256
    NUM_OBJECT_QUERIES: 200
    NHEADS: 8
    DROPOUT: 0.0
    DIM_FEEDFORWARD: 2048
    ENC_LAYERS: 0
    PRE_NORM: False
    ENFORCE_INPUT_PROJ: False
    SIZE_DIVISIBILITY: 32
    DEC_LAYERS: 10  # 9 decoder layers, add one for the loss on learnable query
    TRAIN_NUM_POINTS: 12544
    OVERSAMPLE_RATIO: 3.0
    IMPORTANCE_SAMPLE_RATIO: 0.75
    TEST:
      # Only instance-level output is enabled at test time.
      SEMANTIC_ON: False
      INSTANCE_ON: True
      PANOPTIC_ON: False
      OVERLAP_THRESHOLD: 0.8
      OBJECT_MASK_THRESHOLD: 0.8
  AVISM:
    ENC_WINDOW_SIZE: 6
    SIM_WEIGHT: 0.5

SOLVER:
  WARMUP_FACTOR: 1.0
  WARMUP_ITERS: 10
  WEIGHT_DECAY: 0.05
  OPTIMIZER: "ADAMW"
  BACKBONE_MULTIPLIER: 0.1
  CLIP_GRADIENTS:
    ENABLED: True
    CLIP_TYPE: "full_model"
    CLIP_VALUE: 0.01
    NORM_TYPE: 2.0

INPUT:
  SAMPLING_FRAME_NUM: 5
  SAMPLING_FRAME_RANGE: 20
  SAMPLING_FRAME_SHUFFLE: False
  # MIN_SIZE_TRAIN_SAMPLING options:
  # "range", "choice", "range_by_clip", "choice_by_clip".
  MIN_SIZE_TRAIN_SAMPLING: "choice_by_clip"
  # RANDOM_FLIP options: "none", "horizontal", "flip_by_clip".
  # "horizontal" is set by default.
  RANDOM_FLIP: "flip_by_clip"
  AUGMENTATIONS: []
  MIN_SIZE_TRAIN: (288, 320, 352, 384, 416, 448, 480, 512)
  MAX_SIZE_TRAIN: 768
  MIN_SIZE_TEST: 448
  FORMAT: "RGB"
  CROP:
    ENABLED: True
    TYPE: "absolute_range"
    SIZE: (384, 600)
  LSJ_AUG:
    ENABLED: False
    IMAGE_SIZE: 768
    MIN_SCALE: 0.1
    MAX_SCALE: 2.0

DATALOADER:
  FILTER_EMPTY_ANNOTATIONS: True
  NUM_WORKERS: 8

OUTPUT_DIR: "./outputs/avism_SwinL_IN"