# AVISM experiment configuration layered on top of ../Base-AVIS.yaml.
#
# NOTE(review): this file had been collapsed onto a single line, which made
# everything after the first "#" a comment and left no usable mapping
# structure. The nesting below was reconstructed from the key order and the
# usual detectron2 / Mask2Former config layout (TEST under MASK_FORMER, AVISM
# under MODEL, CROP and LSJ_AUG under INPUT) -- confirm against the project's
# config schema before relying on it. Keys, values and their order are
# unchanged.
_BASE_: ../Base-AVIS.yaml
MODEL:
  WEIGHTS: "./pre_models/R50_COCO.pth"
  MASK_ON: True
  META_ARCHITECTURE: "AVISM_COCO"
  SEM_SEG_HEAD:
    NAME: "MaskFormerHead"
    IGNORE_VALUE: 255
    NUM_CLASSES: 26
    LOSS_WEIGHT: 1.0
    CONVS_DIM: 256
    MASK_DIM: 256
    NORM: "GN"
    # pixel decoder
    PIXEL_DECODER_NAME: "MSDeformAttnPixelDecoder"
    IN_FEATURES: ["res2", "res3", "res4", "res5"]
    DEFORMABLE_TRANSFORMER_ENCODER_IN_FEATURES: ["res3", "res4", "res5"]
    COMMON_STRIDE: 4
    TRANSFORMER_ENC_LAYERS: 6
  MASK_FORMER:
    TRANSFORMER_DECODER_NAME: "AVISMMultiScaleMaskedTransformerDecoder"
    TRANSFORMER_IN_FEATURE: "multi_scale_pixel_decoder"
    DEEP_SUPERVISION: True
    NO_OBJECT_WEIGHT: 0.1
    CLASS_WEIGHT: 2.0
    MASK_WEIGHT: 5.0
    DICE_WEIGHT: 5.0
    HIDDEN_DIM: 256
    NUM_OBJECT_QUERIES: 100
    NHEADS: 8
    DROPOUT: 0.0
    DIM_FEEDFORWARD: 2048
    ENC_LAYERS: 0
    PRE_NORM: False
    ENFORCE_INPUT_PROJ: False
    SIZE_DIVISIBILITY: 32
    DEC_LAYERS: 10  # 9 decoder layers, add one for the loss on learnable query
    TRAIN_NUM_POINTS: 12544
    OVERSAMPLE_RATIO: 3.0
    IMPORTANCE_SAMPLE_RATIO: 0.75
    TEST:
      SEMANTIC_ON: False
      INSTANCE_ON: True
      PANOPTIC_ON: False
      OVERLAP_THRESHOLD: 0.8
      OBJECT_MASK_THRESHOLD: 0.8
  AVISM:
    ENC_WINDOW_SIZE: 6
    SIM_WEIGHT: 0.5

INPUT:
  SAMPLING_FRAME_NUM: 5
  SAMPLING_FRAME_RANGE: 20
  SAMPLING_FRAME_SHUFFLE: False
  AUGMENTATIONS: []
  # The parenthesised values are plain YAML scalars (strings) and are kept
  # verbatim; presumably the config loader converts them to tuples -- verify.
  MIN_SIZE_TRAIN: (288, 320, 352, 384, 416, 448, 480, 512)
  MAX_SIZE_TRAIN: 768
  MIN_SIZE_TEST: 360
  FORMAT: "RGB"
  CROP:
    ENABLED: True
    TYPE: "absolute_range"
    SIZE: (384, 600)
  LSJ_AUG:
    ENABLED: False
    IMAGE_SIZE: 768
    MIN_SCALE: 0.1
    MAX_SCALE: 2.0

DATALOADER:
  FILTER_EMPTY_ANNOTATIONS: True
  NUM_WORKERS: 8

OUTPUT_DIR: "./outputs/avism_R50_COCO"