# Training / evaluation configuration for the `cls_cvt` model on the
# `imagenet` dataset.
#
# NOTE(review): this file was restored from a copy in which all line breaks
# and indentation had been lost. The nesting below was reconstructed from the
# key names; confirm each section's parent against the consuming config
# schema before relying on it.

OUTPUT_DIR: 'OUTPUT/'
WORKERS: 6
PRINT_FREQ: 500

# Presumably automatic mixed precision - confirm against the consumer.
AMP:
  ENABLED: true

MODEL:
  NAME: cls_cvt
  SPEC:
    INIT: 'trunc_norm'
    NUM_STAGES: 3
    # Every list below has three entries, matching NUM_STAGES; presumably one
    # value per stage - TODO confirm.
    PATCH_SIZE: [7, 3, 3]
    PATCH_STRIDE: [4, 2, 2]
    PATCH_PADDING: [2, 1, 1]
    DIM_EMBED: [64, 192, 384]
    NUM_HEADS: [1, 3, 6]
    DEPTH: [1, 2, 10]
    MLP_RATIO: [4.0, 4.0, 4.0]
    ATTN_DROP_RATE: [0.0, 0.0, 0.0]
    DROP_RATE: [0.0, 0.0, 0.0]
    DROP_PATH_RATE: [0.0, 0.0, 0.1]
    # Booleans are written in lowercase, consistent with the rest of the file.
    QKV_BIAS: [true, true, true]
    CLS_TOKEN: [false, false, true]
    POS_EMBED: [false, false, false]
    QKV_PROJ_METHOD: ['dw_bn', 'dw_bn', 'dw_bn']
    KERNEL_QKV: [3, 3, 3]
    PADDING_KV: [1, 1, 1]
    STRIDE_KV: [2, 2, 2]
    PADDING_Q: [1, 1, 1]
    STRIDE_Q: [1, 1, 1]

AUG:
  MIXUP_PROB: 1.0
  MIXUP: 0.8
  MIXCUT: 1.0
  TIMM_AUG:
    USE_LOADER: true
    RE_COUNT: 1
    RE_MODE: pixel
    RE_SPLIT: false
    RE_PROB: 0.25
    AUTO_AUGMENT: rand-m9-mstd0.5-inc1
    HFLIP: 0.5
    VFLIP: 0.0
    COLOR_JITTER: 0.4
    INTERPOLATION: bicubic

LOSS:
  LABEL_SMOOTHING: 0.1

CUDNN:
  BENCHMARK: true
  DETERMINISTIC: false
  ENABLED: true

DATASET:
  DATASET: 'imagenet'
  DATA_FORMAT: 'jpg'
  ROOT: 'DATASET/imagenet/'
  TEST_SET: 'val'
  TRAIN_SET: 'train'

TEST:
  BATCH_SIZE_PER_GPU: 32
  IMAGE_SIZE: [224, 224]
  # Empty string; presumably means "no checkpoint file given" - confirm.
  MODEL_FILE: ''
  # NOTE(review): an integer code here, whereas AUG.TIMM_AUG.INTERPOLATION
  # uses a name (`bicubic`). What 3 maps to depends on the consumer - confirm.
  INTERPOLATION: 3

TRAIN:
  BATCH_SIZE_PER_GPU: 256
  LR: 0.00025
  IMAGE_SIZE: [224, 224]
  BEGIN_EPOCH: 0
  END_EPOCH: 300
  LR_SCHEDULER:
    METHOD: 'timm'
    # These keys are lowercase, unlike the rest of the file; presumably they
    # are passed through to the scheduler selected by METHOD - confirm.
    ARGS:
      sched: 'cosine'
      warmup_epochs: 5
      warmup_lr: 0.000001
      min_lr: 0.00001
      cooldown_epochs: 10
      decay_rate: 0.1
  OPTIMIZER: adamW
  WD: 0.05
  WITHOUT_WD_LIST: ['bn', 'bias', 'ln']
  SHUFFLE: true

DEBUG:
  DEBUG: false