detr-resnet_swny / config.json
rathi2023's picture
Training in progress, step 500
4078dd7 verified
{
"_name_or_path": "facebook/detr-resnet-50",
"activation_dropout": 0.0,
"activation_function": "relu",
"architectures": [
"DetrForObjectDetection"
],
"attention_dropout": 0.0,
"auxiliary_loss": false,
"backbone": "resnet50",
"backbone_config": null,
"backbone_kwargs": {
"in_chans": 3,
"out_indices": [
1,
2,
3,
4
]
},
"bbox_cost": 5,
"bbox_loss_coefficient": 5,
"class_cost": 1,
"classifier_dropout": 0.0,
"d_model": 256,
"decoder_attention_heads": 8,
"decoder_ffn_dim": 2048,
"decoder_layerdrop": 0.0,
"decoder_layers": 6,
"dice_loss_coefficient": 1,
"dilation": false,
"dropout": 0.1,
"encoder_attention_heads": 8,
"encoder_ffn_dim": 2048,
"encoder_layerdrop": 0.0,
"encoder_layers": 6,
"eos_coefficient": 0.1,
"giou_cost": 2,
"giou_loss_coefficient": 2,
"id2label": {
"1": 53,
"2": 98,
"3": 99,
"4": 47,
"5": 48,
"6": 28,
"7": 29,
"8": 51,
"9": 97,
"10": 98,
"11": 99,
"12": 14,
"13": 16,
"14": 50,
"15": 76,
"16": 77,
"17": 32,
"18": 33,
"19": 74,
"20": 83,
"21": 67,
"22": 101,
"23": 100,
"24": 102,
"25": 31,
"26": 4,
"27": 87,
"28": 89,
"29": 15,
"30": 12,
"31": 7,
"32": 92,
"33": 3,
"34": 52,
"35": 20,
"36": 19,
"37": 57,
"38": 58,
"39": 35,
"40": 34,
"41": 39,
"42": 40,
"43": 49,
"44": 20,
"45": 19,
"46": 85,
"47": 86,
"48": 74,
"49": 75,
"50": 61,
"51": 62,
"52": 18,
"53": 14,
"54": 35,
"55": 34,
"56": 1,
"57": 28,
"58": 30,
"59": 10,
"60": 53,
"61": 22,
"62": 20,
"63": 19,
"64": 74,
"65": 101,
"66": 100,
"67": 102,
"68": 72,
"69": 73,
"70": 21,
"71": 57,
"72": 58,
"73": 36,
"74": 37,
"75": 38,
"76": 63,
"77": 53,
"78": 90,
"79": 91,
"80": 7,
"81": 9,
"82": 97,
"83": 98,
"84": 99,
"85": 26,
"86": 27,
"87": 92,
"88": 93,
"89": 66,
"90": 61,
"91": 62,
"92": 94,
"93": 92,
"94": 87,
"95": 81,
"96": 80,
"97": 78,
"98": 79,
"99": 51,
"100": 68,
"101": 54,
"102": 55,
"103": 56,
"104": 69,
"105": 8,
"106": 103,
"107": 104,
"108": 105,
"109": 25,
"110": 23,
"111": 24,
"112": 72,
"113": 95,
"114": 96,
"115": 41,
"116": 42,
"117": 6,
"118": 70,
"119": 71,
"120": 44,
"121": 45,
"122": 46,
"123": 15,
"124": 12,
"125": 87,
"126": 88,
"127": 28,
"128": 29,
"129": 82,
"130": 83,
"131": 106,
"132": 107,
"133": 2,
"134": 42,
"135": 43,
"136": 81,
"137": 80,
"138": 78,
"139": 67,
"140": 8,
"141": 26,
"142": 32,
"143": 11,
"144": 13,
"145": 17,
"146": 23,
"147": 24,
"148": 63,
"149": 63,
"150": 70,
"151": 84,
"152": 106,
"153": 107,
"154": 2,
"155": 59,
"156": 60,
"157": 10,
"158": 63,
"159": 64
},
"init_std": 0.02,
"init_xavier_std": 1.0,
"is_encoder_decoder": true,
"label2id": {
"1": 56,
"2": 154,
"3": 33,
"4": 26,
"6": 117,
"7": 80,
"8": 140,
"9": 81,
"10": 157,
"11": 143,
"12": 124,
"13": 144,
"14": 53,
"15": 123,
"16": 13,
"17": 145,
"18": 52,
"19": 63,
"20": 62,
"21": 70,
"22": 61,
"23": 146,
"24": 147,
"25": 109,
"26": 141,
"27": 86,
"28": 127,
"29": 128,
"30": 58,
"31": 25,
"32": 142,
"33": 18,
"34": 55,
"35": 54,
"36": 73,
"37": 74,
"38": 75,
"39": 41,
"40": 42,
"41": 115,
"42": 134,
"43": 135,
"44": 120,
"45": 121,
"46": 122,
"47": 4,
"48": 5,
"49": 43,
"50": 14,
"51": 99,
"52": 34,
"53": 77,
"54": 101,
"55": 102,
"56": 103,
"57": 71,
"58": 72,
"59": 155,
"60": 156,
"61": 90,
"62": 91,
"63": 158,
"64": 159,
"66": 89,
"67": 139,
"68": 100,
"69": 104,
"70": 150,
"71": 119,
"72": 112,
"73": 69,
"74": 64,
"75": 49,
"76": 15,
"77": 16,
"78": 138,
"79": 98,
"80": 137,
"81": 136,
"82": 129,
"83": 130,
"84": 151,
"85": 46,
"86": 47,
"87": 125,
"88": 126,
"89": 28,
"90": 78,
"91": 79,
"92": 93,
"93": 88,
"94": 92,
"95": 113,
"96": 114,
"97": 82,
"98": 83,
"99": 84,
"100": 66,
"101": 65,
"102": 67,
"103": 106,
"104": 107,
"105": 108,
"106": 152,
"107": 153
},
"mask_loss_coefficient": 1,
"max_position_embeddings": 1024,
"model_type": "detr",
"num_channels": 3,
"num_hidden_layers": 6,
"num_queries": 100,
"position_embedding_type": "sine",
"scale_embedding": false,
"torch_dtype": "float32",
"transformers_version": "4.41.1",
"use_pretrained_backbone": true,
"use_timm_backbone": true
}