detr-_swny / config.json
rathi2023's picture
Training in progress, step 100
12d7bd0 verified
{
"_name_or_path": "facebook/detr-resnet-50",
"activation_dropout": 0.0,
"activation_function": "relu",
"architectures": [
"DetrForObjectDetection"
],
"attention_dropout": 0.0,
"auxiliary_loss": false,
"backbone": "resnet50",
"backbone_config": null,
"backbone_kwargs": {
"in_chans": 3,
"out_indices": [
1,
2,
3,
4
]
},
"bbox_cost": 5,
"bbox_loss_coefficient": 5,
"class_cost": 1,
"classifier_dropout": 0.0,
"d_model": 256,
"decoder_attention_heads": 8,
"decoder_ffn_dim": 2048,
"decoder_layerdrop": 0.0,
"decoder_layers": 6,
"dice_loss_coefficient": 1,
"dilation": false,
"dropout": 0.1,
"encoder_attention_heads": 8,
"encoder_ffn_dim": 2048,
"encoder_layerdrop": 0.0,
"encoder_layers": 6,
"eos_coefficient": 0.1,
"giou_cost": 2,
"giou_loss_coefficient": 2,
"id2label": {
"1": 63,
"2": 64,
"3": 74,
"4": 75,
"5": 97,
"6": 98,
"7": 99,
"8": 95,
"9": 96,
"10": 49,
"11": 74,
"12": 8,
"13": 7,
"14": 87,
"15": 89,
"16": 25,
"17": 23,
"18": 24,
"19": 35,
"20": 34,
"21": 10,
"22": 26,
"23": 27,
"24": 81,
"25": 80,
"26": 78,
"27": 79,
"28": 6,
"29": 14,
"30": 16,
"31": 59,
"32": 60,
"33": 84,
"34": 106,
"35": 107,
"36": 2,
"37": 83,
"38": 85,
"39": 86,
"40": 66,
"41": 1,
"42": 74,
"43": 23,
"44": 24,
"45": 53,
"46": 28,
"47": 29,
"48": 103,
"49": 104,
"50": 105,
"51": 76,
"52": 77,
"53": 11,
"54": 13,
"55": 70,
"56": 18,
"57": 10,
"58": 92,
"59": 93,
"60": 83,
"61": 92,
"62": 61,
"63": 62,
"64": 68,
"65": 35,
"66": 34,
"67": 28,
"68": 30,
"69": 26,
"70": 94,
"71": 92,
"72": 101,
"73": 100,
"74": 102,
"75": 36,
"76": 37,
"77": 38,
"78": 97,
"79": 98,
"80": 99,
"81": 63,
"82": 101,
"83": 100,
"84": 102,
"85": 81,
"86": 80,
"87": 78,
"88": 53,
"89": 72,
"90": 21,
"91": 67,
"92": 8,
"93": 63,
"94": 63,
"95": 14,
"96": 17,
"97": 28,
"98": 29,
"99": 69,
"100": 61,
"101": 62,
"102": 3,
"103": 87,
"104": 88,
"105": 72,
"106": 73,
"107": 4,
"108": 32,
"109": 33,
"110": 41,
"111": 42,
"112": 98,
"113": 99,
"114": 47,
"115": 48,
"116": 15,
"117": 12,
"118": 44,
"119": 45,
"120": 46,
"121": 57,
"122": 58,
"123": 67,
"124": 52,
"125": 50,
"126": 31,
"127": 70,
"128": 71,
"129": 39,
"130": 40,
"131": 20,
"132": 19,
"133": 54,
"134": 55,
"135": 56,
"136": 57,
"137": 58,
"138": 51,
"139": 90,
"140": 91,
"141": 7,
"142": 9,
"143": 20,
"144": 19,
"145": 32,
"146": 51,
"147": 82,
"148": 15,
"149": 12,
"150": 87,
"151": 53,
"152": 22,
"153": 20,
"154": 19,
"155": 42,
"156": 43,
"157": 106,
"158": 107,
"159": 2
},
"init_std": 0.02,
"init_xavier_std": 1.0,
"is_encoder_decoder": true,
"label2id": {
"1": 41,
"2": 159,
"3": 102,
"4": 107,
"6": 28,
"7": 141,
"8": 92,
"9": 142,
"10": 57,
"11": 53,
"12": 149,
"13": 54,
"14": 95,
"15": 148,
"16": 30,
"17": 96,
"18": 56,
"19": 154,
"20": 153,
"21": 90,
"22": 152,
"23": 43,
"24": 44,
"25": 16,
"26": 69,
"27": 23,
"28": 97,
"29": 98,
"30": 68,
"31": 126,
"32": 145,
"33": 109,
"34": 66,
"35": 65,
"36": 75,
"37": 76,
"38": 77,
"39": 129,
"40": 130,
"41": 110,
"42": 155,
"43": 156,
"44": 118,
"45": 119,
"46": 120,
"47": 114,
"48": 115,
"49": 10,
"50": 125,
"51": 146,
"52": 124,
"53": 151,
"54": 133,
"55": 134,
"56": 135,
"57": 136,
"58": 137,
"59": 31,
"60": 32,
"61": 100,
"62": 101,
"63": 94,
"64": 2,
"66": 40,
"67": 123,
"68": 64,
"69": 99,
"70": 127,
"71": 128,
"72": 105,
"73": 106,
"74": 42,
"75": 4,
"76": 51,
"77": 52,
"78": 87,
"79": 27,
"80": 86,
"81": 85,
"82": 147,
"83": 60,
"84": 33,
"85": 38,
"86": 39,
"87": 150,
"88": 104,
"89": 15,
"90": 139,
"91": 140,
"92": 71,
"93": 59,
"94": 70,
"95": 8,
"96": 9,
"97": 78,
"98": 112,
"99": 113,
"100": 83,
"101": 82,
"102": 84,
"103": 48,
"104": 49,
"105": 50,
"106": 157,
"107": 158
},
"mask_loss_coefficient": 1,
"max_position_embeddings": 1024,
"model_type": "detr",
"num_channels": 3,
"num_hidden_layers": 6,
"num_queries": 100,
"position_embedding_type": "sine",
"scale_embedding": false,
"torch_dtype": "float32",
"transformers_version": "4.41.2",
"use_pretrained_backbone": true,
"use_timm_backbone": true
}