{ "_name_or_path": "facebook/detr-resnet-50", "activation_dropout": 0.0, "activation_function": "relu", "architectures": [ "DetrForObjectDetection" ], "attention_dropout": 0.0, "auxiliary_loss": false, "backbone": "resnet50", "backbone_config": null, "backbone_kwargs": { "in_chans": 3, "out_indices": [ 1, 2, 3, 4 ] }, "bbox_cost": 5, "bbox_loss_coefficient": 5, "class_cost": 1, "classifier_dropout": 0.0, "d_model": 256, "decoder_attention_heads": 8, "decoder_ffn_dim": 2048, "decoder_layerdrop": 0.0, "decoder_layers": 6, "dice_loss_coefficient": 1, "dilation": false, "dropout": 0.1, "encoder_attention_heads": 8, "encoder_ffn_dim": 2048, "encoder_layerdrop": 0.0, "encoder_layers": 6, "eos_coefficient": 0.1, "giou_cost": 2, "giou_loss_coefficient": 2, "id2label": { "1": 63, "2": 64, "3": 74, "4": 75, "5": 97, "6": 98, "7": 99, "8": 95, "9": 96, "10": 49, "11": 74, "12": 8, "13": 7, "14": 87, "15": 89, "16": 25, "17": 23, "18": 24, "19": 35, "20": 34, "21": 10, "22": 26, "23": 27, "24": 81, "25": 80, "26": 78, "27": 79, "28": 6, "29": 14, "30": 16, "31": 59, "32": 60, "33": 84, "34": 106, "35": 107, "36": 2, "37": 83, "38": 85, "39": 86, "40": 66, "41": 1, "42": 74, "43": 23, "44": 24, "45": 53, "46": 28, "47": 29, "48": 103, "49": 104, "50": 105, "51": 76, "52": 77, "53": 11, "54": 13, "55": 70, "56": 18, "57": 10, "58": 92, "59": 93, "60": 83, "61": 92, "62": 61, "63": 62, "64": 68, "65": 35, "66": 34, "67": 28, "68": 30, "69": 26, "70": 94, "71": 92, "72": 101, "73": 100, "74": 102, "75": 36, "76": 37, "77": 38, "78": 97, "79": 98, "80": 99, "81": 63, "82": 101, "83": 100, "84": 102, "85": 81, "86": 80, "87": 78, "88": 53, "89": 72, "90": 21, "91": 67, "92": 8, "93": 63, "94": 63, "95": 14, "96": 17, "97": 28, "98": 29, "99": 69, "100": 61, "101": 62, "102": 3, "103": 87, "104": 88, "105": 72, "106": 73, "107": 4, "108": 32, "109": 33, "110": 41, "111": 42, "112": 98, "113": 99, "114": 47, "115": 48, "116": 15, "117": 12, "118": 44, "119": 45, "120": 46, "121": 57, "122": 58, "123": 67, "124": 52, "125": 50, "126": 31, "127": 70, "128": 71, "129": 39, "130": 40, "131": 20, "132": 19, "133": 54, "134": 55, "135": 56, "136": 57, "137": 58, "138": 51, "139": 90, "140": 91, "141": 7, "142": 9, "143": 20, "144": 19, "145": 32, "146": 51, "147": 82, "148": 15, "149": 12, "150": 87, "151": 53, "152": 22, "153": 20, "154": 19, "155": 42, "156": 43, "157": 106, "158": 107, "159": 2 }, "init_std": 0.02, "init_xavier_std": 1.0, "is_encoder_decoder": true, "label2id": { "1": 41, "2": 159, "3": 102, "4": 107, "6": 28, "7": 141, "8": 92, "9": 142, "10": 57, "11": 53, "12": 149, "13": 54, "14": 95, "15": 148, "16": 30, "17": 96, "18": 56, "19": 154, "20": 153, "21": 90, "22": 152, "23": 43, "24": 44, "25": 16, "26": 69, "27": 23, "28": 97, "29": 98, "30": 68, "31": 126, "32": 145, "33": 109, "34": 66, "35": 65, "36": 75, "37": 76, "38": 77, "39": 129, "40": 130, "41": 110, "42": 155, "43": 156, "44": 118, "45": 119, "46": 120, "47": 114, "48": 115, "49": 10, "50": 125, "51": 146, "52": 124, "53": 151, "54": 133, "55": 134, "56": 135, "57": 136, "58": 137, "59": 31, "60": 32, "61": 100, "62": 101, "63": 94, "64": 2, "66": 40, "67": 123, "68": 64, "69": 99, "70": 127, "71": 128, "72": 105, "73": 106, "74": 42, "75": 4, "76": 51, "77": 52, "78": 87, "79": 27, "80": 86, "81": 85, "82": 147, "83": 60, "84": 33, "85": 38, "86": 39, "87": 150, "88": 104, "89": 15, "90": 139, "91": 140, "92": 71, "93": 59, "94": 70, "95": 8, "96": 9, "97": 78, "98": 112, "99": 113, "100": 83, "101": 82, "102": 84, "103": 48, "104": 49, "105": 50, "106": 157, "107": 158 }, "mask_loss_coefficient": 1, "max_position_embeddings": 1024, "model_type": "detr", "num_channels": 3, "num_hidden_layers": 6, "num_queries": 100, "position_embedding_type": "sine", "scale_embedding": false, "torch_dtype": "float32", "transformers_version": "4.41.2", "use_pretrained_backbone": true, "use_timm_backbone": true }