hzli
/

voc-detr

Object Detection

Transformers

Safetensors

detr

Inference Endpoints

Model card | Files and versions | Community

hzli committed on Jan 7

Commit

545e7d1

verified ·

1 Parent(s): af04975

Upload DetrForObjectDetection

Browse files

Files changed (2) hide show

config.json +137 -124
model.safetensors +2 -2

config.json CHANGED Viewed

@@ -7,28 +7,113 @@
   ],
   "attention_dropout": 0.0,
   "auxiliary_loss": false,
-  "backbone": "resnet50",
-  "backbone_config": null,
-  "backbone_kwargs": {
-    "in_chans": 3,
-    "out_indices": [
-      1,
-      2,
       3,
       4
-    ]
   },
   "bbox_cost": 5,
   "bbox_loss_coefficient": 5,
   "class_cost": 1,
-  "classifier_dropout": 0.0,
   "d_model": 256,
   "decoder_attention_heads": 8,
   "decoder_ffn_dim": 2048,
   "decoder_layerdrop": 0.0,
   "decoder_layers": 6,
   "dice_loss_coefficient": 1,
-  "dilation": false,
   "dropout": 0.1,
   "encoder_attention_heads": 8,
   "encoder_ffn_dim": 2048,
@@ -38,125 +123,53 @@
   "giou_cost": 2,
   "giou_loss_coefficient": 2,
   "id2label": {
-    "1": "person",
-    "2": "aeroplane",
-    "3": "dog",
-    "4": "chair",
-    "5": "bird",
-    "6": "bottle",
-    "7": "sheep",
-    "8": "tvmonitor",
-    "9": "boat",
-    "10": "diningtable",
-    "11": "horse",
-    "12": "train",
-    "13": "motorbike",
-    "14": "sofa",
-    "15": "cow",
-    "16": "bicycle",
-    "17": "car",
-    "18": "cat",
-    "19": "bus",
-    "20": "pottedplant"
   },
   "init_std": 0.02,
   "init_xavier_std": 1.0,
   "is_encoder_decoder": true,
   "label2id": {
-    "N/A": 0,
-    "airplane": 5,
-    "apple": 53,
-    "backpack": 27,
-    "banana": 52,
-    "baseball bat": 39,
-    "baseball glove": 40,
-    "bear": 23,
-    "bed": 65,
-    "bench": 15,
-    "bicycle": 2,
-    "bird": 16,
-    "blender": 83,
-    "boat": 9,
-    "book": 84,
-    "bottle": 44,
-    "bowl": 51,
-    "broccoli": 56,
-    "bus": 6,
-    "cake": 61,
-    "car": 3,
-    "carrot": 57,
-    "cat": 17,
-    "cell phone": 77,
-    "chair": 62,
-    "clock": 85,
-    "couch": 63,
-    "cow": 21,
-    "cup": 47,
-    "desk": 69,
-    "dining table": 67,
-    "dog": 18,
-    "donut": 60,
-    "door": 71,
-    "elephant": 22,
-    "eye glasses": 30,
-    "fire hydrant": 11,
-    "fork": 48,
-    "frisbee": 34,
-    "giraffe": 25,
-    "hair drier": 89,
-    "handbag": 31,
-    "hat": 26,
-    "horse": 19,
-    "hot dog": 58,
-    "keyboard": 76,
-    "kite": 38,
-    "knife": 49,
-    "laptop": 73,
-    "microwave": 78,
-    "mirror": 66,
-    "motorcycle": 4,
-    "mouse": 74,
-    "orange": 55,
-    "oven": 79,
-    "parking meter": 14,
-    "person": 1,
-    "pizza": 59,
-    "plate": 45,
-    "potted plant": 64,
-    "refrigerator": 82,
-    "remote": 75,
-    "sandwich": 54,
-    "scissors": 87,
-    "sheep": 20,
-    "shoe": 29,
-    "sink": 81,
-    "skateboard": 41,
-    "skis": 35,
-    "snowboard": 36,
-    "spoon": 50,
-    "sports ball": 37,
-    "stop sign": 13,
-    "street sign": 12,
-    "suitcase": 33,
-    "surfboard": 42,
-    "teddy bear": 88,
-    "tennis racket": 43,
-    "tie": 32,
-    "toaster": 80,
-    "toilet": 70,
-    "toothbrush": 90,
-    "traffic light": 10,
-    "train": 7,
-    "truck": 8,
-    "tv": 72,
-    "umbrella": 28,
-    "vase": 86,
-    "window": 68,
-    "wine glass": 46,
-    "zebra": 24
   },
   "mask_loss_coefficient": 1,
-  "max_position_embeddings": 1024,
   "model_type": "detr",
   "num_channels": 3,
   "num_hidden_layers": 6,
@@ -165,6 +178,6 @@
   "scale_embedding": false,
   "torch_dtype": "float32",
   "transformers_version": "4.47.1",
-  "use_pretrained_backbone": true,
-  "use_timm_backbone": true
 }

   ],
   "attention_dropout": 0.0,
   "auxiliary_loss": false,
+  "backbone": null,
+  "backbone_config": {
+    "_attn_implementation_autoset": false,
+    "_name_or_path": "",
+    "add_cross_attention": false,
+    "architectures": null,
+    "bad_words_ids": null,
+    "begin_suppress_tokens": null,
+    "bos_token_id": null,
+    "chunk_size_feed_forward": 0,
+    "cross_attention_hidden_size": null,
+    "decoder_start_token_id": null,
+    "depths": [
       3,
+      4,
+      6,
+      3
+    ],
+    "diversity_penalty": 0.0,
+    "do_sample": false,
+    "downsample_in_bottleneck": false,
+    "downsample_in_first_stage": false,
+    "early_stopping": false,
+    "embedding_size": 64,
+    "encoder_no_repeat_ngram_size": 0,
+    "eos_token_id": null,
+    "exponential_decay_length_penalty": null,
+    "finetuning_task": null,
+    "forced_bos_token_id": null,
+    "forced_eos_token_id": null,
+    "hidden_act": "relu",
+    "hidden_sizes": [
+      256,
+      512,
+      1024,
+      2048
+    ],
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "is_decoder": false,
+    "is_encoder_decoder": false,
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "layer_type": "bottleneck",
+    "length_penalty": 1.0,
+    "max_length": 20,
+    "min_length": 0,
+    "model_type": "resnet",
+    "no_repeat_ngram_size": 0,
+    "num_beam_groups": 1,
+    "num_beams": 1,
+    "num_channels": 3,
+    "num_return_sequences": 1,
+    "out_features": [
+      "stage4"
+    ],
+    "out_indices": [
       4
+    ],
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_scores": false,
+    "pad_token_id": null,
+    "prefix": null,
+    "problem_type": null,
+    "pruned_heads": {},
+    "remove_invalid_values": false,
+    "repetition_penalty": 1.0,
+    "return_dict": true,
+    "return_dict_in_generate": false,
+    "sep_token_id": null,
+    "stage_names": [
+      "stem",
+      "stage1",
+      "stage2",
+      "stage3",
+      "stage4"
+    ],
+    "suppress_tokens": null,
+    "task_specific_params": null,
+    "temperature": 1.0,
+    "tf_legacy_loss": false,
+    "tie_encoder_decoder": false,
+    "tie_word_embeddings": true,
+    "tokenizer_class": null,
+    "top_k": 50,
+    "top_p": 1.0,
+    "torch_dtype": null,
+    "torchscript": false,
+    "typical_p": 1.0,
+    "use_bfloat16": false
   },
+  "backbone_kwargs": null,
   "bbox_cost": 5,
   "bbox_loss_coefficient": 5,
   "class_cost": 1,
   "d_model": 256,
   "decoder_attention_heads": 8,
   "decoder_ffn_dim": 2048,
   "decoder_layerdrop": 0.0,
   "decoder_layers": 6,
   "dice_loss_coefficient": 1,
+  "dilation": null,
   "dropout": 0.1,
   "encoder_attention_heads": 8,
   "encoder_ffn_dim": 2048,
   "giou_cost": 2,
   "giou_loss_coefficient": 2,
   "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2",
+    "3": "LABEL_3",
+    "4": "LABEL_4",
+    "5": "LABEL_5",
+    "6": "LABEL_6",
+    "7": "LABEL_7",
+    "8": "LABEL_8",
+    "9": "LABEL_9",
+    "10": "LABEL_10",
+    "11": "LABEL_11",
+    "12": "LABEL_12",
+    "13": "LABEL_13",
+    "14": "LABEL_14",
+    "15": "LABEL_15",
+    "16": "LABEL_16",
+    "17": "LABEL_17",
+    "18": "LABEL_18",
+    "19": "LABEL_19"
   },
   "init_std": 0.02,
   "init_xavier_std": 1.0,
   "is_encoder_decoder": true,
   "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_10": 10,
+    "LABEL_11": 11,
+    "LABEL_12": 12,
+    "LABEL_13": 13,
+    "LABEL_14": 14,
+    "LABEL_15": 15,
+    "LABEL_16": 16,
+    "LABEL_17": 17,
+    "LABEL_18": 18,
+    "LABEL_19": 19,
+    "LABEL_2": 2,
+    "LABEL_3": 3,
+    "LABEL_4": 4,
+    "LABEL_5": 5,
+    "LABEL_6": 6,
+    "LABEL_7": 7,
+    "LABEL_8": 8,
+    "LABEL_9": 9
   },
   "mask_loss_coefficient": 1,
   "model_type": "detr",
   "num_channels": 3,
   "num_hidden_layers": 6,
   "scale_embedding": false,
   "torch_dtype": "float32",
   "transformers_version": "4.47.1",
+  "use_pretrained_backbone": null,
+  "use_timm_backbone": false
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:78d0293ecee2e6cf8d0838366e45341e150a8f9625dd9e311d46ddb7f0d12fa0
-size 166650908

 version https://git-lfs.github.com/spec/v1
+oid sha256:13ea13249f6d2db7b53b7f46e6ba0f2d67dccd6f8b98bc99f2fb6948f749db3d
+size 166523260