hzli committed on
Commit
545e7d1
·
verified ·
1 Parent(s): af04975

Upload DetrForObjectDetection

Browse files
Files changed (2) hide show
  1. config.json +137 -124
  2. model.safetensors +2 -2
config.json CHANGED
@@ -7,28 +7,113 @@
7
  ],
8
  "attention_dropout": 0.0,
9
  "auxiliary_loss": false,
10
- "backbone": "resnet50",
11
- "backbone_config": null,
12
- "backbone_kwargs": {
13
- "in_chans": 3,
14
- "out_indices": [
15
- 1,
16
- 2,
 
 
 
 
 
 
17
  3,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  4
19
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  },
 
21
  "bbox_cost": 5,
22
  "bbox_loss_coefficient": 5,
23
  "class_cost": 1,
24
- "classifier_dropout": 0.0,
25
  "d_model": 256,
26
  "decoder_attention_heads": 8,
27
  "decoder_ffn_dim": 2048,
28
  "decoder_layerdrop": 0.0,
29
  "decoder_layers": 6,
30
  "dice_loss_coefficient": 1,
31
- "dilation": false,
32
  "dropout": 0.1,
33
  "encoder_attention_heads": 8,
34
  "encoder_ffn_dim": 2048,
@@ -38,125 +123,53 @@
38
  "giou_cost": 2,
39
  "giou_loss_coefficient": 2,
40
  "id2label": {
41
- "1": "person",
42
- "2": "aeroplane",
43
- "3": "dog",
44
- "4": "chair",
45
- "5": "bird",
46
- "6": "bottle",
47
- "7": "sheep",
48
- "8": "tvmonitor",
49
- "9": "boat",
50
- "10": "diningtable",
51
- "11": "horse",
52
- "12": "train",
53
- "13": "motorbike",
54
- "14": "sofa",
55
- "15": "cow",
56
- "16": "bicycle",
57
- "17": "car",
58
- "18": "cat",
59
- "19": "bus",
60
- "20": "pottedplant"
61
  },
62
  "init_std": 0.02,
63
  "init_xavier_std": 1.0,
64
  "is_encoder_decoder": true,
65
  "label2id": {
66
- "N/A": 0,
67
- "airplane": 5,
68
- "apple": 53,
69
- "backpack": 27,
70
- "banana": 52,
71
- "baseball bat": 39,
72
- "baseball glove": 40,
73
- "bear": 23,
74
- "bed": 65,
75
- "bench": 15,
76
- "bicycle": 2,
77
- "bird": 16,
78
- "blender": 83,
79
- "boat": 9,
80
- "book": 84,
81
- "bottle": 44,
82
- "bowl": 51,
83
- "broccoli": 56,
84
- "bus": 6,
85
- "cake": 61,
86
- "car": 3,
87
- "carrot": 57,
88
- "cat": 17,
89
- "cell phone": 77,
90
- "chair": 62,
91
- "clock": 85,
92
- "couch": 63,
93
- "cow": 21,
94
- "cup": 47,
95
- "desk": 69,
96
- "dining table": 67,
97
- "dog": 18,
98
- "donut": 60,
99
- "door": 71,
100
- "elephant": 22,
101
- "eye glasses": 30,
102
- "fire hydrant": 11,
103
- "fork": 48,
104
- "frisbee": 34,
105
- "giraffe": 25,
106
- "hair drier": 89,
107
- "handbag": 31,
108
- "hat": 26,
109
- "horse": 19,
110
- "hot dog": 58,
111
- "keyboard": 76,
112
- "kite": 38,
113
- "knife": 49,
114
- "laptop": 73,
115
- "microwave": 78,
116
- "mirror": 66,
117
- "motorcycle": 4,
118
- "mouse": 74,
119
- "orange": 55,
120
- "oven": 79,
121
- "parking meter": 14,
122
- "person": 1,
123
- "pizza": 59,
124
- "plate": 45,
125
- "potted plant": 64,
126
- "refrigerator": 82,
127
- "remote": 75,
128
- "sandwich": 54,
129
- "scissors": 87,
130
- "sheep": 20,
131
- "shoe": 29,
132
- "sink": 81,
133
- "skateboard": 41,
134
- "skis": 35,
135
- "snowboard": 36,
136
- "spoon": 50,
137
- "sports ball": 37,
138
- "stop sign": 13,
139
- "street sign": 12,
140
- "suitcase": 33,
141
- "surfboard": 42,
142
- "teddy bear": 88,
143
- "tennis racket": 43,
144
- "tie": 32,
145
- "toaster": 80,
146
- "toilet": 70,
147
- "toothbrush": 90,
148
- "traffic light": 10,
149
- "train": 7,
150
- "truck": 8,
151
- "tv": 72,
152
- "umbrella": 28,
153
- "vase": 86,
154
- "window": 68,
155
- "wine glass": 46,
156
- "zebra": 24
157
  },
158
  "mask_loss_coefficient": 1,
159
- "max_position_embeddings": 1024,
160
  "model_type": "detr",
161
  "num_channels": 3,
162
  "num_hidden_layers": 6,
@@ -165,6 +178,6 @@
165
  "scale_embedding": false,
166
  "torch_dtype": "float32",
167
  "transformers_version": "4.47.1",
168
- "use_pretrained_backbone": true,
169
- "use_timm_backbone": true
170
  }
 
7
  ],
8
  "attention_dropout": 0.0,
9
  "auxiliary_loss": false,
10
+ "backbone": null,
11
+ "backbone_config": {
12
+ "_attn_implementation_autoset": false,
13
+ "_name_or_path": "",
14
+ "add_cross_attention": false,
15
+ "architectures": null,
16
+ "bad_words_ids": null,
17
+ "begin_suppress_tokens": null,
18
+ "bos_token_id": null,
19
+ "chunk_size_feed_forward": 0,
20
+ "cross_attention_hidden_size": null,
21
+ "decoder_start_token_id": null,
22
+ "depths": [
23
  3,
24
+ 4,
25
+ 6,
26
+ 3
27
+ ],
28
+ "diversity_penalty": 0.0,
29
+ "do_sample": false,
30
+ "downsample_in_bottleneck": false,
31
+ "downsample_in_first_stage": false,
32
+ "early_stopping": false,
33
+ "embedding_size": 64,
34
+ "encoder_no_repeat_ngram_size": 0,
35
+ "eos_token_id": null,
36
+ "exponential_decay_length_penalty": null,
37
+ "finetuning_task": null,
38
+ "forced_bos_token_id": null,
39
+ "forced_eos_token_id": null,
40
+ "hidden_act": "relu",
41
+ "hidden_sizes": [
42
+ 256,
43
+ 512,
44
+ 1024,
45
+ 2048
46
+ ],
47
+ "id2label": {
48
+ "0": "LABEL_0",
49
+ "1": "LABEL_1"
50
+ },
51
+ "is_decoder": false,
52
+ "is_encoder_decoder": false,
53
+ "label2id": {
54
+ "LABEL_0": 0,
55
+ "LABEL_1": 1
56
+ },
57
+ "layer_type": "bottleneck",
58
+ "length_penalty": 1.0,
59
+ "max_length": 20,
60
+ "min_length": 0,
61
+ "model_type": "resnet",
62
+ "no_repeat_ngram_size": 0,
63
+ "num_beam_groups": 1,
64
+ "num_beams": 1,
65
+ "num_channels": 3,
66
+ "num_return_sequences": 1,
67
+ "out_features": [
68
+ "stage4"
69
+ ],
70
+ "out_indices": [
71
  4
72
+ ],
73
+ "output_attentions": false,
74
+ "output_hidden_states": false,
75
+ "output_scores": false,
76
+ "pad_token_id": null,
77
+ "prefix": null,
78
+ "problem_type": null,
79
+ "pruned_heads": {},
80
+ "remove_invalid_values": false,
81
+ "repetition_penalty": 1.0,
82
+ "return_dict": true,
83
+ "return_dict_in_generate": false,
84
+ "sep_token_id": null,
85
+ "stage_names": [
86
+ "stem",
87
+ "stage1",
88
+ "stage2",
89
+ "stage3",
90
+ "stage4"
91
+ ],
92
+ "suppress_tokens": null,
93
+ "task_specific_params": null,
94
+ "temperature": 1.0,
95
+ "tf_legacy_loss": false,
96
+ "tie_encoder_decoder": false,
97
+ "tie_word_embeddings": true,
98
+ "tokenizer_class": null,
99
+ "top_k": 50,
100
+ "top_p": 1.0,
101
+ "torch_dtype": null,
102
+ "torchscript": false,
103
+ "typical_p": 1.0,
104
+ "use_bfloat16": false
105
  },
106
+ "backbone_kwargs": null,
107
  "bbox_cost": 5,
108
  "bbox_loss_coefficient": 5,
109
  "class_cost": 1,
 
110
  "d_model": 256,
111
  "decoder_attention_heads": 8,
112
  "decoder_ffn_dim": 2048,
113
  "decoder_layerdrop": 0.0,
114
  "decoder_layers": 6,
115
  "dice_loss_coefficient": 1,
116
+ "dilation": null,
117
  "dropout": 0.1,
118
  "encoder_attention_heads": 8,
119
  "encoder_ffn_dim": 2048,
 
123
  "giou_cost": 2,
124
  "giou_loss_coefficient": 2,
125
  "id2label": {
126
+ "0": "LABEL_0",
127
+ "1": "LABEL_1",
128
+ "2": "LABEL_2",
129
+ "3": "LABEL_3",
130
+ "4": "LABEL_4",
131
+ "5": "LABEL_5",
132
+ "6": "LABEL_6",
133
+ "7": "LABEL_7",
134
+ "8": "LABEL_8",
135
+ "9": "LABEL_9",
136
+ "10": "LABEL_10",
137
+ "11": "LABEL_11",
138
+ "12": "LABEL_12",
139
+ "13": "LABEL_13",
140
+ "14": "LABEL_14",
141
+ "15": "LABEL_15",
142
+ "16": "LABEL_16",
143
+ "17": "LABEL_17",
144
+ "18": "LABEL_18",
145
+ "19": "LABEL_19"
146
  },
147
  "init_std": 0.02,
148
  "init_xavier_std": 1.0,
149
  "is_encoder_decoder": true,
150
  "label2id": {
151
+ "LABEL_0": 0,
152
+ "LABEL_1": 1,
153
+ "LABEL_10": 10,
154
+ "LABEL_11": 11,
155
+ "LABEL_12": 12,
156
+ "LABEL_13": 13,
157
+ "LABEL_14": 14,
158
+ "LABEL_15": 15,
159
+ "LABEL_16": 16,
160
+ "LABEL_17": 17,
161
+ "LABEL_18": 18,
162
+ "LABEL_19": 19,
163
+ "LABEL_2": 2,
164
+ "LABEL_3": 3,
165
+ "LABEL_4": 4,
166
+ "LABEL_5": 5,
167
+ "LABEL_6": 6,
168
+ "LABEL_7": 7,
169
+ "LABEL_8": 8,
170
+ "LABEL_9": 9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
  },
172
  "mask_loss_coefficient": 1,
 
173
  "model_type": "detr",
174
  "num_channels": 3,
175
  "num_hidden_layers": 6,
 
178
  "scale_embedding": false,
179
  "torch_dtype": "float32",
180
  "transformers_version": "4.47.1",
181
+ "use_pretrained_backbone": null,
182
+ "use_timm_backbone": false
183
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:78d0293ecee2e6cf8d0838366e45341e150a8f9625dd9e311d46ddb7f0d12fa0
3
- size 166650908
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13ea13249f6d2db7b53b7f46e6ba0f2d67dccd6f8b98bc99f2fb6948f749db3d
3
+ size 166523260