Spaces · Runtime error
Commit c709b60 (parent: 812fc55): Upload folder using huggingface_hub
This view is limited to 50 files because it contains too many changes.
- .gitattributes +11 -0
- .gitignore +62 -0
- .gitmodules +3 -0
- LICENSE +21 -0
- README.md +124 -8
- app.py +109 -0
- configs/Base.yaml +77 -0
- configs/GRiT_B_DenseCap.yaml +20 -0
- configs/GRiT_B_DenseCap_ObjectDet.yaml +23 -0
- configs/GRiT_B_ObjectDet.yaml +20 -0
- configs/GRiT_H_ObjectDet.yaml +21 -0
- configs/GRiT_L_ObjectDet.yaml +20 -0
- datasets/DATASETS.md +62 -0
- demo.py +125 -0
- demo_images/000000353174.jpg +0 -0
- demo_images/000000438652.jpg +0 -0
- demo_images/000000453583.jpg +0 -0
- demo_images/000000466185.jpg +0 -0
- demo_images/000000497110.jpg +0 -0
- demo_images/000000497861.jpg +0 -0
- demo_images/000000546072.jpg +0 -0
- detectron2/.circleci/config.yml +270 -0
- detectron2/.circleci/import-tests.sh +16 -0
- detectron2/.clang-format +85 -0
- detectron2/.flake8 +15 -0
- detectron2/.gitignore +53 -0
- detectron2/GETTING_STARTED.md +79 -0
- detectron2/INSTALL.md +262 -0
- detectron2/LICENSE +202 -0
- detectron2/MODEL_ZOO.md +1052 -0
- detectron2/README.md +68 -0
- detectron2/build/lib.linux-x86_64-3.10/detectron2/_C.cpython-310-x86_64-linux-gnu.so +3 -0
- detectron2/build/lib.linux-x86_64-3.10/detectron2/__init__.py +10 -0
- detectron2/build/lib.linux-x86_64-3.10/detectron2/checkpoint/__init__.py +10 -0
- detectron2/build/lib.linux-x86_64-3.10/detectron2/checkpoint/c2_model_loading.py +407 -0
- detectron2/build/lib.linux-x86_64-3.10/detectron2/checkpoint/catalog.py +115 -0
- detectron2/build/lib.linux-x86_64-3.10/detectron2/checkpoint/detection_checkpoint.py +121 -0
- detectron2/build/lib.linux-x86_64-3.10/detectron2/config/__init__.py +24 -0
- detectron2/build/lib.linux-x86_64-3.10/detectron2/config/compat.py +229 -0
- detectron2/build/lib.linux-x86_64-3.10/detectron2/config/config.py +265 -0
- detectron2/build/lib.linux-x86_64-3.10/detectron2/config/defaults.py +646 -0
- detectron2/build/lib.linux-x86_64-3.10/detectron2/config/instantiate.py +83 -0
- detectron2/build/lib.linux-x86_64-3.10/detectron2/config/lazy.py +400 -0
- detectron2/build/lib.linux-x86_64-3.10/detectron2/data/__init__.py +19 -0
- detectron2/build/lib.linux-x86_64-3.10/detectron2/data/benchmark.py +225 -0
- detectron2/build/lib.linux-x86_64-3.10/detectron2/data/build.py +556 -0
- detectron2/build/lib.linux-x86_64-3.10/detectron2/data/catalog.py +236 -0
- detectron2/build/lib.linux-x86_64-3.10/detectron2/data/common.py +244 -0
- detectron2/build/lib.linux-x86_64-3.10/detectron2/data/dataset_mapper.py +191 -0
- detectron2/build/lib.linux-x86_64-3.10/detectron2/data/datasets/__init__.py +9 -0
.gitattributes
CHANGED
@@ -33,3 +33,14 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+detectron2/build/lib.linux-x86_64-3.10/detectron2/_C.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+detectron2/build/temp.linux-x86_64-3.10/home/vishak66/Bitbucket/Vishakaraj[[:space:]]Shanmugavel/grit-hugging-face/detectron2/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cpu.o filter=lfs diff=lfs merge=lfs -text
+detectron2/build/temp.linux-x86_64-3.10/home/vishak66/Bitbucket/Vishakaraj[[:space:]]Shanmugavel/grit-hugging-face/detectron2/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cpu.o filter=lfs diff=lfs merge=lfs -text
+detectron2/build/temp.linux-x86_64-3.10/home/vishak66/Bitbucket/Vishakaraj[[:space:]]Shanmugavel/grit-hugging-face/detectron2/detectron2/layers/csrc/cocoeval/cocoeval.o filter=lfs diff=lfs merge=lfs -text
+detectron2/build/temp.linux-x86_64-3.10/home/vishak66/Bitbucket/Vishakaraj[[:space:]]Shanmugavel/grit-hugging-face/detectron2/detectron2/layers/csrc/nms_rotated/nms_rotated_cpu.o filter=lfs diff=lfs merge=lfs -text
+detectron2/build/temp.linux-x86_64-3.10/home/vishak66/Bitbucket/Vishakaraj[[:space:]]Shanmugavel/grit-hugging-face/detectron2/detectron2/layers/csrc/vision.o filter=lfs diff=lfs merge=lfs -text
+detectron2/detectron2/_C.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+detectron2/detectron2-0.6-cp310-cp310-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text
+docs/chatgpt.png filter=lfs diff=lfs merge=lfs -text
+docs/demo.png filter=lfs diff=lfs merge=lfs -text
+docs/grit.png filter=lfs diff=lfs merge=lfs -text
.gitignore
ADDED
@@ -0,0 +1,62 @@
third_party/detectron2
./models
configs-experimental
experiments
# output dir
index.html
data/*
slurm/
slurm
slurm-output
slurm-output/
output
instant_test_output
inference_test_output
./aux_data
train.sh
install.sh


*.diff

# compilation and distribution
__pycache__
_ext
*.pyc
*.pyd
*.so
*.dll
*.egg-info/
build/
dist/
wheels/

# pytorch/python/numpy formats
*.pth
*.pkl
*.ts
model_ts*.txt

# ipython/jupyter notebooks
*.ipynb
**/.ipynb_checkpoints/

# Editor temporaries
*.swn
*.swo
*.swp
*~

# editor settings
.idea
.vscode
_darcs

# project dirs
/detectron2/model_zoo/configs
/datasets/*
!/datasets/*.*
!/datasets/metadata
/projects/*/datasets
/models
/snippet
.gitmodules
ADDED
@@ -0,0 +1,3 @@
[submodule "third_party/CenterNet2"]
	path = third_party/CenterNet2
	url = https://github.com/xingyizhou/CenterNet2.git
LICENSE
ADDED
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2022 Microsoft Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
README.md
CHANGED
@@ -1,12 +1,128 @@
 ---
-title:
-emoji: π
-colorFrom: purple
-colorTo: green
-sdk: gradio
-sdk_version: 3.44.0
+title: Dense_Captioning_-_GRiT
 app_file: app.py
-
+sdk: gradio
+sdk_version: 3.42.0
 ---
-
-
+# GRiT: A Generative Region-to-text Transformer for Object Understanding
+GRiT is a general and open-set object understanding framework that localizes objects and
+describes them with any style of free-form texts it was trained with, e.g., class names, descriptive sentences
+(including object attributes, actions, counts and many more).
+
+> [**GRiT: A Generative Region-to-text Transformer for Object Understanding**](https://arxiv.org/abs/2212.00280) \
+> Jialian Wu, Jianfeng Wang, Zhengyuan Yang, Zhe Gan, Zicheng Liu, Junsong Yuan, Lijuan Wang \
+> <sup>1</sup>State University of New York at Buffalo, <sup>2</sup>Microsoft \
+> *arXiv technical report* ([PDF](https://arxiv.org/pdf/2212.00280.pdf))
+
+<p align="center"> <img src='docs/grit.png' align="center" height="400px"> </p>
+
+## Installation
+
+Please follow [Installation instructions](docs/INSTALL.md).
+
+## ChatGPT with GRiT
+We give ChatGPT GRiT's dense captioning outputs (object location and description) to have it
+describe the scene and even write poetry. ChatGPT can generate amazing scene descriptions given our dense
+captioning outputs. An example is shown below: :star_struck::star_struck::star_struck:
+
+<p align="center"> <img src='docs/chatgpt.png' align="center"> </p>
+
+
+## Object Understanding Demo - One Model Two tasks
+
+[Download the GRiT model](https://datarelease.blob.core.windows.net/grit/models/grit_b_densecap_objectdet.pth) or use the following command to download:
+~~~
+mkdir models && cd models
+wget https://datarelease.blob.core.windows.net/grit/models/grit_b_densecap_objectdet.pth && cd ..
+~~~
+The downloaded GRiT model was jointly trained on dense captioning
+task and object detection task. With the same trained model, it can
+output both rich descriptive sentences and short class names by varying
+the flag `--test-task`. Play it as follows! :star_struck:
+
+### *Output for Dense Captioning (rich descriptive sentences)*
+
+~~~
+python demo.py --test-task DenseCap --config-file configs/GRiT_B_DenseCap_ObjectDet.yaml --input demo_images --output visualization --opts MODEL.WEIGHTS models/grit_b_densecap_objectdet.pth
+~~~
+
+### *Output for Object Detection (short class names)*
+
+~~~
+python demo.py --test-task ObjectDet --config-file configs/GRiT_B_DenseCap_ObjectDet.yaml --input demo_images --output visualization --opts MODEL.WEIGHTS models/grit_b_densecap_objectdet.pth
+~~~
+Output images will be saved under the `visualization` folder, which looks like:
+<p align="center"> <img src='docs/demo.png' align="center"> </p>
+
+You can also try the Colab demo provided by the [TWC team](https://github.com/taskswithcode): [](https://colab.research.google.com/github/taskswithcode/GriT/blob/master/TWCGRiT.ipynb)
+
+
+## Benchmark Inference and Evaluation
+Please follow [dataset preparation instructions](datasets/DATASETS.md) to download datasets.
+
+Download our trained models and put them to `models/` for evaluation.
+### *Object Detection on COCO 2017 Dataset*
+
+| Model | val AP | test-dev AP | Download |
+|-----------------------|-----------------|----------|----------|
+|[GRiT (ViT-B)](configs/GRiT_B_ObjectDet.yaml)|53.7|53.8| [model](https://datarelease.blob.core.windows.net/grit/models/grit_b_objectdet.pth) |
+|[GRiT (ViT-L)](configs/GRiT_L_ObjectDet.yaml)|56.4|56.6| [model](https://datarelease.blob.core.windows.net/grit/models/grit_l_objectdet.pth) |
+|[GRiT (ViT-H)](configs/GRiT_H_ObjectDet.yaml)|60.4|60.4| [model](https://datarelease.blob.core.windows.net/grit/models/grit_h_objectdet.pth) |
+
+To evaluate the trained GRiT on coco 2017 val, run:
+~~~
+# GRiT (ViT-B)
+python train_net.py --num-gpus-per-machine 8 --config-file configs/GRiT_B_ObjectDet.yaml --output-dir-name ./output/grit_b_objectdet --eval-only MODEL.WEIGHTS models/grit_b_objectdet.pth
+# GRiT (ViT-L)
+python train_net.py --num-gpus-per-machine 8 --config-file configs/GRiT_L_ObjectDet.yaml --output-dir-name ./output/grit_l_objectdet --eval-only MODEL.WEIGHTS models/grit_l_objectdet.pth
+# GRiT (ViT-H)
+python train_net.py --num-gpus-per-machine 8 --config-file configs/GRiT_H_ObjectDet.yaml --output-dir-name ./output/grit_h_objectdet --eval-only MODEL.WEIGHTS models/grit_h_objectdet.pth
+~~~
+
+### *Dense Captioning on VG Dataset*
+| Model | mAP | Download |
+|-----------------------|-----------------|----------|
+|[GRiT (ViT-B)](configs/GRiT_B_DenseCap.yaml)|15.5| [model](https://datarelease.blob.core.windows.net/grit/models/grit_b_densecap.pth) |
+
+To test on VG test set, run:
+~~~
+python train_net.py --num-gpus-per-machine 8 --config-file configs/GRiT_B_DenseCap.yaml --output-dir-name ./output/grit_b_densecap --eval-only MODEL.WEIGHTS models/grit_b_densecap.pth
+~~~
+It will save the inference results to `output/grit_b_densecap/vg_instances_results.json`.
+We use the VG dense captioning [official evaluation codebase](https://github.com/jcjohnson/densecap)
+to report the results. We didn't integrate the evaluation code into our project as it was written in Lua.
+To evaluate on VG, please follow the original codebase's instructions and test based upon it. We're happy to discuss
+in our issue section about the issues you may encounter when using their code.
+
+## Training
+To save training memory, we use [DeepSpeed](https://github.com/microsoft/DeepSpeed) for training which can work well for
+[activation checkpointing](https://pytorch.org/docs/stable/checkpoint.html) in distributed training.
+
+To train on single machine node, run:
+~~~
+python train_deepspeed.py --num-gpus-per-machine 8 --config-file configs/GRiT_B_ObjectDet.yaml --output-dir-name ./output/grit_b_objectdet
+~~~
+
+To train on multiple machine nodes, run:
+~~~
+python train_deepspeed.py --num-machines 4 --num-gpus-per-machine 8 --config-file configs/GRiT_B_ObjectDet.yaml --output-dir-name ./output/grit_b_objectdet
+~~~
+
+## Acknowledgement
+Our code is in part based on [Detic](https://github.com/facebookresearch/Detic),
+[CenterNet2](https://github.com/xingyizhou/CenterNet2),
+[detectron2](https://github.com/facebookresearch/detectron2),
+[GIT](https://github.com/microsoft/GenerativeImage2Text), and
+[transformers](https://github.com/huggingface/transformers).
+We thank the authors and appreciate their great works!
+
+## Citation
+
+If you find our work interesting and would like to cite it, please use the following BibTeX entry.
+
+    @article{wu2022grit,
+      title={GRiT: A Generative Region-to-text Transformer for Object Understanding},
+      author={Wu, Jialian and Wang, Jianfeng and Yang, Zhengyuan and Gan, Zhe and Liu, Zicheng and Yuan, Junsong and Wang, Lijuan},
+      journal={arXiv preprint arXiv:2212.00280},
+      year={2022}
+    }
app.py
ADDED
@@ -0,0 +1,109 @@
import os
# os.system("sudo apt-get update && sudo apt-get install -y git")
# os.system("sudo apt-get -y install pybind11-dev")
# os.system("git clone https://github.com/facebookresearch/detectron2.git")
# os.system("pip install torch==1.9.0+cu111 torchvision==0.10.0+cu111 torchaudio==0.9.0 -f https://download.pytorch.org/whl/torch_stable.html")
os.system("cd detectron2 && pip install detectron2-0.6-cp310-cp310-linux_x86_64.whl")
# os.system("pip3 install torch torchvision torchaudio")
os.system("pip install deepspeed==0.7.0")

import site
from importlib import reload
reload(site)

from PIL import Image
import argparse
import sys
import numpy as np
import cv2
import gradio as gr

from detectron2.config import get_cfg
from detectron2.data.detection_utils import read_image
from detectron2.utils.logger import setup_logger

sys.path.insert(0, "third_party/CenterNet2/projects/CenterNet2/")
from centernet.config import add_centernet_config
from grit.config import add_grit_config

from grit.predictor import VisualizationDemo

def get_parser():
    parser = argparse.ArgumentParser(description="Detectron2 demo for builtin configs")
    parser.add_argument(
        "--config-file",
        default="configs/GRiT_B_DenseCap_ObjectDet.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--cpu", action="store_true", help="Use CPU only.")
    parser.add_argument(
        "--confidence-threshold",
        type=float,
        default=0.5,
        help="Minimum score for instance predictions to be shown",
    )
    parser.add_argument(
        "--test-task",
        type=str,
        default="",
        help="Choose a task to have GRiT perform",
    )
    parser.add_argument(
        "--opts",
        help="Modify config options using the command-line 'KEY VALUE' pairs",
        default=["MODEL.WEIGHTS", "./models/grit_b_densecap_objectdet.pth"],
        nargs=argparse.REMAINDER,
    )
    return parser

def setup_cfg(args):
    cfg = get_cfg()
    if args.cpu:
        cfg.MODEL.DEVICE = "cpu"
    add_centernet_config(cfg)
    add_grit_config(cfg)
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    # Set score_threshold for builtin models
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = args.confidence_threshold
    cfg.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH = (
        args.confidence_threshold
    )
    if args.test_task:
        cfg.MODEL.TEST_TASK = args.test_task
    cfg.MODEL.BEAM_SIZE = 1
    cfg.MODEL.ROI_HEADS.SOFT_NMS_ENABLED = False
    cfg.USE_ACT_CHECKPOINT = False
    cfg.freeze()
    return cfg

def predict(image_file):
    image_array = np.array(image_file)[:, :, ::-1]  # BGR
    _, visualized_output = dense_captioning_demo.run_on_image(image_array)
    visualized_output.save(os.path.join(os.getcwd(), "output.jpg"))
    output_image = cv2.imread(os.path.join(os.getcwd(), "output.jpg"))
    output_image = cv2.cvtColor(output_image, cv2.COLOR_BGR2RGB)
    return Image.fromarray(output_image)



args = get_parser().parse_args()
args.test_task = "DenseCap"
setup_logger(name="fvcore")
logger = setup_logger()
logger.info("Arguments: " + str(args))

cfg = setup_cfg(args)

dense_captioning_demo = VisualizationDemo(cfg)

demo = gr.Interface(
    title="Dense Captioning - GRiT",
    fn=predict,
    inputs=gr.Image(type='pil', label="Original Image"),
    outputs=gr.Image(type="pil", label="Output Image"),
    examples=["example_1.jpg", "example_2.jpg"],
)

demo.launch()
configs/Base.yaml
ADDED
@@ -0,0 +1,77 @@
MODEL:
  META_ARCHITECTURE: "GRiT"
  MASK_ON: True
  PROPOSAL_GENERATOR:
    NAME: "CenterNet"
  FPN:
    IN_FEATURES: ["layer3", "layer4", "layer5"]
  PIXEL_MEAN: [123.675, 116.280, 103.530]
  PIXEL_STD: [58.395, 57.12, 57.375]
  ROI_HEADS:
    NAME: GRiTROIHeadsAndTextDecoder
    IN_FEATURES: ["p3", "p4", "p5"]
    IOU_THRESHOLDS: [0.6]
    NUM_CLASSES: 1
    SCORE_THRESH_TEST: 0.02
    NMS_THRESH_TEST: 0.5
    OBJECT_FEAT_POOLER_RES: 14
  ROI_BOX_CASCADE_HEAD:
    IOUS: [0.6, 0.7, 0.8]
  ROI_BOX_HEAD:
    NAME: "FastRCNNConvFCHead"
    NUM_FC: 2
    POOLER_RESOLUTION: 7
    CLS_AGNOSTIC_BBOX_REG: True
    MULT_PROPOSAL_SCORE: True
  ROI_MASK_HEAD:
    NAME: "MaskRCNNConvUpsampleHead"
    NUM_CONV: 4
    POOLER_RESOLUTION: 14
    CLS_AGNOSTIC_MASK: True
  CENTERNET:
    NUM_CLASSES: 1
    REG_WEIGHT: 1.
    NOT_NORM_REG: True
    ONLY_PROPOSAL: True
    WITH_AGN_HM: True
    INFERENCE_TH: 0.0001
    PRE_NMS_TOPK_TRAIN: 4000
    POST_NMS_TOPK_TRAIN: 2000
    PRE_NMS_TOPK_TEST: 1000
    POST_NMS_TOPK_TEST: 256
    NMS_TH_TRAIN: 0.9
    NMS_TH_TEST: 0.9
    POS_WEIGHT: 0.5
    NEG_WEIGHT: 0.5
    IGNORE_HIGH_FP: 0.85
DATASETS:
  TRAIN: ("coco_2017_train",)
  TEST: ("coco_2017_val",)
DATALOADER:
  SAMPLER_TRAIN: "MultiDatasetSampler"
  DATASET_RATIO: [1]
  DATASET_INPUT_SIZE: [1024]
  DATASET_INPUT_SCALE: [[0.1, 2.0]]
  FILTER_EMPTY_ANNOTATIONS: False
  NUM_WORKERS: 8
TEST:
  DETECTIONS_PER_IMAGE: 256
SOLVER:
  LR_SCHEDULER_NAME: "WarmupCosineLR"
  CHECKPOINT_PERIOD: 10000
  WARMUP_ITERS: 1000
  WARMUP_FACTOR: 0.001
  USE_CUSTOM_SOLVER: True
  OPTIMIZER: "ADAMW"
  MAX_ITER: 180000
  IMS_PER_BATCH: 64
  BASE_LR: 0.00008
  VIT_LAYER_DECAY: True
  CLIP_GRADIENTS:
    ENABLED: True
INPUT:
  FORMAT: RGB
  CUSTOM_AUG: EfficientDetResizeCrop
  TRAIN_SIZE: 640
USE_ACT_CHECKPOINT: True
VERSION: 2
configs/GRiT_B_DenseCap.yaml
ADDED
@@ -0,0 +1,20 @@
_BASE_: "Base.yaml"
MODEL:
  TRAIN_TASK: ["DenseCap"]
  TEST_TASK: "DenseCap"
  MASK_ON: False
  ROI_HEADS:
    SOFT_NMS_ENABLED: False
  BEAM_SIZE: 1
  WEIGHTS: "detectron2://ImageNetPretrained/MAE/mae_pretrain_vit_base.pth"
  BACKBONE:
    NAME: build_vit_fpn_backbone
  VIT_LAYERS: 12
SOLVER:
  VIT_LAYER_DECAY_RATE: 0.7
DATASETS:
  TRAIN: ("vg_train",)
  TEST: ("vg_test",)
DATALOADER:
  DATASET_BS: 2
OUTPUT_DIR: "./output/GRiT_B_DenseCap"
configs/GRiT_B_DenseCap_ObjectDet.yaml
ADDED
@@ -0,0 +1,23 @@
_BASE_: "Base.yaml"
MODEL:
  TRAIN_TASK: ["ObjectDet", "DenseCap"]
  TEST_TASK: "DenseCap" # DenseCap or ObjectDet: Choose one for testing
  MASK_ON: True
  ROI_HEADS:
    SOFT_NMS_ENABLED: False
  BEAM_SIZE: 1
  WEIGHTS: "detectron2://ImageNetPretrained/MAE/mae_pretrain_vit_base.pth"
  BACKBONE:
    NAME: build_vit_fpn_backbone
  VIT_LAYERS: 12
SOLVER:
  VIT_LAYER_DECAY_RATE: 0.7
DATASETS:
  TRAIN: ("GRiT_coco2017_train", "vg_train")
  TEST: ("coco_2017_test-dev",)
DATALOADER:
  DATASET_RATIO: [1, 1]
  DATASET_BS: 2
  DATASET_INPUT_SIZE: [1024, 1024]
  DATASET_INPUT_SCALE: [[0.1, 2.0], [0.1, 2.0]]
OUTPUT_DIR: "./output/GRiT_B_DenseCap_ObjectDet"
configs/GRiT_B_ObjectDet.yaml
ADDED
@@ -0,0 +1,20 @@
_BASE_: "Base.yaml"
MODEL:
  TRAIN_TASK: ["ObjectDet"]
  TEST_TASK: "ObjectDet"
  MASK_ON: True
  ROI_HEADS:
    SOFT_NMS_ENABLED: True
  BEAM_SIZE: 3
  WEIGHTS: "detectron2://ImageNetPretrained/MAE/mae_pretrain_vit_base.pth"
  BACKBONE:
    NAME: build_vit_fpn_backbone
  VIT_LAYERS: 12
SOLVER:
  VIT_LAYER_DECAY_RATE: 0.7
DATASETS:
  TRAIN: ("GRiT_coco2017_train",)
  TEST: ("coco_2017_val",)
DATALOADER:
  DATASET_BS: 2
OUTPUT_DIR: "./output/GRiT_B_ObjectDet"
configs/GRiT_H_ObjectDet.yaml
ADDED
@@ -0,0 +1,21 @@
_BASE_: "Base.yaml"
MODEL:
  TRAIN_TASK: ["ObjectDet"]
  TEST_TASK: "ObjectDet"
  MASK_ON: True
  ROI_HEADS:
    SOFT_NMS_ENABLED: True
  BEAM_SIZE: 3
  WEIGHTS: "detectron2://ImageNetPretrained/MAE/mae_pretrain_vit_huge_p14to16.pth"
  BACKBONE:
    NAME: build_vit_fpn_backbone_huge
  VIT_LAYERS: 32
SOLVER:
  MAX_ITER: 135000
  VIT_LAYER_DECAY_RATE: 0.9
DATASETS:
  TRAIN: ("GRiT_coco2017_train",)
  TEST: ("coco_2017_val",)
DATALOADER:
  DATASET_BS: 1
OUTPUT_DIR: "./output/GRiT_H_ObjectDet"
configs/GRiT_L_ObjectDet.yaml
ADDED
@@ -0,0 +1,20 @@
_BASE_: "Base.yaml"
MODEL:
  TRAIN_TASK: ["ObjectDet"]
  TEST_TASK: "ObjectDet"
  MASK_ON: True
  ROI_HEADS:
    SOFT_NMS_ENABLED: True
  BEAM_SIZE: 3
  WEIGHTS: "detectron2://ImageNetPretrained/MAE/mae_pretrain_vit_large.pth"
  BACKBONE:
    NAME: build_vit_fpn_backbone_large
  VIT_LAYERS: 24
SOLVER:
  VIT_LAYER_DECAY_RATE: 0.8
DATASETS:
  TRAIN: ("GRiT_coco2017_train",)
  TEST: ("coco_2017_val",)
DATALOADER:
  DATASET_BS: 1
OUTPUT_DIR: "./output/GRiT_L_ObjectDet"
datasets/DATASETS.md
ADDED
@@ -0,0 +1,62 @@
# Dataset preparation


## COCO Dataset

- Download the coco 2017 dataset from the [official website](https://cocodataset.org/#download).

Dataset structure should look like:
~~~
${GRiT_ROOT}
 |-- datasets
 `-- |-- coco
     |-- |-- train2017/
     |-- |-- val2017/
     |-- |-- test2017/
     |-- |-- annotations/
     |-- |-- |-- instances_train2017.json
     |-- |-- |-- instances_val2017.json
     |-- |-- |-- image_info_test-dev2017.json
~~~

## VG Dataset
- Download images from [official website](https://visualgenome.org/api/v0/api_home.html)
- Download our pre-processed annotations:
[train.json](https://datarelease.blob.core.windows.net/grit/VG_preprocessed_annotations/train.json) and
[test.json](https://datarelease.blob.core.windows.net/grit/VG_preprocessed_annotations/test.json)

Dataset structure should look like:
~~~
${GRiT_ROOT}
 |-- datasets
 `-- |-- vg
     |-- |-- images/
     |-- |-- annotations/
     |-- |-- |-- train.json
     |-- |-- |-- test.json
~~~

## References
Please cite the corresponding references if you use the datasets.

~~~
@inproceedings{lin2014microsoft,
  title={Microsoft coco: Common objects in context},
  author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
  booktitle={European conference on computer vision},
  pages={740--755},
  year={2014},
  organization={Springer}
}

@article{krishna2017visual,
  title={Visual genome: Connecting language and vision using crowdsourced dense image annotations},
  author={Krishna, Ranjay and Zhu, Yuke and Groth, Oliver and Johnson, Justin and Hata, Kenji and Kravitz, Joshua and Chen, Stephanie and Kalantidis, Yannis and Li, Li-Jia and Shamma, David A and others},
  journal={International journal of computer vision},
  volume={123},
  number={1},
  pages={32--73},
  year={2017},
  publisher={Springer}
}
~~~
demo.py
ADDED
@@ -0,0 +1,125 @@
import argparse
import multiprocessing as mp
import os
import time
import cv2
import tqdm
import sys

from detectron2.config import get_cfg
from detectron2.data.detection_utils import read_image
from detectron2.utils.logger import setup_logger

sys.path.insert(0, 'third_party/CenterNet2/projects/CenterNet2/')
from centernet.config import add_centernet_config
from grit.config import add_grit_config

from grit.predictor import VisualizationDemo


# constants
WINDOW_NAME = "GRiT"


def setup_cfg(args):
    cfg = get_cfg()
    if args.cpu:
        cfg.MODEL.DEVICE="cpu"
    add_centernet_config(cfg)
    add_grit_config(cfg)
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    # Set score_threshold for builtin models
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = args.confidence_threshold
    cfg.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH = args.confidence_threshold
    if args.test_task:
        cfg.MODEL.TEST_TASK = args.test_task
    cfg.MODEL.BEAM_SIZE = 1
    cfg.MODEL.ROI_HEADS.SOFT_NMS_ENABLED = False
    cfg.USE_ACT_CHECKPOINT = False
    cfg.freeze()
    return cfg


def get_parser():
    parser = argparse.ArgumentParser(description="Detectron2 demo for builtin configs")
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--cpu", action='store_true', help="Use CPU only.")
    parser.add_argument(
        "--input",
        nargs="+",
        help="A list of space separated input images; "
        "or a single glob pattern such as 'directory/*.jpg'",
    )
    parser.add_argument(
        "--output",
        help="A file or directory to save output visualizations. "
        "If not given, will show output in an OpenCV window.",
    )
    parser.add_argument(
        "--confidence-threshold",
        type=float,
        default=0.5,
        help="Minimum score for instance predictions to be shown",
    )
    parser.add_argument(
        "--test-task",
        type=str,
        default='',
        help="Choose a task to have GRiT perform",
    )
    parser.add_argument(
        "--opts",
        help="Modify config options using the command-line 'KEY VALUE' pairs",
        default=[],
        nargs=argparse.REMAINDER,
    )
    return parser


if __name__ == "__main__":
    mp.set_start_method("spawn", force=True)
    args = get_parser().parse_args()
    setup_logger(name="fvcore")
    logger = setup_logger()
    logger.info("Arguments: " + str(args))

    cfg = setup_cfg(args)

    demo = VisualizationDemo(cfg)

    if args.input:
        for path in tqdm.tqdm(os.listdir(args.input[0]), disable=not args.output):
            img = read_image(os.path.join(args.input[0], path), format="BGR")
            start_time = time.time()
            predictions, visualized_output = demo.run_on_image(img)
            logger.info(
                "{}: {} in {:.2f}s".format(
                    path,
                    "detected {} instances".format(len(predictions["instances"]))
                    if "instances" in predictions
                    else "finished",
                    time.time() - start_time,
                )
            )

            if args.output:
                if not os.path.exists(args.output):
                    os.mkdir(args.output)
                if os.path.isdir(args.output):
                    assert os.path.isdir(args.output), args.output
                    out_filename = os.path.join(args.output, os.path.basename(path))
                else:
                    assert len(args.input) == 1, "Please specify a directory with args.output"
                    out_filename = args.output
                visualized_output.save(out_filename)
            else:
                cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
                cv2.imshow(WINDOW_NAME, visualized_output.get_image()[:, :, ::-1])
                if cv2.waitKey(0) == 27:
                    break  # esc to quit
demo_images/000000353174.jpg
ADDED
demo_images/000000438652.jpg
ADDED
demo_images/000000453583.jpg
ADDED
demo_images/000000466185.jpg
ADDED
demo_images/000000497110.jpg
ADDED
demo_images/000000497861.jpg
ADDED
demo_images/000000546072.jpg
ADDED
detectron2/.circleci/config.yml
ADDED
@@ -0,0 +1,270 @@
version: 2.1

# -------------------------------------------------------------------------------------
# Environments to run the jobs in
# -------------------------------------------------------------------------------------
cpu: &cpu
  machine:
    image: ubuntu-2004:202107-02
  resource_class: medium

gpu: &gpu
  machine:
    # NOTE: use a cuda vesion that's supported by all our pytorch versions
    image: ubuntu-1604-cuda-11.1:202012-01
  resource_class: gpu.nvidia.small

windows-cpu: &windows_cpu
  machine:
    resource_class: windows.medium
    image: windows-server-2019-vs2019:stable
    shell: powershell.exe

# windows-gpu: &windows_gpu
#   machine:
#     resource_class: windows.gpu.nvidia.medium
#     image: windows-server-2019-nvidia:stable

version_parameters: &version_parameters
  parameters:
    pytorch_version:
      type: string
    torchvision_version:
      type: string
    pytorch_index:
      type: string
      # use test wheels index to have access to RC wheels
      # https://download.pytorch.org/whl/test/torch_test.html
      default: "https://download.pytorch.org/whl/torch_stable.html"
    python_version: # NOTE: only affect linux
      type: string
      default: '3.6.8'

  environment:
    PYTORCH_VERSION: << parameters.pytorch_version >>
    TORCHVISION_VERSION: << parameters.torchvision_version >>
    PYTORCH_INDEX: << parameters.pytorch_index >>
    PYTHON_VERSION: << parameters.python_version>>
    # point datasets to ~/.torch so it's cached in CI
    DETECTRON2_DATASETS: ~/.torch/datasets

# -------------------------------------------------------------------------------------
# Re-usable commands
# -------------------------------------------------------------------------------------
# install_nvidia_driver: &install_nvidia_driver
#  - run:
#      name: Install nvidia driver
#      working_directory: ~/
#      command: |
#        wget -q 'https://s3.amazonaws.com/ossci-linux/nvidia_driver/NVIDIA-Linux-x86_64-430.40.run'
#        sudo /bin/bash ./NVIDIA-Linux-x86_64-430.40.run -s --no-drm
#        nvidia-smi

add_ssh_keys: &add_ssh_keys
  # https://circleci.com/docs/2.0/add-ssh-key/
  - add_ssh_keys:
      fingerprints:
        - "e4:13:f2:22:d4:49:e8:e4:57:5a:ac:20:2f:3f:1f:ca"

install_python: &install_python
  - run:
      name: Install Python
      working_directory: ~/
      command: |
        # upgrade pyenv
        cd /opt/circleci/.pyenv/plugins/python-build/../.. && git pull && cd -
        pyenv install -s $PYTHON_VERSION
        pyenv global $PYTHON_VERSION
        python --version
        which python
        pip install --upgrade pip

setup_venv: &setup_venv
  - run:
      name: Setup Virtual Env
      working_directory: ~/
      command: |
        python -m venv ~/venv
        echo ". ~/venv/bin/activate" >> $BASH_ENV
        . ~/venv/bin/activate
        python --version
        which python
        which pip
        pip install --upgrade pip

setup_venv_win: &setup_venv_win
  - run:
      name: Setup Virutal Env for Windows
      command: |
        pip install virtualenv
        python -m virtualenv env
        .\env\Scripts\activate
        python --version
        which python
        which pip

install_linux_dep: &install_linux_dep
  - run:
      name: Install Dependencies
      command: |
        # disable crash coredump, so unittests fail fast
        sudo systemctl stop apport.service
        # install from github to get latest; install iopath first since fvcore depends on it
        pip install --progress-bar off -U 'git+https://github.com/facebookresearch/iopath'
        pip install --progress-bar off -U 'git+https://github.com/facebookresearch/fvcore'
        # Don't use pytest-xdist: cuda tests are unstable under multi-process workers.
        pip install --progress-bar off ninja opencv-python-headless pytest tensorboard pycocotools
        pip install --progress-bar off torch==$PYTORCH_VERSION -f $PYTORCH_INDEX
        if [[ "$TORCHVISION_VERSION" == "master" ]]; then
          pip install git+https://github.com/pytorch/vision.git
        else
          pip install --progress-bar off torchvision==$TORCHVISION_VERSION -f $PYTORCH_INDEX
        fi

        python -c 'import torch; print("CUDA:", torch.cuda.is_available())'
        gcc --version

install_detectron2: &install_detectron2
  - run:
      name: Install Detectron2
      command: |
        # Remove first, in case it's in the CI cache
        pip uninstall -y detectron2

        pip install --progress-bar off -e .[all]
        python -m detectron2.utils.collect_env
        ./datasets/prepare_for_tests.sh

run_unittests: &run_unittests
  - run:
      name: Run Unit Tests
      command: |
        pytest -v --durations=15 tests  # parallel causes some random failures

uninstall_tests: &uninstall_tests
  - run:
      name: Run Tests After Uninstalling
      command: |
        pip uninstall -y detectron2
        # Remove built binaries
        rm -rf build/ detectron2/*.so
        # Tests that code is importable without installation
        PYTHONPATH=. ./.circleci/import-tests.sh


# -------------------------------------------------------------------------------------
# Jobs to run
# -------------------------------------------------------------------------------------
jobs:
  linux_cpu_tests:
    <<: *cpu
    <<: *version_parameters

    working_directory: ~/detectron2

    steps:
      - checkout

      # Cache the venv directory that contains python, dependencies, and checkpoints
      # Refresh the key when dependencies should be updated (e.g. when pytorch releases)
      - restore_cache:
          keys:
            - cache-{{ arch }}-<< parameters.pytorch_version >>-{{ .Branch }}-20210827

      - <<: *install_python
      - <<: *install_linux_dep
      - <<: *install_detectron2
      - <<: *run_unittests
      - <<: *uninstall_tests

      - save_cache:
          paths:
            - /opt/circleci/.pyenv
            - ~/.torch
          key: cache-{{ arch }}-<< parameters.pytorch_version >>-{{ .Branch }}-20210827


  linux_gpu_tests:
    <<: *gpu
    <<: *version_parameters

    working_directory: ~/detectron2

    steps:
      - checkout

      - restore_cache:
          keys:
            - cache-{{ arch }}-<< parameters.pytorch_version >>-{{ .Branch }}-20210827

      - <<: *install_python
      - <<: *install_linux_dep
      - <<: *install_detectron2
      - <<: *run_unittests
      - <<: *uninstall_tests

      - save_cache:
          paths:
            - /opt/circleci/.pyenv
            - ~/.torch
          key: cache-{{ arch }}-<< parameters.pytorch_version >>-{{ .Branch }}-20210827

  windows_cpu_build:
    <<: *windows_cpu
    <<: *version_parameters
    steps:
      - <<: *add_ssh_keys
      - checkout
      - <<: *setup_venv_win

      # Cache the env directory that contains dependencies
      - restore_cache:
          keys:
            - cache-{{ arch }}-<< parameters.pytorch_version >>-{{ .Branch }}-20210404

      - run:
          name: Install Dependencies
          command: |
            pip install certifi --ignore-installed  # required on windows to workaround some cert issue
            pip install numpy cython  # required on windows before pycocotools
            pip install opencv-python-headless pytest-xdist pycocotools tensorboard
            pip install -U git+https://github.com/facebookresearch/iopath
            pip install -U git+https://github.com/facebookresearch/fvcore
            pip install torch==$env:PYTORCH_VERSION torchvision==$env:TORCHVISION_VERSION -f $env:PYTORCH_INDEX

      - save_cache:
          paths:
            - env
          key: cache-{{ arch }}-<< parameters.pytorch_version >>-{{ .Branch }}-20210404

      - <<: *install_detectron2
      # TODO: unittest fails for now

workflows:
  version: 2
  regular_test:
    jobs:
      - linux_cpu_tests:
          name: linux_cpu_tests_pytorch1.10
          pytorch_version: '1.10.0+cpu'
          torchvision_version: '0.11.1+cpu'
      - linux_gpu_tests:
          name: linux_gpu_tests_pytorch1.8
          pytorch_version: '1.8.1+cu111'
          torchvision_version: '0.9.1+cu111'
      - linux_gpu_tests:
          name: linux_gpu_tests_pytorch1.9
          pytorch_version: '1.9+cu111'
          torchvision_version: '0.10+cu111'
      - linux_gpu_tests:
          name: linux_gpu_tests_pytorch1.10
          pytorch_version: '1.10+cu111'
          torchvision_version: '0.11.1+cu111'
      - linux_gpu_tests:
          name: linux_gpu_tests_pytorch1.10_python39
          pytorch_version: '1.10+cu111'
          torchvision_version: '0.11.1+cu111'
          python_version: '3.9.6'
      - windows_cpu_build:
          pytorch_version: '1.10+cpu'
          torchvision_version: '0.11.1+cpu'
detectron2/.circleci/import-tests.sh
ADDED
@@ -0,0 +1,16 @@
#!/bin/bash -e
# Copyright (c) Facebook, Inc. and its affiliates.

# Test that import works without building detectron2.

# Check that _C is not importable
python -c "from detectron2 import _C" > /dev/null 2>&1 && {
  echo "This test should be run without building detectron2."
  exit 1
}

# Check that other modules are still importable, even when _C is not importable
python -c "from detectron2 import modeling"
python -c "from detectron2 import modeling, data"
python -c "from detectron2 import evaluation, export, checkpoint"
python -c "from detectron2 import utils, engine"
detectron2/.clang-format
ADDED
@@ -0,0 +1,85 @@
AccessModifierOffset: -1
AlignAfterOpenBracket: AlwaysBreak
AlignConsecutiveAssignments: false
AlignConsecutiveDeclarations: false
AlignEscapedNewlinesLeft: true
AlignOperands: false
AlignTrailingComments: false
AllowAllParametersOfDeclarationOnNextLine: false
AllowShortBlocksOnASingleLine: false
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: Empty
AllowShortIfStatementsOnASingleLine: false
AllowShortLoopsOnASingleLine: false
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: true
AlwaysBreakTemplateDeclarations: true
BinPackArguments: false
BinPackParameters: false
BraceWrapping:
  AfterClass: false
  AfterControlStatement: false
  AfterEnum: false
  AfterFunction: false
  AfterNamespace: false
  AfterObjCDeclaration: false
  AfterStruct: false
  AfterUnion: false
  BeforeCatch: false
  BeforeElse: false
  IndentBraces: false
BreakBeforeBinaryOperators: None
BreakBeforeBraces: Attach
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: false
ColumnLimit: 80
CommentPragmas: '^ IWYU pragma:'
ConstructorInitializerAllOnOneLineOrOnePerLine: true
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
DerivePointerAlignment: false
DisableFormat: false
ForEachMacros: [ FOR_EACH, FOR_EACH_R, FOR_EACH_RANGE, ]
IncludeCategories:
  - Regex: '^<.*\.h(pp)?>'
    Priority: 1
  - Regex: '^<.*'
    Priority: 2
  - Regex: '.*'
    Priority: 3
IndentCaseLabels: true
IndentWidth: 2
IndentWrappedFunctionNames: false
KeepEmptyLinesAtTheStartOfBlocks: false
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCBlockIndentWidth: 2
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: false
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 200
PointerAlignment: Left
ReflowComments: true
SortIncludes: true
SpaceAfterCStyleCast: false
SpaceBeforeAssignmentOperators: true
SpaceBeforeParens: ControlStatements
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 1
SpacesInAngles: false
SpacesInContainerLiterals: true
SpacesInCStyleCastParentheses: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
Standard: Cpp11
TabWidth: 8
UseTab: Never
detectron2/.flake8
ADDED
@@ -0,0 +1,15 @@
# This is an example .flake8 config, used when developing *Black* itself.
# Keep in sync with setup.cfg which is used for source packages.

[flake8]
ignore = W503, E203, E221, C901, C408, E741, C407, B017, F811
max-line-length = 100
max-complexity = 18
select = B,C,E,F,W,T4,B9
exclude = build
per-file-ignores =
    **/__init__.py:F401,F403,E402
    **/configs/**.py:F401,E402
    configs/**.py:F401,E402
    **/tests/config/**.py:F401,E402
    tests/config/**.py:F401,E402
detectron2/.gitignore
ADDED
@@ -0,0 +1,53 @@
# output dir
output
instant_test_output
inference_test_output


*.png
*.json
*.diff
*.jpg
!/projects/DensePose/doc/images/*.jpg

# compilation and distribution
__pycache__
_ext
*.pyc
*.pyd
*.so
*.dll
*.egg-info/
build/
dist/
wheels/

# pytorch/python/numpy formats
*.pth
*.pkl
*.npy
*.ts
model_ts*.txt

# ipython/jupyter notebooks
*.ipynb
**/.ipynb_checkpoints/

# Editor temporaries
*.swn
*.swo
*.swp
*~

# editor settings
.idea
.vscode
_darcs

# project dirs
/detectron2/model_zoo/configs
/datasets/*
!/datasets/*.*
/projects/*/datasets
/models
/snippet
detectron2/GETTING_STARTED.md
ADDED
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## Getting Started with Detectron2
|
2 |
+
|
3 |
+
This document provides a brief intro of the usage of builtin command-line tools in detectron2.
|
4 |
+
|
5 |
+
For a tutorial that involves actual coding with the API,
|
6 |
+
see our [Colab Notebook](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5)
|
7 |
+
which covers how to run inference with an
|
8 |
+
existing model, and how to train a builtin model on a custom dataset.
|
9 |
+
|
10 |
+
|
11 |
+
### Inference Demo with Pre-trained Models
|
12 |
+
|
13 |
+
1. Pick a model and its config file from
|
14 |
+
[model zoo](MODEL_ZOO.md),
|
15 |
+
for example, `mask_rcnn_R_50_FPN_3x.yaml`.
|
16 |
+
2. We provide `demo.py` that is able to demo builtin configs. Run it with:
|
17 |
+
```
|
18 |
+
cd demo/
|
19 |
+
python demo.py --config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml \
|
20 |
+
--input input1.jpg input2.jpg \
|
21 |
+
[--other-options]
|
22 |
+
--opts MODEL.WEIGHTS detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl
|
23 |
+
```
|
24 |
+
The configs are made for training, therefore we need to specify `MODEL.WEIGHTS` to a model from model zoo for evaluation.
|
25 |
+
This command will run the inference and show visualizations in an OpenCV window.
|
26 |
+
|
27 |
+
For details of the command line arguments, see `demo.py -h` or look at its source code
|
28 |
+
to understand its behavior. Some common arguments are:
|
29 |
+
* To run __on your webcam__, replace `--input files` with `--webcam`.
|
30 |
+
* To run __on a video__, replace `--input files` with `--video-input video.mp4`.
|
31 |
+
* To run __on cpu__, add `MODEL.DEVICE cpu` after `--opts`.
|
32 |
+
* To save outputs to a directory (for images) or a file (for webcam or video), use `--output`.
|
33 |
+
|
34 |
+
|
35 |
+
### Training & Evaluation in Command Line
|
36 |
+
|
37 |
+
We provide two scripts in "tools/plain_train_net.py" and "tools/train_net.py",
|
38 |
+
that are made to train all the configs provided in detectron2. You may want to
|
39 |
+
use it as a reference to write your own training script.
|
40 |
+
|
41 |
+
Compared to "train_net.py", "plain_train_net.py" supports fewer default
|
42 |
+
features. It also includes fewer abstraction, therefore is easier to add custom
|
43 |
+
logic.
|
44 |
+
|
45 |
+
To train a model with "train_net.py", first
|
46 |
+
setup the corresponding datasets following
|
47 |
+
[datasets/README.md](./datasets/README.md),
|
48 |
+
then run:
|
49 |
+
```
|
50 |
+
cd tools/
|
51 |
+
./train_net.py --num-gpus 8 \
|
52 |
+
--config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml
|
53 |
+
```
|
54 |
+
|
55 |
+
The configs are made for 8-GPU training.
|
56 |
+
To train on 1 GPU, you may need to [change some parameters](https://arxiv.org/abs/1706.02677), e.g.:
|
57 |
+
```
|
58 |
+
./train_net.py \
|
59 |
+
--config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml \
|
60 |
+
--num-gpus 1 SOLVER.IMS_PER_BATCH 2 SOLVER.BASE_LR 0.0025
|
61 |
+
```
|
62 |
+
|
63 |
+
To evaluate a model's performance, use
|
64 |
+
```
|
65 |
+
./train_net.py \
|
66 |
+
--config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml \
|
67 |
+
--eval-only MODEL.WEIGHTS /path/to/checkpoint_file
|
68 |
+
```
|
69 |
+
For more options, see `./train_net.py -h`.
|
70 |
+
|
71 |
+
### Use Detectron2 APIs in Your Code

See our [Colab Notebook](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5)
to learn how to use detectron2 APIs to:
1. run inference with an existing model
2. train a builtin model on a custom dataset

See [detectron2/projects](https://github.com/facebookresearch/detectron2/tree/main/projects)
for more ways to build your project on detectron2.

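For completeness, evaluation can also be driven from Python. A minimal sketch using `COCOEvaluator` and `inference_on_dataset` is shown below; the checkpoint path and output directory are placeholders, and it assumes the `coco_2017_val` dataset is registered per datasets/README.md.

```python
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.data import build_detection_test_loader
from detectron2.engine import DefaultPredictor
from detectron2.evaluation import COCOEvaluator, inference_on_dataset

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml"))
cfg.MODEL.WEIGHTS = "/path/to/checkpoint_file"  # placeholder, as in the command-line example

predictor = DefaultPredictor(cfg)
evaluator = COCOEvaluator("coco_2017_val", output_dir="./output")     # writes results under ./output
val_loader = build_detection_test_loader(cfg, "coco_2017_val")
print(inference_on_dataset(predictor.model, val_loader, evaluator))   # reports COCO AP metrics
```
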
detectron2/INSTALL.md
ADDED
@@ -0,0 +1,262 @@
## Installation

### Requirements
- Linux or macOS with Python ≥ 3.6
- PyTorch ≥ 1.8 and [torchvision](https://github.com/pytorch/vision/) that matches the PyTorch installation.
  Install them together at [pytorch.org](https://pytorch.org) to make sure of this
- OpenCV is optional but needed by demo and visualization


### Build Detectron2 from Source

gcc & g++ ≥ 5.4 are required. [ninja](https://ninja-build.org/) is optional but recommended for faster build.
After having them, run:
```
python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'
# (add --user if you don't have permission)

# Or, to install it from a local clone:
git clone https://github.com/facebookresearch/detectron2.git
python -m pip install -e detectron2

# On macOS, you may need to prepend the above commands with a few environment variables:
CC=clang CXX=clang++ ARCHFLAGS="-arch x86_64" python -m pip install ...
```

To __rebuild__ detectron2 that's built from a local clone, use `rm -rf build/ **/*.so` to clean the
old build first. You often need to rebuild detectron2 after reinstalling PyTorch.

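Once the build finishes, a quick way to sanity-check the installation from Python is to print detectron2's version and its environment report. This is only a convenience sketch; it uses `collect_env_info()` from `detectron2.utils.collect_env`, the same information as `python -m detectron2.utils.collect_env`.

```python
import torch
import detectron2
from detectron2.utils.collect_env import collect_env_info

# Versions of detectron2 and PyTorch, and whether a GPU is visible.
print("detectron2:", detectron2.__version__)
print("torch:", torch.__version__, "| CUDA available:", torch.cuda.is_available())

# Full environment report, useful when filing installation issues.
print(collect_env_info())
```
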
### Install Pre-Built Detectron2 (Linux only)

Choose from this table to install [v0.6 (Oct 2021)](https://github.com/facebookresearch/detectron2/releases):

<table class="docutils"><tbody><th width="80"> CUDA </th><th valign="bottom" align="left" width="100">torch 1.10</th><th valign="bottom" align="left" width="100">torch 1.9</th><th valign="bottom" align="left" width="100">torch 1.8</th> <tr><td align="left">11.3</td><td align="left"><details><summary> install </summary><pre><code>python -m pip install detectron2 -f \
https://dl.fbaipublicfiles.com/detectron2/wheels/cu113/torch1.10/index.html
</code></pre> </details> </td> <td align="left"> </td> <td align="left"> </td> </tr> <tr><td align="left">11.1</td><td align="left"><details><summary> install </summary><pre><code>python -m pip install detectron2 -f \
https://dl.fbaipublicfiles.com/detectron2/wheels/cu111/torch1.10/index.html
</code></pre> </details> </td> <td align="left"><details><summary> install </summary><pre><code>python -m pip install detectron2 -f \
https://dl.fbaipublicfiles.com/detectron2/wheels/cu111/torch1.9/index.html
</code></pre> </details> </td> <td align="left"><details><summary> install </summary><pre><code>python -m pip install detectron2 -f \
https://dl.fbaipublicfiles.com/detectron2/wheels/cu111/torch1.8/index.html
</code></pre> </details> </td> </tr> <tr><td align="left">10.2</td><td align="left"><details><summary> install </summary><pre><code>python -m pip install detectron2 -f \
https://dl.fbaipublicfiles.com/detectron2/wheels/cu102/torch1.10/index.html
</code></pre> </details> </td> <td align="left"><details><summary> install </summary><pre><code>python -m pip install detectron2 -f \
https://dl.fbaipublicfiles.com/detectron2/wheels/cu102/torch1.9/index.html
</code></pre> </details> </td> <td align="left"><details><summary> install </summary><pre><code>python -m pip install detectron2 -f \
https://dl.fbaipublicfiles.com/detectron2/wheels/cu102/torch1.8/index.html
</code></pre> </details> </td> </tr> <tr><td align="left">10.1</td><td align="left"> </td> <td align="left"> </td> <td align="left"><details><summary> install </summary><pre><code>python -m pip install detectron2 -f \
https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/torch1.8/index.html
</code></pre> </details> </td> </tr> <tr><td align="left">cpu</td><td align="left"><details><summary> install </summary><pre><code>python -m pip install detectron2 -f \
https://dl.fbaipublicfiles.com/detectron2/wheels/cpu/torch1.10/index.html
</code></pre> </details> </td> <td align="left"><details><summary> install </summary><pre><code>python -m pip install detectron2 -f \
https://dl.fbaipublicfiles.com/detectron2/wheels/cpu/torch1.9/index.html
</code></pre> </details> </td> <td align="left"><details><summary> install </summary><pre><code>python -m pip install detectron2 -f \
https://dl.fbaipublicfiles.com/detectron2/wheels/cpu/torch1.8/index.html
</code></pre> </details> </td> </tr></tbody></table>

Note that:
1. The pre-built packages have to be used with corresponding version of CUDA and the official package of PyTorch.
   Otherwise, please build detectron2 from source.
2. New packages are released every few months. Therefore, packages may not contain latest features in the main
   branch and may not be compatible with the main branch of a research project that uses detectron2
   (e.g. those in [projects](projects)).

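The index URLs in the table above follow a fixed pattern, so the matching one can be derived from the local PyTorch build. The small sketch below is purely a convenience; the URL pattern is taken from the table, and it only works for the torch/CUDA combinations that actually have wheels there.

```python
import torch

# "11.1" -> "cu111"; a CPU-only PyTorch build has torch.version.cuda == None.
cuda = ("cu" + torch.version.cuda.replace(".", "")) if torch.version.cuda else "cpu"
torch_mm = ".".join(torch.__version__.split(".")[:2])  # e.g. "1.10.0+cu111" -> "1.10"

index_url = f"https://dl.fbaipublicfiles.com/detectron2/wheels/{cuda}/torch{torch_mm}/index.html"
print(f"python -m pip install detectron2 -f {index_url}")
```
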
### Common Installation Issues

Click each issue for its solutions:

<details>
<summary>
Undefined symbols that look like "TH..", "at::Tensor...", "torch..."
</summary>
<br/>

This usually happens when detectron2 or torchvision is not
compiled with the version of PyTorch you're running.

If the error comes from a pre-built torchvision, uninstall torchvision and pytorch and reinstall them
following [pytorch.org](http://pytorch.org) so the versions will match.

If the error comes from a pre-built detectron2, check [release notes](https://github.com/facebookresearch/detectron2/releases),
then uninstall and reinstall the correct pre-built detectron2 that matches your pytorch version.

If the error comes from detectron2 or torchvision that you built manually from source,
remove the files you built (`build/`, `**/*.so`) and rebuild, so it can pick up the version of pytorch currently in your environment.

If the above instructions do not resolve this problem, please provide an environment (e.g. a dockerfile) that can reproduce the issue.
</details>

<details>
<summary>
Missing torch dynamic libraries, OR segmentation fault immediately when using detectron2.
</summary>
This usually happens when detectron2 or torchvision is not
compiled with the version of PyTorch you're running. See the previous common issue for the solution.
</details>

<details>
<summary>
Undefined C++ symbols (e.g. "GLIBCXX..") or C++ symbols not found.
</summary>
<br/>
Usually it's because the library is compiled with a newer C++ compiler but run with an old C++ runtime.

This often happens with an old anaconda.
It may help to run `conda update libgcc` to upgrade its runtime.

The fundamental solution is to avoid the mismatch, either by compiling with an older version of the C++
compiler, or by running the code with the proper C++ runtime.
To run the code with a specific C++ runtime, you can use the environment variable `LD_PRELOAD=/path/to/libstdc++.so`.

</details>

<details>
<summary>
"nvcc not found" or "Not compiled with GPU support" or "Detectron2 CUDA Compiler: not available".
</summary>
<br/>
CUDA is not found when building detectron2.
You should make sure

```
python -c 'import torch; from torch.utils.cpp_extension import CUDA_HOME; print(torch.cuda.is_available(), CUDA_HOME)'
```

prints `(True, a directory with cuda)` at the time you build detectron2.

Most models can run inference (but not training) without GPU support. To use CPUs, set `MODEL.DEVICE='cpu'` in the config.
</details>

<details>
<summary>
"invalid device function" or "no kernel image is available for execution".
</summary>
<br/>
Two possibilities:

* You built detectron2 with one version of CUDA but run it with a different version.

  To check whether that is the case,
  use `python -m detectron2.utils.collect_env` to find out inconsistent CUDA versions.
  In the output of this command, you should expect "Detectron2 CUDA Compiler", "CUDA_HOME", "PyTorch built with - CUDA"
  to contain cuda libraries of the same version.

  When they are inconsistent,
  you need to either install a different build of PyTorch (or build it yourself)
  to match your local CUDA installation, or install a different version of CUDA to match PyTorch.

* PyTorch/torchvision/Detectron2 is not built for the correct GPU SM architecture (aka. compute capability).

  The architecture included by PyTorch/detectron2/torchvision is available in the "architecture flags" in
  `python -m detectron2.utils.collect_env`. It must include
  the architecture of your GPU, which can be found at [developer.nvidia.com/cuda-gpus](https://developer.nvidia.com/cuda-gpus).

  If you're using pre-built PyTorch/detectron2/torchvision, they have included support for most popular GPUs already.
  If not supported, you need to build them from source.

  When building detectron2/torchvision from source, they detect the GPU device and build only for that device.
  This means the compiled code may not work on a different GPU device.
  To recompile them for the correct architecture, remove all installed/compiled files,
  and rebuild them with the `TORCH_CUDA_ARCH_LIST` environment variable set properly.
  For example, `export TORCH_CUDA_ARCH_LIST="6.0;7.0"` makes it compile for both P100s and V100s.
</details>

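To find the right value for `TORCH_CUDA_ARCH_LIST` on your own machine, the compute capability of each visible GPU can be read directly from PyTorch. This is an illustrative sketch only, using the standard `torch.cuda` query functions:

```python
import torch

if torch.cuda.is_available():
    for i in range(torch.cuda.device_count()):
        major, minor = torch.cuda.get_device_capability(i)
        name = torch.cuda.get_device_name(i)
        # e.g. "Tesla V100 ... -> add 7.0 to TORCH_CUDA_ARCH_LIST"
        print(f"{name} -> add {major}.{minor} to TORCH_CUDA_ARCH_LIST")
else:
    print("No CUDA device visible; set TORCH_CUDA_ARCH_LIST manually before building.")
```
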
<details>
<summary>
Undefined CUDA symbols; Cannot open libcudart.so
</summary>
<br/>
The version of NVCC you use to build detectron2 or torchvision does
not match the version of CUDA you are running with.
This often happens when using anaconda's CUDA runtime.

Use `python -m detectron2.utils.collect_env` to find out inconsistent CUDA versions.
In the output of this command, you should expect "Detectron2 CUDA Compiler", "CUDA_HOME", "PyTorch built with - CUDA"
to contain cuda libraries of the same version.

When they are inconsistent,
you need to either install a different build of PyTorch (or build it yourself)
to match your local CUDA installation, or install a different version of CUDA to match PyTorch.
</details>


<details>
<summary>
C++ compilation errors from NVCC / NVRTC, or "Unsupported gpu architecture"
</summary>
<br/>
A few possibilities:

1. Local CUDA/NVCC version has to match the CUDA version of your PyTorch. Both can be found in `python collect_env.py`
   (download from [here](./detectron2/utils/collect_env.py)).
   When they are inconsistent, you need to either install a different build of PyTorch (or build it yourself)
   to match your local CUDA installation, or install a different version of CUDA to match PyTorch.

2. Local CUDA/NVCC version shall support the SM architecture (a.k.a. compute capability) of your GPU.
   The capability of your GPU can be found at [developer.nvidia.com/cuda-gpus](https://developer.nvidia.com/cuda-gpus).
   The capability supported by NVCC is listed [here](https://gist.github.com/ax3l/9489132).
   If your NVCC version is too old, this can be worked around by setting the environment variable
   `TORCH_CUDA_ARCH_LIST` to a lower, supported capability.

3. The combination of NVCC and GCC you use is incompatible. You need to change one of their versions.
   See [here](https://gist.github.com/ax3l/9489132) for some valid combinations.
   Notably, CUDA<=10.1.105 doesn't support GCC>7.3.

   The CUDA/GCC version used by PyTorch can be found by `print(torch.__config__.show())`.

</details>


<details>
<summary>
"ImportError: cannot import name '_C'".
</summary>
<br/>
Please build and install detectron2 following the instructions above.

Or, if you are running code from detectron2's root directory, `cd` to a different one.
Otherwise you may not import the code that you installed.
</details>


<details>
<summary>
Any issue on windows.
</summary>
<br/>

Detectron2 is continuously built on windows with [CircleCI](https://app.circleci.com/pipelines/github/facebookresearch/detectron2?branch=main).
However we do not provide official support for it.
PRs that improve code compatibility on windows are welcome.
</details>

<details>
<summary>
ONNX conversion segfault after some "TraceWarning".
</summary>
<br/>
The ONNX package is compiled with a compiler that is too old.

Please build and install ONNX from its source code using a compiler
whose version is closer to what's used by PyTorch (available in `torch.__config__.show()`).
</details>


<details>
<summary>
"library not found for -lstdc++" on older versions of macOS
</summary>
<br/>
See
[this stackoverflow answer](https://stackoverflow.com/questions/56083725/macos-build-issues-lstdc-not-found-while-building-python-package).

</details>


### Installation inside specific environments:

* __Colab__: see our [Colab Tutorial](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5)
  which has step-by-step instructions.

* __Docker__: The official [Dockerfile](docker) installs detectron2 with a few simple commands.

detectron2/LICENSE
ADDED
@@ -0,0 +1,202 @@
1 |
+
Apache License
|
2 |
+
Version 2.0, January 2004
|
3 |
+
http://www.apache.org/licenses/
|
4 |
+
|
5 |
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
6 |
+
|
7 |
+
1. Definitions.
|
8 |
+
|
9 |
+
"License" shall mean the terms and conditions for use, reproduction,
|
10 |
+
and distribution as defined by Sections 1 through 9 of this document.
|
11 |
+
|
12 |
+
"Licensor" shall mean the copyright owner or entity authorized by
|
13 |
+
the copyright owner that is granting the License.
|
14 |
+
|
15 |
+
"Legal Entity" shall mean the union of the acting entity and all
|
16 |
+
other entities that control, are controlled by, or are under common
|
17 |
+
control with that entity. For the purposes of this definition,
|
18 |
+
"control" means (i) the power, direct or indirect, to cause the
|
19 |
+
direction or management of such entity, whether by contract or
|
20 |
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
21 |
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
22 |
+
|
23 |
+
"You" (or "Your") shall mean an individual or Legal Entity
|
24 |
+
exercising permissions granted by this License.
|
25 |
+
|
26 |
+
"Source" form shall mean the preferred form for making modifications,
|
27 |
+
including but not limited to software source code, documentation
|
28 |
+
source, and configuration files.
|
29 |
+
|
30 |
+
"Object" form shall mean any form resulting from mechanical
|
31 |
+
transformation or translation of a Source form, including but
|
32 |
+
not limited to compiled object code, generated documentation,
|
33 |
+
and conversions to other media types.
|
34 |
+
|
35 |
+
"Work" shall mean the work of authorship, whether in Source or
|
36 |
+
Object form, made available under the License, as indicated by a
|
37 |
+
copyright notice that is included in or attached to the work
|
38 |
+
(an example is provided in the Appendix below).
|
39 |
+
|
40 |
+
"Derivative Works" shall mean any work, whether in Source or Object
|
41 |
+
form, that is based on (or derived from) the Work and for which the
|
42 |
+
editorial revisions, annotations, elaborations, or other modifications
|
43 |
+
represent, as a whole, an original work of authorship. For the purposes
|
44 |
+
of this License, Derivative Works shall not include works that remain
|
45 |
+
separable from, or merely link (or bind by name) to the interfaces of,
|
46 |
+
the Work and Derivative Works thereof.
|
47 |
+
|
48 |
+
"Contribution" shall mean any work of authorship, including
|
49 |
+
the original version of the Work and any modifications or additions
|
50 |
+
to that Work or Derivative Works thereof, that is intentionally
|
51 |
+
submitted to Licensor for inclusion in the Work by the copyright owner
|
52 |
+
or by an individual or Legal Entity authorized to submit on behalf of
|
53 |
+
the copyright owner. For the purposes of this definition, "submitted"
|
54 |
+
means any form of electronic, verbal, or written communication sent
|
55 |
+
to the Licensor or its representatives, including but not limited to
|
56 |
+
communication on electronic mailing lists, source code control systems,
|
57 |
+
and issue tracking systems that are managed by, or on behalf of, the
|
58 |
+
Licensor for the purpose of discussing and improving the Work, but
|
59 |
+
excluding communication that is conspicuously marked or otherwise
|
60 |
+
designated in writing by the copyright owner as "Not a Contribution."
|
61 |
+
|
62 |
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
63 |
+
on behalf of whom a Contribution has been received by Licensor and
|
64 |
+
subsequently incorporated within the Work.
|
65 |
+
|
66 |
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
67 |
+
this License, each Contributor hereby grants to You a perpetual,
|
68 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
69 |
+
copyright license to reproduce, prepare Derivative Works of,
|
70 |
+
publicly display, publicly perform, sublicense, and distribute the
|
71 |
+
Work and such Derivative Works in Source or Object form.
|
72 |
+
|
73 |
+
3. Grant of Patent License. Subject to the terms and conditions of
|
74 |
+
this License, each Contributor hereby grants to You a perpetual,
|
75 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
76 |
+
(except as stated in this section) patent license to make, have made,
|
77 |
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
78 |
+
where such license applies only to those patent claims licensable
|
79 |
+
by such Contributor that are necessarily infringed by their
|
80 |
+
Contribution(s) alone or by combination of their Contribution(s)
|
81 |
+
with the Work to which such Contribution(s) was submitted. If You
|
82 |
+
institute patent litigation against any entity (including a
|
83 |
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
84 |
+
or a Contribution incorporated within the Work constitutes direct
|
85 |
+
or contributory patent infringement, then any patent licenses
|
86 |
+
granted to You under this License for that Work shall terminate
|
87 |
+
as of the date such litigation is filed.
|
88 |
+
|
89 |
+
4. Redistribution. You may reproduce and distribute copies of the
|
90 |
+
Work or Derivative Works thereof in any medium, with or without
|
91 |
+
modifications, and in Source or Object form, provided that You
|
92 |
+
meet the following conditions:
|
93 |
+
|
94 |
+
(a) You must give any other recipients of the Work or
|
95 |
+
Derivative Works a copy of this License; and
|
96 |
+
|
97 |
+
(b) You must cause any modified files to carry prominent notices
|
98 |
+
stating that You changed the files; and
|
99 |
+
|
100 |
+
(c) You must retain, in the Source form of any Derivative Works
|
101 |
+
that You distribute, all copyright, patent, trademark, and
|
102 |
+
attribution notices from the Source form of the Work,
|
103 |
+
excluding those notices that do not pertain to any part of
|
104 |
+
the Derivative Works; and
|
105 |
+
|
106 |
+
(d) If the Work includes a "NOTICE" text file as part of its
|
107 |
+
distribution, then any Derivative Works that You distribute must
|
108 |
+
include a readable copy of the attribution notices contained
|
109 |
+
within such NOTICE file, excluding those notices that do not
|
110 |
+
pertain to any part of the Derivative Works, in at least one
|
111 |
+
of the following places: within a NOTICE text file distributed
|
112 |
+
as part of the Derivative Works; within the Source form or
|
113 |
+
documentation, if provided along with the Derivative Works; or,
|
114 |
+
within a display generated by the Derivative Works, if and
|
115 |
+
wherever such third-party notices normally appear. The contents
|
116 |
+
of the NOTICE file are for informational purposes only and
|
117 |
+
do not modify the License. You may add Your own attribution
|
118 |
+
notices within Derivative Works that You distribute, alongside
|
119 |
+
or as an addendum to the NOTICE text from the Work, provided
|
120 |
+
that such additional attribution notices cannot be construed
|
121 |
+
as modifying the License.
|
122 |
+
|
123 |
+
You may add Your own copyright statement to Your modifications and
|
124 |
+
may provide additional or different license terms and conditions
|
125 |
+
for use, reproduction, or distribution of Your modifications, or
|
126 |
+
for any such Derivative Works as a whole, provided Your use,
|
127 |
+
reproduction, and distribution of the Work otherwise complies with
|
128 |
+
the conditions stated in this License.
|
129 |
+
|
130 |
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
131 |
+
any Contribution intentionally submitted for inclusion in the Work
|
132 |
+
by You to the Licensor shall be under the terms and conditions of
|
133 |
+
this License, without any additional terms or conditions.
|
134 |
+
Notwithstanding the above, nothing herein shall supersede or modify
|
135 |
+
the terms of any separate license agreement you may have executed
|
136 |
+
with Licensor regarding such Contributions.
|
137 |
+
|
138 |
+
6. Trademarks. This License does not grant permission to use the trade
|
139 |
+
names, trademarks, service marks, or product names of the Licensor,
|
140 |
+
except as required for reasonable and customary use in describing the
|
141 |
+
origin of the Work and reproducing the content of the NOTICE file.
|
142 |
+
|
143 |
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
144 |
+
agreed to in writing, Licensor provides the Work (and each
|
145 |
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
146 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
147 |
+
implied, including, without limitation, any warranties or conditions
|
148 |
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
149 |
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
150 |
+
appropriateness of using or redistributing the Work and assume any
|
151 |
+
risks associated with Your exercise of permissions under this License.
|
152 |
+
|
153 |
+
8. Limitation of Liability. In no event and under no legal theory,
|
154 |
+
whether in tort (including negligence), contract, or otherwise,
|
155 |
+
unless required by applicable law (such as deliberate and grossly
|
156 |
+
negligent acts) or agreed to in writing, shall any Contributor be
|
157 |
+
liable to You for damages, including any direct, indirect, special,
|
158 |
+
incidental, or consequential damages of any character arising as a
|
159 |
+
result of this License or out of the use or inability to use the
|
160 |
+
Work (including but not limited to damages for loss of goodwill,
|
161 |
+
work stoppage, computer failure or malfunction, or any and all
|
162 |
+
other commercial damages or losses), even if such Contributor
|
163 |
+
has been advised of the possibility of such damages.
|
164 |
+
|
165 |
+
9. Accepting Warranty or Additional Liability. While redistributing
|
166 |
+
the Work or Derivative Works thereof, You may choose to offer,
|
167 |
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
168 |
+
or other liability obligations and/or rights consistent with this
|
169 |
+
License. However, in accepting such obligations, You may act only
|
170 |
+
on Your own behalf and on Your sole responsibility, not on behalf
|
171 |
+
of any other Contributor, and only if You agree to indemnify,
|
172 |
+
defend, and hold each Contributor harmless for any liability
|
173 |
+
incurred by, or claims asserted against, such Contributor by reason
|
174 |
+
of your accepting any such warranty or additional liability.
|
175 |
+
|
176 |
+
END OF TERMS AND CONDITIONS
|
177 |
+
|
178 |
+
APPENDIX: How to apply the Apache License to your work.
|
179 |
+
|
180 |
+
To apply the Apache License to your work, attach the following
|
181 |
+
boilerplate notice, with the fields enclosed by brackets "[]"
|
182 |
+
replaced with your own identifying information. (Don't include
|
183 |
+
the brackets!) The text should be enclosed in the appropriate
|
184 |
+
comment syntax for the file format. We also recommend that a
|
185 |
+
file or class name and description of purpose be included on the
|
186 |
+
same "printed page" as the copyright notice for easier
|
187 |
+
identification within third-party archives.
|
188 |
+
|
189 |
+
Copyright [yyyy] [name of copyright owner]
|
190 |
+
|
191 |
+
|
192 |
+
Licensed under the Apache License, Version 2.0 (the "License");
|
193 |
+
you may not use this file except in compliance with the License.
|
194 |
+
You may obtain a copy of the License at
|
195 |
+
|
196 |
+
http://www.apache.org/licenses/LICENSE-2.0
|
197 |
+
|
198 |
+
Unless required by applicable law or agreed to in writing, software
|
199 |
+
distributed under the License is distributed on an "AS IS" BASIS,
|
200 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
201 |
+
See the License for the specific language governing permissions and
|
202 |
+
limitations under the License.
|
detectron2/MODEL_ZOO.md
ADDED
@@ -0,0 +1,1052 @@
# Detectron2 Model Zoo and Baselines

## Introduction

This file documents a large collection of baselines trained
with detectron2 in Sep-Oct, 2019.
All numbers were obtained on [Big Basin](https://engineering.fb.com/data-center-engineering/introducing-big-basin-our-next-generation-ai-hardware/)
servers with 8 NVIDIA V100 GPUs & NVLink. The speed numbers are periodically updated with latest PyTorch/CUDA/cuDNN versions.
You can access these models from code using [detectron2.model_zoo](https://detectron2.readthedocs.io/modules/model_zoo.html) APIs.

In addition to these official baseline models, you can find more models in [projects/](projects/).

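As a minimal sketch of the `detectron2.model_zoo` APIs mentioned above (the config name is one of the baselines listed below; everything else is illustrative):

```python
from detectron2 import model_zoo

# Build a pretrained Mask R-CNN baseline directly from the model zoo.
model = model_zoo.get("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml", trained=True)

# Or fetch just the config file / checkpoint URL to use with your own setup.
cfg_path = model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
weights_url = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
print(cfg_path, weights_url)
```
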
#### How to Read the Tables
* The "Name" column contains a link to the config file. Models can be reproduced using `tools/train_net.py` with the corresponding yaml config file,
  or `tools/lazyconfig_train_net.py` for python config files.
* Training speed is averaged across the entire training.
  We keep updating the speed with latest version of detectron2/pytorch/etc.,
  so they might be different from the `metrics` file.
  Training speed for multi-machine jobs is not provided.
* Inference speed is measured by `tools/train_net.py --eval-only`, or [inference_on_dataset()](https://detectron2.readthedocs.io/modules/evaluation.html#detectron2.evaluation.inference_on_dataset),
  with batch size 1 in detectron2 directly.
  Measuring it with custom code may introduce other overhead.
  Actual deployment in production should in general be faster than the given inference
  speed due to more optimizations.
* The *model id* column is provided for ease of reference.
  To check downloaded file integrity, any model on this page contains its md5 prefix in its file name.
* Training curves and other statistics can be found in `metrics` for each model.

#### Common Settings for COCO Models
* All COCO models were trained on `train2017` and evaluated on `val2017`.
* The default settings are __not directly comparable__ with Detectron's standard settings.
  For example, our default training data augmentation uses scale jittering in addition to horizontal flipping.

  To make fair comparisons with Detectron's settings, see
  [Detectron1-Comparisons](configs/Detectron1-Comparisons/) for accuracy comparison,
  and [benchmarks](https://detectron2.readthedocs.io/notes/benchmarks.html)
  for speed comparison.
* For Faster/Mask R-CNN, we provide baselines based on __3 different backbone combinations__:
  * __FPN__: Use a ResNet+FPN backbone with standard conv and FC heads for mask and box prediction,
    respectively. It obtains the best
    speed/accuracy tradeoff, but the other two are still useful for research.
  * __C4__: Use a ResNet conv4 backbone with conv5 head. The original baseline in the Faster R-CNN paper.
  * __DC5__ (Dilated-C5): Use a ResNet conv5 backbone with dilations in conv5, and standard conv and FC heads
    for mask and box prediction, respectively.
    This is used by the Deformable ConvNet paper.
* Most models are trained with the 3x schedule (~37 COCO epochs).
  Although 1x models are heavily under-trained, we provide some ResNet-50 models with the 1x (~12 COCO epochs)
  training schedule for comparison when doing quick research iteration.

#### ImageNet Pretrained Models

It's common to initialize from backbone models pre-trained on ImageNet classification tasks. The following backbone models are available:

* [R-50.pkl](https://dl.fbaipublicfiles.com/detectron2/ImageNetPretrained/MSRA/R-50.pkl): converted copy of [MSRA's original ResNet-50](https://github.com/KaimingHe/deep-residual-networks) model.
* [R-101.pkl](https://dl.fbaipublicfiles.com/detectron2/ImageNetPretrained/MSRA/R-101.pkl): converted copy of [MSRA's original ResNet-101](https://github.com/KaimingHe/deep-residual-networks) model.
* [X-101-32x8d.pkl](https://dl.fbaipublicfiles.com/detectron2/ImageNetPretrained/FAIR/X-101-32x8d.pkl): ResNeXt-101-32x8d model trained with Caffe2 at FB.
* [R-50.pkl (torchvision)](https://dl.fbaipublicfiles.com/detectron2/ImageNetPretrained/torchvision/R-50.pkl): converted copy of [torchvision's ResNet-50](https://pytorch.org/docs/stable/torchvision/models.html#torchvision.models.resnet50) model.
  More details can be found in [the conversion script](tools/convert-torchvision-to-d2.py).

Note that the above models have __different__ format from those provided in Detectron: we do not fuse BatchNorm into an affine layer.
Pretrained models in Detectron's format can still be used. For example:
* [X-152-32x8d-IN5k.pkl](https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/25093814/X-152-32x8d-IN5k.pkl):
  ResNeXt-152-32x8d model trained on ImageNet-5k with Caffe2 at FB (see ResNeXt paper for details on ImageNet-5k).
* [R-50-GN.pkl](https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/47261647/R-50-GN.pkl):
  ResNet-50 with Group Normalization.
* [R-101-GN.pkl](https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/47592356/R-101-GN.pkl):
  ResNet-101 with Group Normalization.

These models require slightly different settings regarding normalization and architecture. See the model zoo configs for reference.

#### License

All models available for download through this document are licensed under the
[Creative Commons Attribution-ShareAlike 3.0 license](https://creativecommons.org/licenses/by-sa/3.0/).

### COCO Object Detection Baselines

#### Faster R-CNN:
<!--
(fb only) To update the table in vim:
1. Remove the old table: d}
2. Copy the below command to the place of the table
3. :.!bash

./gen_html_table.py --config 'COCO-Detection/faster*50*'{1x,3x}'*' 'COCO-Detection/faster*101*' --name R50-C4 R50-DC5 R50-FPN R50-C4 R50-DC5 R50-FPN R101-C4 R101-DC5 R101-FPN X101-FPN --fields lr_sched train_speed inference_speed mem box_AP
-->

<table><tbody>
|
90 |
+
<!-- START TABLE -->
|
91 |
+
<!-- TABLE HEADER -->
|
92 |
+
<th valign="bottom">Name</th>
|
93 |
+
<th valign="bottom">lr<br/>sched</th>
|
94 |
+
<th valign="bottom">train<br/>time<br/>(s/iter)</th>
|
95 |
+
<th valign="bottom">inference<br/>time<br/>(s/im)</th>
|
96 |
+
<th valign="bottom">train<br/>mem<br/>(GB)</th>
|
97 |
+
<th valign="bottom">box<br/>AP</th>
|
98 |
+
<th valign="bottom">model id</th>
|
99 |
+
<th valign="bottom">download</th>
|
100 |
+
<!-- TABLE BODY -->
|
101 |
+
<!-- ROW: faster_rcnn_R_50_C4_1x -->
|
102 |
+
<tr><td align="left"><a href="configs/COCO-Detection/faster_rcnn_R_50_C4_1x.yaml">R50-C4</a></td>
|
103 |
+
<td align="center">1x</td>
|
104 |
+
<td align="center">0.551</td>
|
105 |
+
<td align="center">0.102</td>
|
106 |
+
<td align="center">4.8</td>
|
107 |
+
<td align="center">35.7</td>
|
108 |
+
<td align="center">137257644</td>
|
109 |
+
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_C4_1x/137257644/model_final_721ade.pkl">model</a> | <a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_C4_1x/137257644/metrics.json">metrics</a></td>
|
110 |
+
</tr>
|
111 |
+
<!-- ROW: faster_rcnn_R_50_DC5_1x -->
|
112 |
+
<tr><td align="left"><a href="configs/COCO-Detection/faster_rcnn_R_50_DC5_1x.yaml">R50-DC5</a></td>
|
113 |
+
<td align="center">1x</td>
|
114 |
+
<td align="center">0.380</td>
|
115 |
+
<td align="center">0.068</td>
|
116 |
+
<td align="center">5.0</td>
|
117 |
+
<td align="center">37.3</td>
|
118 |
+
<td align="center">137847829</td>
|
119 |
+
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_DC5_1x/137847829/model_final_51d356.pkl">model</a> | <a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_DC5_1x/137847829/metrics.json">metrics</a></td>
|
120 |
+
</tr>
|
121 |
+
<!-- ROW: faster_rcnn_R_50_FPN_1x -->
|
122 |
+
<tr><td align="left"><a href="configs/COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml">R50-FPN</a></td>
|
123 |
+
<td align="center">1x</td>
|
124 |
+
<td align="center">0.210</td>
|
125 |
+
<td align="center">0.038</td>
|
126 |
+
<td align="center">3.0</td>
|
127 |
+
<td align="center">37.9</td>
|
128 |
+
<td align="center">137257794</td>
|
129 |
+
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_FPN_1x/137257794/model_final_b275ba.pkl">model</a> | <a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_FPN_1x/137257794/metrics.json">metrics</a></td>
|
130 |
+
</tr>
|
131 |
+
<!-- ROW: faster_rcnn_R_50_C4_3x -->
|
132 |
+
<tr><td align="left"><a href="configs/COCO-Detection/faster_rcnn_R_50_C4_3x.yaml">R50-C4</a></td>
|
133 |
+
<td align="center">3x</td>
|
134 |
+
<td align="center">0.543</td>
|
135 |
+
<td align="center">0.104</td>
|
136 |
+
<td align="center">4.8</td>
|
137 |
+
<td align="center">38.4</td>
|
138 |
+
<td align="center">137849393</td>
|
139 |
+
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_C4_3x/137849393/model_final_f97cb7.pkl">model</a> | <a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_C4_3x/137849393/metrics.json">metrics</a></td>
|
140 |
+
</tr>
|
141 |
+
<!-- ROW: faster_rcnn_R_50_DC5_3x -->
|
142 |
+
<tr><td align="left"><a href="configs/COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml">R50-DC5</a></td>
|
143 |
+
<td align="center">3x</td>
|
144 |
+
<td align="center">0.378</td>
|
145 |
+
<td align="center">0.070</td>
|
146 |
+
<td align="center">5.0</td>
|
147 |
+
<td align="center">39.0</td>
|
148 |
+
<td align="center">137849425</td>
|
149 |
+
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_DC5_3x/137849425/model_final_68d202.pkl">model</a> | <a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_DC5_3x/137849425/metrics.json">metrics</a></td>
|
150 |
+
</tr>
|
151 |
+
<!-- ROW: faster_rcnn_R_50_FPN_3x -->
|
152 |
+
<tr><td align="left"><a href="configs/COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml">R50-FPN</a></td>
|
153 |
+
<td align="center">3x</td>
|
154 |
+
<td align="center">0.209</td>
|
155 |
+
<td align="center">0.038</td>
|
156 |
+
<td align="center">3.0</td>
|
157 |
+
<td align="center">40.2</td>
|
158 |
+
<td align="center">137849458</td>
|
159 |
+
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_FPN_3x/137849458/model_final_280758.pkl">model</a> | <a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_FPN_3x/137849458/metrics.json">metrics</a></td>
|
160 |
+
</tr>
|
161 |
+
<!-- ROW: faster_rcnn_R_101_C4_3x -->
|
162 |
+
<tr><td align="left"><a href="configs/COCO-Detection/faster_rcnn_R_101_C4_3x.yaml">R101-C4</a></td>
|
163 |
+
<td align="center">3x</td>
|
164 |
+
<td align="center">0.619</td>
|
165 |
+
<td align="center">0.139</td>
|
166 |
+
<td align="center">5.9</td>
|
167 |
+
<td align="center">41.1</td>
|
168 |
+
<td align="center">138204752</td>
|
169 |
+
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_101_C4_3x/138204752/model_final_298dad.pkl">model</a> | <a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_101_C4_3x/138204752/metrics.json">metrics</a></td>
|
170 |
+
</tr>
|
171 |
+
<!-- ROW: faster_rcnn_R_101_DC5_3x -->
|
172 |
+
<tr><td align="left"><a href="configs/COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml">R101-DC5</a></td>
|
173 |
+
<td align="center">3x</td>
|
174 |
+
<td align="center">0.452</td>
|
175 |
+
<td align="center">0.086</td>
|
176 |
+
<td align="center">6.1</td>
|
177 |
+
<td align="center">40.6</td>
|
178 |
+
<td align="center">138204841</td>
|
179 |
+
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_101_DC5_3x/138204841/model_final_3e0943.pkl">model</a> | <a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_101_DC5_3x/138204841/metrics.json">metrics</a></td>
|
180 |
+
</tr>
|
181 |
+
<!-- ROW: faster_rcnn_R_101_FPN_3x -->
|
182 |
+
<tr><td align="left"><a href="configs/COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml">R101-FPN</a></td>
|
183 |
+
<td align="center">3x</td>
|
184 |
+
<td align="center">0.286</td>
|
185 |
+
<td align="center">0.051</td>
|
186 |
+
<td align="center">4.1</td>
|
187 |
+
<td align="center">42.0</td>
|
188 |
+
<td align="center">137851257</td>
|
189 |
+
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_101_FPN_3x/137851257/model_final_f6e8b1.pkl">model</a> | <a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_101_FPN_3x/137851257/metrics.json">metrics</a></td>
|
190 |
+
</tr>
|
191 |
+
<!-- ROW: faster_rcnn_X_101_32x8d_FPN_3x -->
|
192 |
+
<tr><td align="left"><a href="configs/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml">X101-FPN</a></td>
|
193 |
+
<td align="center">3x</td>
|
194 |
+
<td align="center">0.638</td>
|
195 |
+
<td align="center">0.098</td>
|
196 |
+
<td align="center">6.7</td>
|
197 |
+
<td align="center">43.0</td>
|
198 |
+
<td align="center">139173657</td>
|
199 |
+
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x/139173657/model_final_68b088.pkl">model</a> | <a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x/139173657/metrics.json">metrics</a></td>
|
200 |
+
</tr>
|
201 |
+
</tbody></table>
|
202 |
+
|
203 |
+
#### RetinaNet:
|
204 |
+
<!--
|
205 |
+
./gen_html_table.py --config 'COCO-Detection/retina*50*' 'COCO-Detection/retina*101*' --name R50 R50 R101 --fields lr_sched train_speed inference_speed mem box_AP
|
206 |
+
-->
|
207 |
+
|
208 |
+
<table><tbody>
|
209 |
+
<!-- START TABLE -->
|
210 |
+
<!-- TABLE HEADER -->
|
211 |
+
<th valign="bottom">Name</th>
|
212 |
+
<th valign="bottom">lr<br/>sched</th>
|
213 |
+
<th valign="bottom">train<br/>time<br/>(s/iter)</th>
|
214 |
+
<th valign="bottom">inference<br/>time<br/>(s/im)</th>
|
215 |
+
<th valign="bottom">train<br/>mem<br/>(GB)</th>
|
216 |
+
<th valign="bottom">box<br/>AP</th>
|
217 |
+
<th valign="bottom">model id</th>
|
218 |
+
<th valign="bottom">download</th>
|
219 |
+
<!-- TABLE BODY -->
|
220 |
+
<!-- ROW: retinanet_R_50_FPN_1x -->
|
221 |
+
<tr><td align="left"><a href="configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml">R50</a></td>
|
222 |
+
<td align="center">1x</td>
|
223 |
+
<td align="center">0.205</td>
|
224 |
+
<td align="center">0.041</td>
|
225 |
+
<td align="center">4.1</td>
|
226 |
+
<td align="center">37.4</td>
|
227 |
+
<td align="center">190397773</td>
|
228 |
+
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/retinanet_R_50_FPN_1x/190397773/model_final_bfca0b.pkl">model</a> | <a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/retinanet_R_50_FPN_1x/190397773/metrics.json">metrics</a></td>
|
229 |
+
</tr>
|
230 |
+
<!-- ROW: retinanet_R_50_FPN_3x -->
|
231 |
+
<tr><td align="left"><a href="configs/COCO-Detection/retinanet_R_50_FPN_3x.yaml">R50</a></td>
|
232 |
+
<td align="center">3x</td>
|
233 |
+
<td align="center">0.205</td>
|
234 |
+
<td align="center">0.041</td>
|
235 |
+
<td align="center">4.1</td>
|
236 |
+
<td align="center">38.7</td>
|
237 |
+
<td align="center">190397829</td>
|
238 |
+
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/retinanet_R_50_FPN_3x/190397829/model_final_5bd44e.pkl">model</a> | <a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/retinanet_R_50_FPN_3x/190397829/metrics.json">metrics</a></td>
|
239 |
+
</tr>
|
240 |
+
<!-- ROW: retinanet_R_101_FPN_3x -->
|
241 |
+
<tr><td align="left"><a href="configs/COCO-Detection/retinanet_R_101_FPN_3x.yaml">R101</a></td>
|
242 |
+
<td align="center">3x</td>
|
243 |
+
<td align="center">0.291</td>
|
244 |
+
<td align="center">0.054</td>
|
245 |
+
<td align="center">5.2</td>
|
246 |
+
<td align="center">40.4</td>
|
247 |
+
<td align="center">190397697</td>
|
248 |
+
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/retinanet_R_101_FPN_3x/190397697/model_final_971ab9.pkl">model</a> | <a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/retinanet_R_101_FPN_3x/190397697/metrics.json">metrics</a></td>
|
249 |
+
</tr>
|
250 |
+
</tbody></table>
|
251 |
+
|
252 |
+
|
253 |
+
#### RPN & Fast R-CNN:
|
254 |
+
<!--
|
255 |
+
./gen_html_table.py --config 'COCO-Detection/rpn*' 'COCO-Detection/fast_rcnn*' --name "RPN R50-C4" "RPN R50-FPN" "Fast R-CNN R50-FPN" --fields lr_sched train_speed inference_speed mem box_AP prop_AR
|
256 |
+
-->
|
257 |
+
|
258 |
+
<table><tbody>
|
259 |
+
<!-- START TABLE -->
|
260 |
+
<!-- TABLE HEADER -->
|
261 |
+
<th valign="bottom">Name</th>
|
262 |
+
<th valign="bottom">lr<br/>sched</th>
|
263 |
+
<th valign="bottom">train<br/>time<br/>(s/iter)</th>
|
264 |
+
<th valign="bottom">inference<br/>time<br/>(s/im)</th>
|
265 |
+
<th valign="bottom">train<br/>mem<br/>(GB)</th>
|
266 |
+
<th valign="bottom">box<br/>AP</th>
|
267 |
+
<th valign="bottom">prop.<br/>AR</th>
|
268 |
+
<th valign="bottom">model id</th>
|
269 |
+
<th valign="bottom">download</th>
|
270 |
+
<!-- TABLE BODY -->
|
271 |
+
<!-- ROW: rpn_R_50_C4_1x -->
|
272 |
+
<tr><td align="left"><a href="configs/COCO-Detection/rpn_R_50_C4_1x.yaml">RPN R50-C4</a></td>
|
273 |
+
<td align="center">1x</td>
|
274 |
+
<td align="center">0.130</td>
|
275 |
+
<td align="center">0.034</td>
|
276 |
+
<td align="center">1.5</td>
|
277 |
+
<td align="center"></td>
|
278 |
+
<td align="center">51.6</td>
|
279 |
+
<td align="center">137258005</td>
|
280 |
+
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/rpn_R_50_C4_1x/137258005/model_final_450694.pkl">model</a> | <a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/rpn_R_50_C4_1x/137258005/metrics.json">metrics</a></td>
|
281 |
+
</tr>
|
282 |
+
<!-- ROW: rpn_R_50_FPN_1x -->
|
283 |
+
<tr><td align="left"><a href="configs/COCO-Detection/rpn_R_50_FPN_1x.yaml">RPN R50-FPN</a></td>
|
284 |
+
<td align="center">1x</td>
|
285 |
+
<td align="center">0.186</td>
|
286 |
+
<td align="center">0.032</td>
|
287 |
+
<td align="center">2.7</td>
|
288 |
+
<td align="center"></td>
|
289 |
+
<td align="center">58.0</td>
|
290 |
+
<td align="center">137258492</td>
|
291 |
+
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/rpn_R_50_FPN_1x/137258492/model_final_02ce48.pkl">model</a> | <a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/rpn_R_50_FPN_1x/137258492/metrics.json">metrics</a></td>
|
292 |
+
</tr>
|
293 |
+
<!-- ROW: fast_rcnn_R_50_FPN_1x -->
|
294 |
+
<tr><td align="left"><a href="configs/COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml">Fast R-CNN R50-FPN</a></td>
|
295 |
+
<td align="center">1x</td>
|
296 |
+
<td align="center">0.140</td>
|
297 |
+
<td align="center">0.029</td>
|
298 |
+
<td align="center">2.6</td>
|
299 |
+
<td align="center">37.8</td>
|
300 |
+
<td align="center"></td>
|
301 |
+
<td align="center">137635226</td>
|
302 |
+
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/fast_rcnn_R_50_FPN_1x/137635226/model_final_e5f7ce.pkl">model</a> | <a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/fast_rcnn_R_50_FPN_1x/137635226/metrics.json">metrics</a></td>
|
303 |
+
</tr>
|
304 |
+
</tbody></table>
|
305 |
+
|
306 |
+
### COCO Instance Segmentation Baselines with Mask R-CNN
<!--
./gen_html_table.py --config 'COCO-InstanceSegmentation/mask*50*'{1x,3x}'*' 'COCO-InstanceSegmentation/mask*101*' --name R50-C4 R50-DC5 R50-FPN R50-C4 R50-DC5 R50-FPN R101-C4 R101-DC5 R101-FPN X101-FPN --fields lr_sched train_speed inference_speed mem box_AP mask_AP
-->

| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | mask AP | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
| [R50-C4](configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.yaml) | 1x | 0.584 | 0.110 | 5.2 | 36.8 | 32.2 | 137259246 | [model](https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x/137259246/model_final_9243eb.pkl) \| [metrics](https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x/137259246/metrics.json) |
| [R50-DC5](configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x.yaml) | 1x | 0.471 | 0.076 | 6.5 | 38.3 | 34.2 | 137260150 | [model](https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x/137260150/model_final_4f86c3.pkl) \| [metrics](https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x/137260150/metrics.json) |
| [R50-FPN](configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml) | 1x | 0.261 | 0.043 | 3.4 | 38.6 | 35.2 | 137260431 | [model](https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x/137260431/model_final_a54504.pkl) \| [metrics](https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x/137260431/metrics.json) |
| [R50-C4](configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml) | 3x | 0.575 | 0.111 | 5.2 | 39.8 | 34.4 | 137849525 | [model](https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x/137849525/model_final_4ce675.pkl) \| [metrics](https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x/137849525/metrics.json) |
| [R50-DC5](configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml) | 3x | 0.470 | 0.076 | 6.5 | 40.0 | 35.9 | 137849551 | [model](https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x/137849551/model_final_84107b.pkl) \| [metrics](https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x/137849551/metrics.json) |
| [R50-FPN](configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml) | 3x | 0.261 | 0.043 | 3.4 | 41.0 | 37.2 | 137849600 | [model](https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl) \| [metrics](https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/metrics.json) |
| [R101-C4](configs/COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml) | 3x | 0.652 | 0.145 | 6.3 | 42.6 | 36.7 | 138363239 | [model](https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x/138363239/model_final_a2914c.pkl) \| [metrics](https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x/138363239/metrics.json) |
| [R101-DC5](configs/COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x.yaml) | 3x | 0.545 | 0.092 | 7.6 | 41.9 | 37.3 | 138363294 | [model](https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x/138363294/model_final_0464b7.pkl) \| [metrics](https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x/138363294/metrics.json) |
| [R101-FPN](configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml) | 3x | 0.340 | 0.056 | 4.6 | 42.9 | 38.6 | 138205316 | [model](https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x/138205316/model_final_a3ec72.pkl) \| [metrics](https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x/138205316/metrics.json) |
| [X101-FPN](configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml) | 3x | 0.690 | 0.103 | 7.2 | 44.3 | 39.5 | 139653917 | [model](https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x/139653917/model_final_2d9806.pkl) \| [metrics](https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x/139653917/metrics.json) |
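
Any entry in the table above can also be fetched programmatically rather than by following the download links by hand. A minimal sketch, assuming a working detectron2 installation and its `model_zoo` API, using one of the config paths listed above:

```python
# Sketch only: requires a working detectron2 installation.
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor

cfg = get_cfg()
# The config path comes from the table above; the weights URL resolves to its "model" link.
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
predictor = DefaultPredictor(cfg)  # ready for inference on BGR numpy images
```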

#### New baselines using Large-Scale Jitter and Longer Training Schedule

The following baselines of COCO Instance Segmentation with Mask R-CNN are generated
using a longer training schedule and large-scale jitter as described in Google's
[Simple Copy-Paste Data Augmentation](https://arxiv.org/pdf/2012.07177.pdf) paper. These
models are trained from scratch using random initialization. These baselines exceed the
previous Mask R-CNN baselines.

In the following table, one epoch consists of training on 118000 COCO images.
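As a rough conversion (assuming a total batch size of 64, which these LazyConfig baselines typically use; this is an assumption here rather than something stated in the table), the 100-epoch schedule corresponds to about 100 × 118000 / 64 ≈ 184,000 iterations.
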
| Name | epochs | train time (s/im) | inference time (s/im) | box AP | mask AP | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- |
| [R50-FPN](configs/new_baselines/mask_rcnn_R_50_FPN_100ep_LSJ.py) | 100 | 0.376 | 0.069 | 44.6 | 40.3 | 42047764 | [model](https://dl.fbaipublicfiles.com/detectron2/new_baselines/mask_rcnn_R_50_FPN_100ep_LSJ/42047764/model_final_bb69de.pkl) \| [metrics](https://dl.fbaipublicfiles.com/detectron2/new_baselines/mask_rcnn_R_50_FPN_100ep_LSJ/42047764/metrics.json) |
| [R50-FPN](configs/new_baselines/mask_rcnn_R_50_FPN_200ep_LSJ.py) | 200 | 0.376 | 0.069 | 46.3 | 41.7 | 42047638 | [model](https://dl.fbaipublicfiles.com/detectron2/new_baselines/mask_rcnn_R_50_FPN_200ep_LSJ/42047638/model_final_89a8d3.pkl) \| [metrics](https://dl.fbaipublicfiles.com/detectron2/new_baselines/mask_rcnn_R_50_FPN_200ep_LSJ/42047638/metrics.json) |
| [R50-FPN](configs/new_baselines/mask_rcnn_R_50_FPN_400ep_LSJ.py) | 400 | 0.376 | 0.069 | 47.4 | 42.5 | 42019571 | [model](https://dl.fbaipublicfiles.com/detectron2/new_baselines/mask_rcnn_R_50_FPN_400ep_LSJ/42019571/model_final_14d201.pkl) \| [metrics](https://dl.fbaipublicfiles.com/detectron2/new_baselines/mask_rcnn_R_50_FPN_400ep_LSJ/42019571/metrics.json) |
| [R101-FPN](configs/new_baselines/mask_rcnn_R_101_FPN_100ep_LSJ.py) | 100 | 0.518 | 0.073 | 46.4 | 41.6 | 42025812 | [model](https://dl.fbaipublicfiles.com/detectron2/new_baselines/mask_rcnn_R_101_FPN_100ep_LSJ/42025812/model_final_4f7b58.pkl) \| [metrics](https://dl.fbaipublicfiles.com/detectron2/new_baselines/mask_rcnn_R_101_FPN_100ep_LSJ/42025812/metrics.json) |
| [R101-FPN](configs/new_baselines/mask_rcnn_R_101_FPN_200ep_LSJ.py) | 200 | 0.518 | 0.073 | 48.0 | 43.1 | 42131867 | [model](https://dl.fbaipublicfiles.com/detectron2/new_baselines/mask_rcnn_R_101_FPN_200ep_LSJ/42131867/model_final_0bb7ae.pkl) \| [metrics](https://dl.fbaipublicfiles.com/detectron2/new_baselines/mask_rcnn_R_101_FPN_200ep_LSJ/42131867/metrics.json) |
| [R101-FPN](configs/new_baselines/mask_rcnn_R_101_FPN_400ep_LSJ.py) | 400 | 0.518 | 0.073 | 48.9 | 43.7 | 42073830 | [model](https://dl.fbaipublicfiles.com/detectron2/new_baselines/mask_rcnn_R_101_FPN_400ep_LSJ/42073830/model_final_f96b26.pkl) \| [metrics](https://dl.fbaipublicfiles.com/detectron2/new_baselines/mask_rcnn_R_101_FPN_400ep_LSJ/42073830/metrics.json) |
| [regnetx_4gf_dds_FPN](configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_100ep_LSJ.py) | 100 | 0.474 | 0.071 | 46.0 | 41.3 | 42047771 | [model](https://dl.fbaipublicfiles.com/detectron2/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_100ep_LSJ/42047771/model_final_b7fbab.pkl) \| [metrics](https://dl.fbaipublicfiles.com/detectron2/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_100ep_LSJ/42047771/metrics.json) |
| [regnetx_4gf_dds_FPN](configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_200ep_LSJ.py) | 200 | 0.474 | 0.071 | 48.1 | 43.1 | 42132721 | [model](https://dl.fbaipublicfiles.com/detectron2/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_200ep_LSJ/42132721/model_final_5d87c1.pkl) \| [metrics](https://dl.fbaipublicfiles.com/detectron2/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_200ep_LSJ/42132721/metrics.json) |
| [regnetx_4gf_dds_FPN](configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_400ep_LSJ.py) | 400 | 0.474 | 0.071 | 48.6 | 43.5 | 42025447 | [model](https://dl.fbaipublicfiles.com/detectron2/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_400ep_LSJ/42025447/model_final_f1362d.pkl) \| [metrics](https://dl.fbaipublicfiles.com/detectron2/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_400ep_LSJ/42025447/metrics.json) |
| [regnety_4gf_dds_FPN](configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_100ep_LSJ.py) | 100 | 0.487 | 0.073 | 46.1 | 41.6 | 42047784 | [model](https://dl.fbaipublicfiles.com/detectron2/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_100ep_LSJ/42047784/model_final_6ba57e.pkl) \| [metrics](https://dl.fbaipublicfiles.com/detectron2/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_100ep_LSJ/42047784/metrics.json) |
| [regnety_4gf_dds_FPN](configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_200ep_LSJ.py) | 200 | 0.487 | 0.072 | 47.8 | 43.0 | 42047642 | [model](https://dl.fbaipublicfiles.com/detectron2/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_200ep_LSJ/42047642/model_final_27b9c1.pkl) \| [metrics](https://dl.fbaipublicfiles.com/detectron2/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_200ep_LSJ/42047642/metrics.json) |
| [regnety_4gf_dds_FPN](configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_400ep_LSJ.py) | 400 | 0.487 | 0.072 | 48.2 | 43.3 | 42045954 | [model](https://dl.fbaipublicfiles.com/detectron2/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_400ep_LSJ/42045954/model_final_ef3a80.pkl) \| [metrics](https://dl.fbaipublicfiles.com/detectron2/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_400ep_LSJ/42045954/metrics.json) |

### COCO Person Keypoint Detection Baselines with Keypoint R-CNN
<!--
./gen_html_table.py --config 'COCO-Keypoints/*50*' 'COCO-Keypoints/*101*' --name R50-FPN R50-FPN R101-FPN X101-FPN --fields lr_sched train_speed inference_speed mem box_AP keypoint_AP
-->

| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | kp. AP | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
| [R50-FPN](configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.yaml) | 1x | 0.315 | 0.072 | 5.0 | 53.6 | 64.0 | 137261548 | [model](https://dl.fbaipublicfiles.com/detectron2/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x/137261548/model_final_04e291.pkl) \| [metrics](https://dl.fbaipublicfiles.com/detectron2/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x/137261548/metrics.json) |
| [R50-FPN](configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml) | 3x | 0.316 | 0.066 | 5.0 | 55.4 | 65.5 | 137849621 | [model](https://dl.fbaipublicfiles.com/detectron2/COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x/137849621/model_final_a6e10b.pkl) \| [metrics](https://dl.fbaipublicfiles.com/detectron2/COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x/137849621/metrics.json) |
| [R101-FPN](configs/COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml) | 3x | 0.390 | 0.076 | 6.1 | 56.4 | 66.1 | 138363331 | [model](https://dl.fbaipublicfiles.com/detectron2/COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x/138363331/model_final_997cc7.pkl) \| [metrics](https://dl.fbaipublicfiles.com/detectron2/COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x/138363331/metrics.json) |
| [X101-FPN](configs/COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml) | 3x | 0.738 | 0.121 | 8.7 | 57.3 | 66.0 | 139686956 | [model](https://dl.fbaipublicfiles.com/detectron2/COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x/139686956/model_final_5ad38f.pkl) \| [metrics](https://dl.fbaipublicfiles.com/detectron2/COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x/139686956/metrics.json) |

### COCO Panoptic Segmentation Baselines with Panoptic FPN
<!--
./gen_html_table.py --config 'COCO-PanopticSegmentation/*50*' 'COCO-PanopticSegmentation/*101*' --name R50-FPN R50-FPN R101-FPN --fields lr_sched train_speed inference_speed mem box_AP mask_AP PQ
-->

| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | mask AP | PQ | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| [R50-FPN](configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.yaml) | 1x | 0.304 | 0.053 | 4.8 | 37.6 | 34.7 | 39.4 | 139514544 | [model](https://dl.fbaipublicfiles.com/detectron2/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x/139514544/model_final_dbfeb4.pkl) \| [metrics](https://dl.fbaipublicfiles.com/detectron2/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x/139514544/metrics.json) |
| [R50-FPN](configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml) | 3x | 0.302 | 0.053 | 4.8 | 40.0 | 36.5 | 41.5 | 139514569 | [model](https://dl.fbaipublicfiles.com/detectron2/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x/139514569/model_final_c10459.pkl) \| [metrics](https://dl.fbaipublicfiles.com/detectron2/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x/139514569/metrics.json) |
| [R101-FPN](configs/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml) | 3x | 0.392 | 0.066 | 6.0 | 42.4 | 38.5 | 43.0 | 139514519 | [model](https://dl.fbaipublicfiles.com/detectron2/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x/139514519/model_final_cafdb1.pkl) \| [metrics](https://dl.fbaipublicfiles.com/detectron2/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x/139514519/metrics.json) |

### LVIS Instance Segmentation Baselines with Mask R-CNN

Mask R-CNN baselines on the [LVIS dataset](https://lvisdataset.org), v0.5.
These baselines are described in Table 3(c) of the [LVIS paper](https://arxiv.org/abs/1908.03195).

NOTE: the 1x schedule here has the same number of __iterations__ as the COCO 1x baselines.
They are roughly 24 epochs of LVISv0.5 data.
The final results of these configs have large variance across different runs.

<!--
./gen_html_table.py --config 'LVISv0.5-InstanceSegmentation/mask*50*' 'LVISv0.5-InstanceSegmentation/mask*101*' --name R50-FPN R101-FPN X101-FPN --fields lr_sched train_speed inference_speed mem box_AP mask_AP
-->

| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | mask AP | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
| [R50-FPN](configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml) | 1x | 0.292 | 0.107 | 7.1 | 23.6 | 24.4 | 144219072 | [model](https://dl.fbaipublicfiles.com/detectron2/LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x/144219072/model_final_571f7c.pkl) \| [metrics](https://dl.fbaipublicfiles.com/detectron2/LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x/144219072/metrics.json) |
| [R101-FPN](configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml) | 1x | 0.371 | 0.114 | 7.8 | 25.6 | 25.9 | 144219035 | [model](https://dl.fbaipublicfiles.com/detectron2/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x/144219035/model_final_824ab5.pkl) \| [metrics](https://dl.fbaipublicfiles.com/detectron2/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x/144219035/metrics.json) |
| [X101-FPN](configs/LVISv0.5-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml) | 1x | 0.712 | 0.151 | 10.2 | 26.7 | 27.1 | 144219108 | [model](https://dl.fbaipublicfiles.com/detectron2/LVISv0.5-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x/144219108/model_final_5e3439.pkl) \| [metrics](https://dl.fbaipublicfiles.com/detectron2/LVISv0.5-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x/144219108/metrics.json) |

### Cityscapes & Pascal VOC Baselines

Simple baselines for
* Mask R-CNN on Cityscapes instance segmentation (initialized from COCO pre-training, then trained on Cityscapes fine annotations only)
* Faster R-CNN on PASCAL VOC object detection (trained on VOC 2007 train+val + VOC 2012 train+val, tested on VOC 2007 using 11-point interpolated AP; see the sketch after the table below)

<!--
./gen_html_table.py --config 'Cityscapes/*' 'PascalVOC-Detection/*' --name "R50-FPN, Cityscapes" "R50-C4, VOC" --fields train_speed inference_speed mem box_AP box_AP50 mask_AP
-->

| Name | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | box AP50 | mask AP | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
| [R50-FPN, Cityscapes](configs/Cityscapes/mask_rcnn_R_50_FPN.yaml) | 0.240 | 0.078 | 4.4 | | | 36.5 | 142423278 | [model](https://dl.fbaipublicfiles.com/detectron2/Cityscapes/mask_rcnn_R_50_FPN/142423278/model_final_af9cf5.pkl) \| [metrics](https://dl.fbaipublicfiles.com/detectron2/Cityscapes/mask_rcnn_R_50_FPN/142423278/metrics.json) |
| [R50-C4, VOC](configs/PascalVOC-Detection/faster_rcnn_R_50_C4.yaml) | 0.537 | 0.081 | 4.8 | 51.9 | 80.3 | | 142202221 | [model](https://dl.fbaipublicfiles.com/detectron2/PascalVOC-Detection/faster_rcnn_R_50_C4/142202221/model_final_b1acc2.pkl) \| [metrics](https://dl.fbaipublicfiles.com/detectron2/PascalVOC-Detection/faster_rcnn_R_50_C4/142202221/metrics.json) |
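
The VOC numbers above use the older 11-point interpolated AP rather than COCO-style AP. A minimal sketch of that metric (a hypothetical helper for illustration, not part of this repo), given precision/recall arrays for one class:

```python
import numpy as np

def voc_11point_ap(recall: np.ndarray, precision: np.ndarray) -> float:
    """11-point interpolated AP as used in PASCAL VOC 2007-style evaluation.

    For each recall threshold t in {0.0, 0.1, ..., 1.0}, take the maximum
    precision achieved at any recall >= t, then average the 11 values.
    """
    ap = 0.0
    for t in np.linspace(0.0, 1.0, 11):
        mask = recall >= t
        p = np.max(precision[mask]) if mask.any() else 0.0
        ap += p / 11.0
    return ap

# A detector with precision 1.0 at every recall level gets AP = 1.0
print(voc_11point_ap(np.array([0.0, 0.5, 1.0]), np.array([1.0, 1.0, 1.0])))
```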

### Other Settings

Ablations for Deformable Conv and Cascade R-CNN:

<!--
./gen_html_table.py --config 'COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml' 'Misc/*R_50_FPN_1x_dconv*' 'Misc/cascade*1x.yaml' 'COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml' 'Misc/*R_50_FPN_3x_dconv*' 'Misc/cascade*3x.yaml' --name "Baseline R50-FPN" "Deformable Conv" "Cascade R-CNN" "Baseline R50-FPN" "Deformable Conv" "Cascade R-CNN" --fields lr_sched train_speed inference_speed mem box_AP mask_AP
-->

| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | mask AP | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
| [Baseline R50-FPN](configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml) | 1x | 0.261 | 0.043 | 3.4 | 38.6 | 35.2 | 137260431 | [model](https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x/137260431/model_final_a54504.pkl) \| [metrics](https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x/137260431/metrics.json) |
| [Deformable Conv](configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml) | 1x | 0.342 | 0.048 | 3.5 | 41.5 | 37.5 | 138602867 | [model](https://dl.fbaipublicfiles.com/detectron2/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5/138602867/model_final_65c703.pkl) \| [metrics](https://dl.fbaipublicfiles.com/detectron2/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5/138602867/metrics.json) |
| [Cascade R-CNN](configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml) | 1x | 0.317 | 0.052 | 4.0 | 42.1 | 36.4 | 138602847 | [model](https://dl.fbaipublicfiles.com/detectron2/Misc/cascade_mask_rcnn_R_50_FPN_1x/138602847/model_final_e9d89b.pkl) \| [metrics](https://dl.fbaipublicfiles.com/detectron2/Misc/cascade_mask_rcnn_R_50_FPN_1x/138602847/metrics.json) |
| [Baseline R50-FPN](configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml) | 3x | 0.261 | 0.043 | 3.4 | 41.0 | 37.2 | 137849600 | [model](https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl) \| [metrics](https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/metrics.json) |
| [Deformable Conv](configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml) | 3x | 0.349 | 0.047 | 3.5 | 42.7 | 38.5 | 144998336 | [model](https://dl.fbaipublicfiles.com/detectron2/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5/144998336/model_final_821d0b.pkl) \| [metrics](https://dl.fbaipublicfiles.com/detectron2/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5/144998336/metrics.json) |
| [Cascade R-CNN](configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml) | 3x | 0.328 | 0.053 | 4.0 | 44.3 | 38.5 | 144998488 | [model](https://dl.fbaipublicfiles.com/detectron2/Misc/cascade_mask_rcnn_R_50_FPN_3x/144998488/model_final_480dd8.pkl) \| [metrics](https://dl.fbaipublicfiles.com/detectron2/Misc/cascade_mask_rcnn_R_50_FPN_3x/144998488/metrics.json) |

Ablations for normalization methods, and a few models trained from scratch following [Rethinking ImageNet Pre-training](https://arxiv.org/abs/1811.08883).
(Note: the baseline uses a `2fc` head while the others use a [`4conv1fc` head](https://arxiv.org/abs/1803.08494).)
<!--
./gen_html_table.py --config 'COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml' 'Misc/mask*50_FPN_3x_gn.yaml' 'Misc/mask*50_FPN_3x_syncbn.yaml' 'Misc/scratch*' --name "Baseline R50-FPN" "GN" "SyncBN" "GN (from scratch)" "GN (from scratch)" "SyncBN (from scratch)" --fields lr_sched train_speed inference_speed mem box_AP mask_AP
-->

| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | mask AP | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
| [Baseline R50-FPN](configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml) | 3x | 0.261 | 0.043 | 3.4 | 41.0 | 37.2 | 137849600 | [model](https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl) \| [metrics](https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/metrics.json) |
| [GN](configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml) | 3x | 0.309 | 0.060 | 5.6 | 42.6 | 38.6 | 138602888 | [model](https://dl.fbaipublicfiles.com/detectron2/Misc/mask_rcnn_R_50_FPN_3x_gn/138602888/model_final_dc5d9e.pkl) \| [metrics](https://dl.fbaipublicfiles.com/detectron2/Misc/mask_rcnn_R_50_FPN_3x_gn/138602888/metrics.json) |
| [SyncBN](configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml) | 3x | 0.345 | 0.053 | 5.5 | 41.9 | 37.8 | 169527823 | [model](https://dl.fbaipublicfiles.com/detectron2/Misc/mask_rcnn_R_50_FPN_3x_syncbn/169527823/model_final_3b3c51.pkl) \| [metrics](https://dl.fbaipublicfiles.com/detectron2/Misc/mask_rcnn_R_50_FPN_3x_syncbn/169527823/metrics.json) |
| [GN (from scratch)](configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml) | 3x | 0.338 | 0.061 | 7.2 | 39.9 | 36.6 | 138602908 | [model](https://dl.fbaipublicfiles.com/detectron2/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn/138602908/model_final_01ca85.pkl) \| [metrics](https://dl.fbaipublicfiles.com/detectron2/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn/138602908/metrics.json) |
| [GN (from scratch)](configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml) | 9x | N/A | 0.061 | 7.2 | 43.7 | 39.6 | 183808979 | [model](https://dl.fbaipublicfiles.com/detectron2/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn/183808979/model_final_da7b4c.pkl) \| [metrics](https://dl.fbaipublicfiles.com/detectron2/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn/183808979/metrics.json) |
| [SyncBN (from scratch)](configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml) | 9x | N/A | 0.055 | 7.2 | 43.6 | 39.3 | 184226666 | [model](https://dl.fbaipublicfiles.com/detectron2/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn/184226666/model_final_5ce33e.pkl) \| [metrics](https://dl.fbaipublicfiles.com/detectron2/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn/184226666/metrics.json) |
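
The `2fc` vs `4conv1fc` distinction in the note above is a box-head configuration choice. A minimal sketch of how the `4conv1fc` + GN head can be expressed through detectron2's config API (the exact values reflect my reading of the `Misc/*_gn.yaml` configs and should be treated as an assumption, not a restatement of this table):

```python
# Sketch only: shows the config keys behind the "4conv1fc head" wording above.
from detectron2.config import get_cfg

cfg = get_cfg()
cfg.MODEL.ROI_BOX_HEAD.NAME = "FastRCNNConvFCHead"
cfg.MODEL.ROI_BOX_HEAD.NUM_CONV = 4  # four convs ...
cfg.MODEL.ROI_BOX_HEAD.NUM_FC = 1    # ... followed by one fc (the baseline uses 0 convs + 2 fc)
cfg.MODEL.ROI_BOX_HEAD.NORM = "GN"   # GroupNorm in the box head
```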

A few very large models trained for a long time, for demo purposes. They are trained using multiple machines:

<!--
./gen_html_table.py --config 'Misc/panoptic_*dconv*' 'Misc/cascade_*152*' --name "Panoptic FPN R101" "Mask R-CNN X152" --fields inference_speed mem box_AP mask_AP PQ
# manually add TTA results
-->

| Name | inference time (s/im) | train mem (GB) | box AP | mask AP | PQ | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- |
| [Panoptic FPN R101](configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml) | 0.098 | 11.4 | 47.4 | 41.3 | 46.1 | 139797668 | [model](https://dl.fbaipublicfiles.com/detectron2/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x/139797668/model_final_be35db.pkl) \| [metrics](https://dl.fbaipublicfiles.com/detectron2/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x/139797668/metrics.json) |
| [Mask R-CNN X152](configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml) | 0.234 | 15.1 | 50.2 | 44.0 | | 18131413 | [model](https://dl.fbaipublicfiles.com/detectron2/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv/18131413/model_0039999_e76410.pkl) \| [metrics](https://dl.fbaipublicfiles.com/detectron2/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv/18131413/metrics.json) |
| above + test-time aug. | | | 51.9 | 45.9 | | | |

detectron2/README.md
ADDED
@@ -0,0 +1,68 @@

<img src=".github/Detectron2-Logo-Horz.svg" width="300" >
|
2 |
+
|
3 |
+
<a href="https://opensource.facebook.com/support-ukraine">
|
4 |
+
<img src="https://img.shields.io/badge/Support-Ukraine-FFD500?style=flat&labelColor=005BBB" alt="Support Ukraine - Help Provide Humanitarian Aid to Ukraine." />
|
5 |
+
</a>
|
6 |
+
|
7 |
+
Detectron2 is Facebook AI Research's next generation library
|
8 |
+
that provides state-of-the-art detection and segmentation algorithms.
|
9 |
+
It is the successor of
|
10 |
+
[Detectron](https://github.com/facebookresearch/Detectron/)
|
11 |
+
and [maskrcnn-benchmark](https://github.com/facebookresearch/maskrcnn-benchmark/).
|
12 |
+
It supports a number of computer vision research projects and production applications in Facebook.
|
13 |
+
|
14 |
+
<div align="center">
|
15 |
+
<img src="https://user-images.githubusercontent.com/1381301/66535560-d3422200-eace-11e9-9123-5535d469db19.png"/>
|
16 |
+
</div>
|
17 |
+
<br>
|
18 |
+
|
19 |
+
## Learn More about Detectron2
|
20 |
+
|
21 |
+
Explain Like Iβm 5: Detectron2 | Using Machine Learning with Detectron2
|
22 |
+
:-------------------------:|:-------------------------:
|
23 |
+
[](https://www.youtube.com/watch?v=1oq1Ye7dFqc) | [](https://www.youtube.com/watch?v=eUSgtfK4ivk)
|
24 |
+
|
25 |
+
## What's New
|
26 |
+
* Includes new capabilities such as panoptic segmentation, Densepose, Cascade R-CNN, rotated bounding boxes, PointRend,
|
27 |
+
DeepLab, etc.
|
28 |
+
* Used as a library to support building [research projects](projects/) on top of it.
|
29 |
+
* Models can be exported to TorchScript format or Caffe2 format for deployment.
|
30 |
+
* It [trains much faster](https://detectron2.readthedocs.io/notes/benchmarks.html).
|
31 |
+
|
32 |
+
See our [blog post](https://ai.facebook.com/blog/-detectron2-a-pytorch-based-modular-object-detection-library-/)
|
33 |
+
to see more demos and learn about detectron2.
|
34 |
+
|
35 |
+
## Installation
|
36 |
+
|
37 |
+
See [installation instructions](https://detectron2.readthedocs.io/tutorials/install.html).
|
38 |
+
|
39 |
+
## Getting Started
|
40 |
+
|
41 |
+
See [Getting Started with Detectron2](https://detectron2.readthedocs.io/tutorials/getting_started.html),
|
42 |
+
and the [Colab Notebook](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5)
|
43 |
+
to learn about basic usage.
|
44 |
+
|
45 |
+
Learn more at our [documentation](https://detectron2.readthedocs.org).
|
46 |
+
And see [projects/](projects/) for some projects that are built on top of detectron2.
|
47 |
+
|
48 |
+
## Model Zoo and Baselines
|
49 |
+
|
50 |
+
We provide a large set of baseline results and trained models available for download in the [Detectron2 Model Zoo](MODEL_ZOO.md).
|
51 |
+
|
52 |
+
## License
|
53 |
+
|
54 |
+
Detectron2 is released under the [Apache 2.0 license](LICENSE).
|
55 |
+
|
56 |
+
## Citing Detectron2
|
57 |
+
|
58 |
+
If you use Detectron2 in your research or wish to refer to the baseline results published in the [Model Zoo](MODEL_ZOO.md), please use the following BibTeX entry.
|
59 |
+
|
60 |
+
```BibTeX
|
61 |
+
@misc{wu2019detectron2,
|
62 |
+
author = {Yuxin Wu and Alexander Kirillov and Francisco Massa and
|
63 |
+
Wan-Yen Lo and Ross Girshick},
|
64 |
+
title = {Detectron2},
|
65 |
+
howpublished = {\url{https://github.com/facebookresearch/detectron2}},
|
66 |
+
year = {2019}
|
67 |
+
}
|
68 |
+
```
|
detectron2/build/lib.linux-x86_64-3.10/detectron2/_C.cpython-310-x86_64-linux-gnu.so
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2b144d19f93f6ecb19d3c658e41bbe146f5c4cb3ddfd7ae691b41fc62475aa8a
size 17491024

detectron2/build/lib.linux-x86_64-3.10/detectron2/__init__.py
ADDED
@@ -0,0 +1,10 @@
# Copyright (c) Facebook, Inc. and its affiliates.

from .utils.env import setup_environment

setup_environment()


# This line will be programatically read/write by setup.py.
# Leave them at the bottom of this file and don't touch them.
__version__ = "0.6"

detectron2/build/lib.linux-x86_64-3.10/detectron2/checkpoint/__init__.py
ADDED
@@ -0,0 +1,10 @@
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates.
# File:


from . import catalog as _UNUSED  # register the handler
from .detection_checkpoint import DetectionCheckpointer
from fvcore.common.checkpoint import Checkpointer, PeriodicCheckpointer

__all__ = ["Checkpointer", "PeriodicCheckpointer", "DetectionCheckpointer"]
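
For reference, a minimal usage sketch of the `DetectionCheckpointer` exported above (the `model` variable and the weight path are placeholders, not values taken from this repo):

```python
# Sketch only: `model` is any detectron2-style nn.Module built elsewhere,
# and the weight path/URL is a placeholder.
from detectron2.checkpoint import DetectionCheckpointer

checkpointer = DetectionCheckpointer(model, save_dir="output")
checkpointer.load("path/or/url/to/model_final.pkl")  # also understands Caffe2-format .pkl weights
checkpointer.save("model_renamed")                   # writes output/model_renamed.pth
```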
detectron2/build/lib.linux-x86_64-3.10/detectron2/checkpoint/c2_model_loading.py
ADDED
@@ -0,0 +1,407 @@
# Copyright (c) Facebook, Inc. and its affiliates.
import copy
import logging
import re
from typing import Dict, List
import torch
from tabulate import tabulate


def convert_basic_c2_names(original_keys):
    """
    Apply some basic name conversion to names in C2 weights.
    It only deals with typical backbone models.

    Args:
        original_keys (list[str]):
    Returns:
        list[str]: The same number of strings matching those in original_keys.
    """
    layer_keys = copy.deepcopy(original_keys)
    layer_keys = [
        {"pred_b": "linear_b", "pred_w": "linear_w"}.get(k, k) for k in layer_keys
    ]  # some hard-coded mappings

    layer_keys = [k.replace("_", ".") for k in layer_keys]
    layer_keys = [re.sub("\\.b$", ".bias", k) for k in layer_keys]
    layer_keys = [re.sub("\\.w$", ".weight", k) for k in layer_keys]
    # Uniform both bn and gn names to "norm"
    layer_keys = [re.sub("bn\\.s$", "norm.weight", k) for k in layer_keys]
    layer_keys = [re.sub("bn\\.bias$", "norm.bias", k) for k in layer_keys]
    layer_keys = [re.sub("bn\\.rm", "norm.running_mean", k) for k in layer_keys]
    layer_keys = [re.sub("bn\\.running.mean$", "norm.running_mean", k) for k in layer_keys]
    layer_keys = [re.sub("bn\\.riv$", "norm.running_var", k) for k in layer_keys]
    layer_keys = [re.sub("bn\\.running.var$", "norm.running_var", k) for k in layer_keys]
    layer_keys = [re.sub("bn\\.gamma$", "norm.weight", k) for k in layer_keys]
    layer_keys = [re.sub("bn\\.beta$", "norm.bias", k) for k in layer_keys]
    layer_keys = [re.sub("gn\\.s$", "norm.weight", k) for k in layer_keys]
    layer_keys = [re.sub("gn\\.bias$", "norm.bias", k) for k in layer_keys]

    # stem
    layer_keys = [re.sub("^res\\.conv1\\.norm\\.", "conv1.norm.", k) for k in layer_keys]
    # to avoid mis-matching with "conv1" in other components (e.g. detection head)
    layer_keys = [re.sub("^conv1\\.", "stem.conv1.", k) for k in layer_keys]

    # layer1-4 is used by torchvision, however we follow the C2 naming strategy (res2-5)
    # layer_keys = [re.sub("^res2.", "layer1.", k) for k in layer_keys]
    # layer_keys = [re.sub("^res3.", "layer2.", k) for k in layer_keys]
    # layer_keys = [re.sub("^res4.", "layer3.", k) for k in layer_keys]
    # layer_keys = [re.sub("^res5.", "layer4.", k) for k in layer_keys]

    # blocks
    layer_keys = [k.replace(".branch1.", ".shortcut.") for k in layer_keys]
    layer_keys = [k.replace(".branch2a.", ".conv1.") for k in layer_keys]
    layer_keys = [k.replace(".branch2b.", ".conv2.") for k in layer_keys]
    layer_keys = [k.replace(".branch2c.", ".conv3.") for k in layer_keys]

    # DensePose substitutions
    layer_keys = [re.sub("^body.conv.fcn", "body_conv_fcn", k) for k in layer_keys]
    layer_keys = [k.replace("AnnIndex.lowres", "ann_index_lowres") for k in layer_keys]
    layer_keys = [k.replace("Index.UV.lowres", "index_uv_lowres") for k in layer_keys]
    layer_keys = [k.replace("U.lowres", "u_lowres") for k in layer_keys]
    layer_keys = [k.replace("V.lowres", "v_lowres") for k in layer_keys]
    return layer_keys


def convert_c2_detectron_names(weights):
    """
    Map Caffe2 Detectron weight names to Detectron2 names.

    Args:
        weights (dict): name -> tensor

    Returns:
        dict: detectron2 names -> tensor
        dict: detectron2 names -> C2 names
    """
    logger = logging.getLogger(__name__)
    logger.info("Renaming Caffe2 weights ......")
    original_keys = sorted(weights.keys())
    layer_keys = copy.deepcopy(original_keys)

    layer_keys = convert_basic_c2_names(layer_keys)

    # --------------------------------------------------------------------------
    # RPN hidden representation conv
    # --------------------------------------------------------------------------
    # FPN case
    # In the C2 model, the RPN hidden layer conv is defined for FPN level 2 and then
    # shared for all other levels, hence the appearance of "fpn2"
    layer_keys = [
        k.replace("conv.rpn.fpn2", "proposal_generator.rpn_head.conv") for k in layer_keys
    ]
    # Non-FPN case
    layer_keys = [k.replace("conv.rpn", "proposal_generator.rpn_head.conv") for k in layer_keys]

    # --------------------------------------------------------------------------
    # RPN box transformation conv
    # --------------------------------------------------------------------------
    # FPN case (see note above about "fpn2")
    layer_keys = [
        k.replace("rpn.bbox.pred.fpn2", "proposal_generator.rpn_head.anchor_deltas")
        for k in layer_keys
    ]
    layer_keys = [
        k.replace("rpn.cls.logits.fpn2", "proposal_generator.rpn_head.objectness_logits")
        for k in layer_keys
    ]
    # Non-FPN case
    layer_keys = [
        k.replace("rpn.bbox.pred", "proposal_generator.rpn_head.anchor_deltas") for k in layer_keys
    ]
    layer_keys = [
        k.replace("rpn.cls.logits", "proposal_generator.rpn_head.objectness_logits")
        for k in layer_keys
    ]

    # --------------------------------------------------------------------------
    # Fast R-CNN box head
    # --------------------------------------------------------------------------
    layer_keys = [re.sub("^bbox\\.pred", "bbox_pred", k) for k in layer_keys]
    layer_keys = [re.sub("^cls\\.score", "cls_score", k) for k in layer_keys]
    layer_keys = [re.sub("^fc6\\.", "box_head.fc1.", k) for k in layer_keys]
    layer_keys = [re.sub("^fc7\\.", "box_head.fc2.", k) for k in layer_keys]
    # 4conv1fc head tensor names: head_conv1_w, head_conv1_gn_s
    layer_keys = [re.sub("^head\\.conv", "box_head.conv", k) for k in layer_keys]

    # --------------------------------------------------------------------------
    # FPN lateral and output convolutions
    # --------------------------------------------------------------------------
    def fpn_map(name):
        """
        Look for keys with the following patterns:
        1) Starts with "fpn.inner."
           Example: "fpn.inner.res2.2.sum.lateral.weight"
           Meaning: These are lateral pathway convolutions
        2) Starts with "fpn.res"
           Example: "fpn.res2.2.sum.weight"
           Meaning: These are FPN output convolutions
        """
        splits = name.split(".")
        norm = ".norm" if "norm" in splits else ""
        if name.startswith("fpn.inner."):
            # splits example: ['fpn', 'inner', 'res2', '2', 'sum', 'lateral', 'weight']
            stage = int(splits[2][len("res") :])
            return "fpn_lateral{}{}.{}".format(stage, norm, splits[-1])
        elif name.startswith("fpn.res"):
            # splits example: ['fpn', 'res2', '2', 'sum', 'weight']
            stage = int(splits[1][len("res") :])
            return "fpn_output{}{}.{}".format(stage, norm, splits[-1])
        return name

    layer_keys = [fpn_map(k) for k in layer_keys]

    # --------------------------------------------------------------------------
    # Mask R-CNN mask head
    # --------------------------------------------------------------------------
    # roi_heads.StandardROIHeads case
    layer_keys = [k.replace(".[mask].fcn", "mask_head.mask_fcn") for k in layer_keys]
    layer_keys = [re.sub("^\\.mask\\.fcn", "mask_head.mask_fcn", k) for k in layer_keys]
    layer_keys = [k.replace("mask.fcn.logits", "mask_head.predictor") for k in layer_keys]
    # roi_heads.Res5ROIHeads case
    layer_keys = [k.replace("conv5.mask", "mask_head.deconv") for k in layer_keys]

    # --------------------------------------------------------------------------
    # Keypoint R-CNN head
    # --------------------------------------------------------------------------
    # interestingly, the keypoint head convs have blob names that are simply "conv_fcnX"
    layer_keys = [k.replace("conv.fcn", "roi_heads.keypoint_head.conv_fcn") for k in layer_keys]
    layer_keys = [
        k.replace("kps.score.lowres", "roi_heads.keypoint_head.score_lowres") for k in layer_keys
    ]
    layer_keys = [k.replace("kps.score.", "roi_heads.keypoint_head.score.") for k in layer_keys]

    # --------------------------------------------------------------------------
    # Done with replacements
    # --------------------------------------------------------------------------
    assert len(set(layer_keys)) == len(layer_keys)
    assert len(original_keys) == len(layer_keys)

    new_weights = {}
    new_keys_to_original_keys = {}
    for orig, renamed in zip(original_keys, layer_keys):
        new_keys_to_original_keys[renamed] = orig
        if renamed.startswith("bbox_pred.") or renamed.startswith("mask_head.predictor."):
            # remove the meaningless prediction weight for background class
            new_start_idx = 4 if renamed.startswith("bbox_pred.") else 1
            new_weights[renamed] = weights[orig][new_start_idx:]
            logger.info(
                "Remove prediction weight for background class in {}. The shape changes from "
|
190 |
+
"{} to {}.".format(
|
191 |
+
renamed, tuple(weights[orig].shape), tuple(new_weights[renamed].shape)
|
192 |
+
)
|
193 |
+
)
|
194 |
+
elif renamed.startswith("cls_score."):
|
195 |
+
# move weights of bg class from original index 0 to last index
|
196 |
+
logger.info(
|
197 |
+
"Move classification weights for background class in {} from index 0 to "
|
198 |
+
"index {}.".format(renamed, weights[orig].shape[0] - 1)
|
199 |
+
)
|
200 |
+
new_weights[renamed] = torch.cat([weights[orig][1:], weights[orig][:1]])
|
201 |
+
else:
|
202 |
+
new_weights[renamed] = weights[orig]
|
203 |
+
|
204 |
+
return new_weights, new_keys_to_original_keys
|
205 |
+
|
206 |
+
|
207 |
+
# Note the current matching is not symmetric.
|
208 |
+
# it assumes model_state_dict will have longer names.
|
209 |
+
def align_and_update_state_dicts(model_state_dict, ckpt_state_dict, c2_conversion=True):
|
210 |
+
"""
|
211 |
+
Match names between the two state-dict, and returns a new chkpt_state_dict with names
|
212 |
+
converted to match model_state_dict with heuristics. The returned dict can be later
|
213 |
+
loaded with fvcore checkpointer.
|
214 |
+
If `c2_conversion==True`, `ckpt_state_dict` is assumed to be a Caffe2
|
215 |
+
model and will be renamed at first.
|
216 |
+
|
217 |
+
Strategy: suppose that the models that we will create will have prefixes appended
|
218 |
+
to each of its keys, for example due to an extra level of nesting that the original
|
219 |
+
pre-trained weights from ImageNet won't contain. For example, model.state_dict()
|
220 |
+
might return backbone[0].body.res2.conv1.weight, while the pre-trained model contains
|
221 |
+
res2.conv1.weight. We thus want to match both parameters together.
|
222 |
+
For that, we look for each model weight, look among all loaded keys if there is one
|
223 |
+
that is a suffix of the current weight name, and use it if that's the case.
|
224 |
+
If multiple matches exist, take the one with longest size
|
225 |
+
of the corresponding name. For example, for the same model as before, the pretrained
|
226 |
+
weight file can contain both res2.conv1.weight, as well as conv1.weight. In this case,
|
227 |
+
we want to match backbone[0].body.conv1.weight to conv1.weight, and
|
228 |
+
backbone[0].body.res2.conv1.weight to res2.conv1.weight.
|
229 |
+
"""
|
230 |
+
model_keys = sorted(model_state_dict.keys())
|
231 |
+
if c2_conversion:
|
232 |
+
ckpt_state_dict, original_keys = convert_c2_detectron_names(ckpt_state_dict)
|
233 |
+
# original_keys: the name in the original dict (before renaming)
|
234 |
+
else:
|
235 |
+
original_keys = {x: x for x in ckpt_state_dict.keys()}
|
236 |
+
ckpt_keys = sorted(ckpt_state_dict.keys())
|
237 |
+
|
238 |
+
def match(a, b):
|
239 |
+
# Matched ckpt_key should be a complete (starts with '.') suffix.
|
240 |
+
# For example, roi_heads.mesh_head.whatever_conv1 does not match conv1,
|
241 |
+
# but matches whatever_conv1 or mesh_head.whatever_conv1.
|
242 |
+
return a == b or a.endswith("." + b)
|
243 |
+
|
244 |
+
# get a matrix of string matches, where each (i, j) entry correspond to the size of the
|
245 |
+
# ckpt_key string, if it matches
|
246 |
+
match_matrix = [len(j) if match(i, j) else 0 for i in model_keys for j in ckpt_keys]
|
247 |
+
match_matrix = torch.as_tensor(match_matrix).view(len(model_keys), len(ckpt_keys))
|
248 |
+
# use the matched one with longest size in case of multiple matches
|
249 |
+
max_match_size, idxs = match_matrix.max(1)
|
250 |
+
# remove indices that correspond to no-match
|
251 |
+
idxs[max_match_size == 0] = -1
|
252 |
+
|
253 |
+
logger = logging.getLogger(__name__)
|
254 |
+
# matched_pairs (matched checkpoint key --> matched model key)
|
255 |
+
matched_keys = {}
|
256 |
+
result_state_dict = {}
|
257 |
+
for idx_model, idx_ckpt in enumerate(idxs.tolist()):
|
258 |
+
if idx_ckpt == -1:
|
259 |
+
continue
|
260 |
+
key_model = model_keys[idx_model]
|
261 |
+
key_ckpt = ckpt_keys[idx_ckpt]
|
262 |
+
value_ckpt = ckpt_state_dict[key_ckpt]
|
263 |
+
shape_in_model = model_state_dict[key_model].shape
|
264 |
+
|
265 |
+
if shape_in_model != value_ckpt.shape:
|
266 |
+
logger.warning(
|
267 |
+
"Shape of {} in checkpoint is {}, while shape of {} in model is {}.".format(
|
268 |
+
key_ckpt, value_ckpt.shape, key_model, shape_in_model
|
269 |
+
)
|
270 |
+
)
|
271 |
+
logger.warning(
|
272 |
+
"{} will not be loaded. Please double check and see if this is desired.".format(
|
273 |
+
key_ckpt
|
274 |
+
)
|
275 |
+
)
|
276 |
+
continue
|
277 |
+
|
278 |
+
assert key_model not in result_state_dict
|
279 |
+
result_state_dict[key_model] = value_ckpt
|
280 |
+
if key_ckpt in matched_keys: # already added to matched_keys
|
281 |
+
logger.error(
|
282 |
+
"Ambiguity found for {} in checkpoint!"
|
283 |
+
"It matches at least two keys in the model ({} and {}).".format(
|
284 |
+
key_ckpt, key_model, matched_keys[key_ckpt]
|
285 |
+
)
|
286 |
+
)
|
287 |
+
raise ValueError("Cannot match one checkpoint key to multiple keys in the model.")
|
288 |
+
|
289 |
+
matched_keys[key_ckpt] = key_model
|
290 |
+
|
291 |
+
# logging:
|
292 |
+
matched_model_keys = sorted(matched_keys.values())
|
293 |
+
if len(matched_model_keys) == 0:
|
294 |
+
logger.warning("No weights in checkpoint matched with model.")
|
295 |
+
return ckpt_state_dict
|
296 |
+
common_prefix = _longest_common_prefix(matched_model_keys)
|
297 |
+
rev_matched_keys = {v: k for k, v in matched_keys.items()}
|
298 |
+
original_keys = {k: original_keys[rev_matched_keys[k]] for k in matched_model_keys}
|
299 |
+
|
300 |
+
model_key_groups = _group_keys_by_module(matched_model_keys, original_keys)
|
301 |
+
table = []
|
302 |
+
memo = set()
|
303 |
+
for key_model in matched_model_keys:
|
304 |
+
if key_model in memo:
|
305 |
+
continue
|
306 |
+
if key_model in model_key_groups:
|
307 |
+
group = model_key_groups[key_model]
|
308 |
+
memo |= set(group)
|
309 |
+
shapes = [tuple(model_state_dict[k].shape) for k in group]
|
310 |
+
table.append(
|
311 |
+
(
|
312 |
+
_longest_common_prefix([k[len(common_prefix) :] for k in group]) + "*",
|
313 |
+
_group_str([original_keys[k] for k in group]),
|
314 |
+
" ".join([str(x).replace(" ", "") for x in shapes]),
|
315 |
+
)
|
316 |
+
)
|
317 |
+
else:
|
318 |
+
key_checkpoint = original_keys[key_model]
|
319 |
+
shape = str(tuple(model_state_dict[key_model].shape))
|
320 |
+
table.append((key_model[len(common_prefix) :], key_checkpoint, shape))
|
321 |
+
table_str = tabulate(
|
322 |
+
table, tablefmt="pipe", headers=["Names in Model", "Names in Checkpoint", "Shapes"]
|
323 |
+
)
|
324 |
+
logger.info(
|
325 |
+
"Following weights matched with "
|
326 |
+
+ (f"submodule {common_prefix[:-1]}" if common_prefix else "model")
|
327 |
+
+ ":\n"
|
328 |
+
+ table_str
|
329 |
+
)
|
330 |
+
|
331 |
+
unmatched_ckpt_keys = [k for k in ckpt_keys if k not in set(matched_keys.keys())]
|
332 |
+
for k in unmatched_ckpt_keys:
|
333 |
+
result_state_dict[k] = ckpt_state_dict[k]
|
334 |
+
return result_state_dict
|
335 |
+
|
336 |
+
|
337 |
+
def _group_keys_by_module(keys: List[str], original_names: Dict[str, str]):
|
338 |
+
"""
|
339 |
+
Params in the same submodule are grouped together.
|
340 |
+
|
341 |
+
Args:
|
342 |
+
keys: names of all parameters
|
343 |
+
original_names: mapping from parameter name to their name in the checkpoint
|
344 |
+
|
345 |
+
Returns:
|
346 |
+
dict[name -> all other names in the same group]
|
347 |
+
"""
|
348 |
+
|
349 |
+
def _submodule_name(key):
|
350 |
+
pos = key.rfind(".")
|
351 |
+
if pos < 0:
|
352 |
+
return None
|
353 |
+
prefix = key[: pos + 1]
|
354 |
+
return prefix
|
355 |
+
|
356 |
+
all_submodules = [_submodule_name(k) for k in keys]
|
357 |
+
all_submodules = [x for x in all_submodules if x]
|
358 |
+
all_submodules = sorted(all_submodules, key=len)
|
359 |
+
|
360 |
+
ret = {}
|
361 |
+
for prefix in all_submodules:
|
362 |
+
group = [k for k in keys if k.startswith(prefix)]
|
363 |
+
if len(group) <= 1:
|
364 |
+
continue
|
365 |
+
original_name_lcp = _longest_common_prefix_str([original_names[k] for k in group])
|
366 |
+
if len(original_name_lcp) == 0:
|
367 |
+
# don't group weights if original names don't share prefix
|
368 |
+
continue
|
369 |
+
|
370 |
+
for k in group:
|
371 |
+
if k in ret:
|
372 |
+
continue
|
373 |
+
ret[k] = group
|
374 |
+
return ret
|
375 |
+
|
376 |
+
|
377 |
+
def _longest_common_prefix(names: List[str]) -> str:
|
378 |
+
"""
|
379 |
+
["abc.zfg", "abc.zef"] -> "abc."
|
380 |
+
"""
|
381 |
+
names = [n.split(".") for n in names]
|
382 |
+
m1, m2 = min(names), max(names)
|
383 |
+
ret = [a for a, b in zip(m1, m2) if a == b]
|
384 |
+
ret = ".".join(ret) + "." if len(ret) else ""
|
385 |
+
return ret
|
386 |
+
|
387 |
+
|
388 |
+
def _longest_common_prefix_str(names: List[str]) -> str:
|
389 |
+
m1, m2 = min(names), max(names)
|
390 |
+
lcp = [a for a, b in zip(m1, m2) if a == b]
|
391 |
+
lcp = "".join(lcp)
|
392 |
+
return lcp
|
393 |
+
|
394 |
+
|
395 |
+
def _group_str(names: List[str]) -> str:
|
396 |
+
"""
|
397 |
+
Turn "common1", "common2", "common3" into "common{1,2,3}"
|
398 |
+
"""
|
399 |
+
lcp = _longest_common_prefix_str(names)
|
400 |
+
rest = [x[len(lcp) :] for x in names]
|
401 |
+
rest = "{" + ",".join(rest) + "}"
|
402 |
+
ret = lcp + rest
|
403 |
+
|
404 |
+
# add some simplification for BN specifically
|
405 |
+
ret = ret.replace("bn_{beta,running_mean,running_var,gamma}", "bn_*")
|
406 |
+
ret = ret.replace("bn_beta,bn_running_mean,bn_running_var,bn_gamma", "bn_*")
|
407 |
+
return ret
|
detectron2/build/lib.linux-x86_64-3.10/detectron2/checkpoint/catalog.py
ADDED
@@ -0,0 +1,115 @@
# Copyright (c) Facebook, Inc. and its affiliates.
import logging

from detectron2.utils.file_io import PathHandler, PathManager


class ModelCatalog(object):
    """
    Store mappings from names to third-party models.
    """

    S3_C2_DETECTRON_PREFIX = "https://dl.fbaipublicfiles.com/detectron"

    # MSRA models have STRIDE_IN_1X1=True. False otherwise.
    # NOTE: all BN models here have fused BN into an affine layer.
    # As a result, you should only load them to a model with "FrozenBN".
    # Loading them to a model with regular BN or SyncBN is wrong.
    # Even when loaded to FrozenBN, it is still different from affine by an epsilon,
    # which should be negligible for training.
    # NOTE: all models here use PIXEL_STD=[1,1,1]
    # NOTE: Most of the BN models here are no longer used. We use the
    # re-converted pre-trained models under detectron2 model zoo instead.
    C2_IMAGENET_MODELS = {
        "MSRA/R-50": "ImageNetPretrained/MSRA/R-50.pkl",
        "MSRA/R-101": "ImageNetPretrained/MSRA/R-101.pkl",
        "FAIR/R-50-GN": "ImageNetPretrained/47261647/R-50-GN.pkl",
        "FAIR/R-101-GN": "ImageNetPretrained/47592356/R-101-GN.pkl",
        "FAIR/X-101-32x8d": "ImageNetPretrained/20171220/X-101-32x8d.pkl",
        "FAIR/X-101-64x4d": "ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl",
        "FAIR/X-152-32x8d-IN5k": "ImageNetPretrained/25093814/X-152-32x8d-IN5k.pkl",
    }

    C2_DETECTRON_PATH_FORMAT = (
        "{prefix}/{url}/output/train/{dataset}/{type}/model_final.pkl"  # noqa B950
    )

    C2_DATASET_COCO = "coco_2014_train%3Acoco_2014_valminusminival"
    C2_DATASET_COCO_KEYPOINTS = "keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival"

    # format: {model_name} -> part of the url
    C2_DETECTRON_MODELS = {
        "35857197/e2e_faster_rcnn_R-50-C4_1x": "35857197/12_2017_baselines/e2e_faster_rcnn_R-50-C4_1x.yaml.01_33_49.iAX0mXvW",  # noqa B950
        "35857345/e2e_faster_rcnn_R-50-FPN_1x": "35857345/12_2017_baselines/e2e_faster_rcnn_R-50-FPN_1x.yaml.01_36_30.cUF7QR7I",  # noqa B950
        "35857890/e2e_faster_rcnn_R-101-FPN_1x": "35857890/12_2017_baselines/e2e_faster_rcnn_R-101-FPN_1x.yaml.01_38_50.sNxI7sX7",  # noqa B950
        "36761737/e2e_faster_rcnn_X-101-32x8d-FPN_1x": "36761737/12_2017_baselines/e2e_faster_rcnn_X-101-32x8d-FPN_1x.yaml.06_31_39.5MIHi1fZ",  # noqa B950
        "35858791/e2e_mask_rcnn_R-50-C4_1x": "35858791/12_2017_baselines/e2e_mask_rcnn_R-50-C4_1x.yaml.01_45_57.ZgkA7hPB",  # noqa B950
        "35858933/e2e_mask_rcnn_R-50-FPN_1x": "35858933/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_1x.yaml.01_48_14.DzEQe4wC",  # noqa B950
        "35861795/e2e_mask_rcnn_R-101-FPN_1x": "35861795/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_1x.yaml.02_31_37.KqyEK4tT",  # noqa B950
        "36761843/e2e_mask_rcnn_X-101-32x8d-FPN_1x": "36761843/12_2017_baselines/e2e_mask_rcnn_X-101-32x8d-FPN_1x.yaml.06_35_59.RZotkLKI",  # noqa B950
        "48616381/e2e_mask_rcnn_R-50-FPN_2x_gn": "GN/48616381/04_2018_gn_baselines/e2e_mask_rcnn_R-50-FPN_2x_gn_0416.13_23_38.bTlTI97Q",  # noqa B950
        "37697547/e2e_keypoint_rcnn_R-50-FPN_1x": "37697547/12_2017_baselines/e2e_keypoint_rcnn_R-50-FPN_1x.yaml.08_42_54.kdzV35ao",  # noqa B950
        "35998355/rpn_R-50-C4_1x": "35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L",  # noqa B950
        "35998814/rpn_R-50-FPN_1x": "35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179",  # noqa B950
        "36225147/fast_R-50-FPN_1x": "36225147/12_2017_baselines/fast_rcnn_R-50-FPN_1x.yaml.08_39_09.L3obSdQ2",  # noqa B950
    }

    @staticmethod
    def get(name):
        if name.startswith("Caffe2Detectron/COCO"):
            return ModelCatalog._get_c2_detectron_baseline(name)
        if name.startswith("ImageNetPretrained/"):
            return ModelCatalog._get_c2_imagenet_pretrained(name)
        raise RuntimeError("model not present in the catalog: {}".format(name))

    @staticmethod
    def _get_c2_imagenet_pretrained(name):
        prefix = ModelCatalog.S3_C2_DETECTRON_PREFIX
        name = name[len("ImageNetPretrained/") :]
        name = ModelCatalog.C2_IMAGENET_MODELS[name]
        url = "/".join([prefix, name])
        return url

    @staticmethod
    def _get_c2_detectron_baseline(name):
        name = name[len("Caffe2Detectron/COCO/") :]
        url = ModelCatalog.C2_DETECTRON_MODELS[name]
        if "keypoint_rcnn" in name:
            dataset = ModelCatalog.C2_DATASET_COCO_KEYPOINTS
        else:
            dataset = ModelCatalog.C2_DATASET_COCO

        if "35998355/rpn_R-50-C4_1x" in name:
            # this one model is somehow different from others ..
            type = "rpn"
        else:
            type = "generalized_rcnn"

        # Detectron C2 models are stored in the structure defined in `C2_DETECTRON_PATH_FORMAT`.
        url = ModelCatalog.C2_DETECTRON_PATH_FORMAT.format(
            prefix=ModelCatalog.S3_C2_DETECTRON_PREFIX, url=url, type=type, dataset=dataset
        )
        return url


class ModelCatalogHandler(PathHandler):
    """
    Resolve URL like catalog://.
    """

    PREFIX = "catalog://"

    def _get_supported_prefixes(self):
        return [self.PREFIX]

    def _get_local_path(self, path, **kwargs):
        logger = logging.getLogger(__name__)
        catalog_path = ModelCatalog.get(path[len(self.PREFIX) :])
        logger.info("Catalog entry {} points to {}".format(path, catalog_path))
        return PathManager.get_local_path(catalog_path, **kwargs)

    def _open(self, path, mode="r", **kwargs):
        return PathManager.open(self._get_local_path(path), mode, **kwargs)


PathManager.register_handler(ModelCatalogHandler())
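Illustrative sketch (not part of the file above), assuming detectron2 and this module are importable: a catalog name resolves to a concrete download URL through ModelCatalog.get, and "catalog://"-prefixed paths go through ModelCatalogHandler transparently.

# Hedged usage sketch; the model name below is one of the keys in C2_IMAGENET_MODELS.
from detectron2.checkpoint.catalog import ModelCatalog

url = ModelCatalog.get("ImageNetPretrained/MSRA/R-50")
print(url)
# -> https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl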
detectron2/build/lib.linux-x86_64-3.10/detectron2/checkpoint/detection_checkpoint.py
ADDED
@@ -0,0 +1,121 @@
# Copyright (c) Facebook, Inc. and its affiliates.
import logging
import os
import pickle
import torch
from fvcore.common.checkpoint import Checkpointer
from torch.nn.parallel import DistributedDataParallel

import detectron2.utils.comm as comm
from detectron2.utils.file_io import PathManager

from .c2_model_loading import align_and_update_state_dicts


class DetectionCheckpointer(Checkpointer):
    """
    Same as :class:`Checkpointer`, but is able to:
    1. handle models in detectron & detectron2 model zoo, and apply conversions for legacy models.
    2. correctly load checkpoints that are only available on the master worker
    """

    def __init__(self, model, save_dir="", *, save_to_disk=None, **checkpointables):
        is_main_process = comm.is_main_process()
        super().__init__(
            model,
            save_dir,
            save_to_disk=is_main_process if save_to_disk is None else save_to_disk,
            **checkpointables,
        )
        self.path_manager = PathManager

    def load(self, path, *args, **kwargs):
        need_sync = False

        if path and isinstance(self.model, DistributedDataParallel):
            logger = logging.getLogger(__name__)
            path = self.path_manager.get_local_path(path)
            has_file = os.path.isfile(path)
            all_has_file = comm.all_gather(has_file)
            if not all_has_file[0]:
                raise OSError(f"File {path} not found on main worker.")
            if not all(all_has_file):
                logger.warning(
                    f"Not all workers can read checkpoint {path}. "
                    "Training may fail to fully resume."
                )
                # TODO: broadcast the checkpoint file contents from main
                # worker, and load from it instead.
                need_sync = True
            if not has_file:
                path = None  # don't load if not readable
        ret = super().load(path, *args, **kwargs)

        if need_sync:
            logger.info("Broadcasting model states from main worker ...")
            self.model._sync_params_and_buffers()
        return ret

    def _load_file(self, filename):
        if filename.endswith(".pkl"):
            with PathManager.open(filename, "rb") as f:
                data = pickle.load(f, encoding="latin1")
            if "model" in data and "__author__" in data:
                # file is in Detectron2 model zoo format
                self.logger.info("Reading a file from '{}'".format(data["__author__"]))
                return data
            else:
                # assume file is from Caffe2 / Detectron1 model zoo
                if "blobs" in data:
                    # Detection models have "blobs", but ImageNet models don't
                    data = data["blobs"]
                data = {k: v for k, v in data.items() if not k.endswith("_momentum")}
                return {"model": data, "__author__": "Caffe2", "matching_heuristics": True}
        elif filename.endswith(".pyth"):
            # assume file is from pycls; no one else seems to use the ".pyth" extension
            with PathManager.open(filename, "rb") as f:
                data = torch.load(f)
            assert (
                "model_state" in data
            ), f"Cannot load .pyth file {filename}; pycls checkpoints must contain 'model_state'."
            model_state = {
                k: v
                for k, v in data["model_state"].items()
                if not k.endswith("num_batches_tracked")
            }
            return {"model": model_state, "__author__": "pycls", "matching_heuristics": True}

        loaded = super()._load_file(filename)  # load native pth checkpoint
        if "model" not in loaded:
            loaded = {"model": loaded}
        loaded["matching_heuristics"] = True
        return loaded

    def _load_model(self, checkpoint):
        if checkpoint.get("matching_heuristics", False):
            self._convert_ndarray_to_tensor(checkpoint["model"])
            # convert weights by name-matching heuristics
            checkpoint["model"] = align_and_update_state_dicts(
                self.model.state_dict(),
                checkpoint["model"],
                c2_conversion=checkpoint.get("__author__", None) == "Caffe2",
            )
        # for non-caffe2 models, use standard ways to load it
        incompatible = super()._load_model(checkpoint)

        model_buffers = dict(self.model.named_buffers(recurse=False))
        for k in ["pixel_mean", "pixel_std"]:
            # Ignore missing key message about pixel_mean/std.
            # Though they may be missing in old checkpoints, they will be correctly
            # initialized from config anyway.
            if k in model_buffers:
                try:
                    incompatible.missing_keys.remove(k)
                except ValueError:
                    pass
        for k in incompatible.unexpected_keys[:]:
            # Ignore unexpected keys about cell anchors. They exist in old checkpoints
            # but now they are non-persistent buffers and will not be in new checkpoints.
            if "anchor_generator.cell_anchors" in k:
                incompatible.unexpected_keys.remove(k)
        return incompatible
detectron2/build/lib.linux-x86_64-3.10/detectron2/config/__init__.py
ADDED
@@ -0,0 +1,24 @@
# Copyright (c) Facebook, Inc. and its affiliates.
from .compat import downgrade_config, upgrade_config
from .config import CfgNode, get_cfg, global_cfg, set_global_cfg, configurable
from .instantiate import instantiate
from .lazy import LazyCall, LazyConfig

__all__ = [
    "CfgNode",
    "get_cfg",
    "global_cfg",
    "set_global_cfg",
    "downgrade_config",
    "upgrade_config",
    "configurable",
    "instantiate",
    "LazyCall",
    "LazyConfig",
]


from detectron2.utils.env import fixup_module_metadata

fixup_module_metadata(__name__, globals(), __all__)
del fixup_module_metadata
detectron2/build/lib.linux-x86_64-3.10/detectron2/config/compat.py
ADDED
@@ -0,0 +1,229 @@
# Copyright (c) Facebook, Inc. and its affiliates.
"""
Backward compatibility of configs.

Instructions to bump version:
+ It's not needed to bump version if new keys are added.
  It's only needed when backward-incompatible changes happen
  (i.e., some existing keys disappear, or the meaning of a key changes)
+ To bump version, do the following:
    1. Increment _C.VERSION in defaults.py
    2. Add a converter in this file.

      Each ConverterVX has a function "upgrade" which in-place upgrades config from X-1 to X,
      and a function "downgrade" which in-place downgrades config from X to X-1

      In each function, VERSION is left unchanged.

      Each converter assumes that its input has the relevant keys
      (i.e., the input is not a partial config).
    3. Run the tests (test_config.py) to make sure the upgrade & downgrade
       functions are consistent.
"""

import logging
from typing import List, Optional, Tuple

from .config import CfgNode as CN
from .defaults import _C

__all__ = ["upgrade_config", "downgrade_config"]


def upgrade_config(cfg: CN, to_version: Optional[int] = None) -> CN:
    """
    Upgrade a config from its current version to a newer version.

    Args:
        cfg (CfgNode):
        to_version (int): defaults to the latest version.
    """
    cfg = cfg.clone()
    if to_version is None:
        to_version = _C.VERSION

    assert cfg.VERSION <= to_version, "Cannot upgrade from v{} to v{}!".format(
        cfg.VERSION, to_version
    )
    for k in range(cfg.VERSION, to_version):
        converter = globals()["ConverterV" + str(k + 1)]
        converter.upgrade(cfg)
        cfg.VERSION = k + 1
    return cfg


def downgrade_config(cfg: CN, to_version: int) -> CN:
    """
    Downgrade a config from its current version to an older version.

    Args:
        cfg (CfgNode):
        to_version (int):

    Note:
        A general downgrade of arbitrary configs is not always possible due to the
        different functionalities in different versions.
        The purpose of downgrade is only to recover the defaults in old versions,
        allowing it to load an old partial yaml config.
        Therefore, the implementation only needs to fill in the default values
        in the old version when a general downgrade is not possible.
    """
    cfg = cfg.clone()
    assert cfg.VERSION >= to_version, "Cannot downgrade from v{} to v{}!".format(
        cfg.VERSION, to_version
    )
    for k in range(cfg.VERSION, to_version, -1):
        converter = globals()["ConverterV" + str(k)]
        converter.downgrade(cfg)
        cfg.VERSION = k - 1
    return cfg


def guess_version(cfg: CN, filename: str) -> int:
    """
    Guess the version of a partial config where the VERSION field is not specified.
    Returns the version, or the latest if cannot make a guess.

    This makes it easier for users to migrate.
    """
    logger = logging.getLogger(__name__)

    def _has(name: str) -> bool:
        cur = cfg
        for n in name.split("."):
            if n not in cur:
                return False
            cur = cur[n]
        return True

    # Most users' partial configs have "MODEL.WEIGHT", so guess on it
    ret = None
    if _has("MODEL.WEIGHT") or _has("TEST.AUG_ON"):
        ret = 1

    if ret is not None:
        logger.warning("Config '{}' has no VERSION. Assuming it to be v{}.".format(filename, ret))
    else:
        ret = _C.VERSION
        logger.warning(
            "Config '{}' has no VERSION. Assuming it to be compatible with latest v{}.".format(
                filename, ret
            )
        )
    return ret


def _rename(cfg: CN, old: str, new: str) -> None:
    old_keys = old.split(".")
    new_keys = new.split(".")

    def _set(key_seq: List[str], val: str) -> None:
        cur = cfg
        for k in key_seq[:-1]:
            if k not in cur:
                cur[k] = CN()
            cur = cur[k]
        cur[key_seq[-1]] = val

    def _get(key_seq: List[str]) -> CN:
        cur = cfg
        for k in key_seq:
            cur = cur[k]
        return cur

    def _del(key_seq: List[str]) -> None:
        cur = cfg
        for k in key_seq[:-1]:
            cur = cur[k]
        del cur[key_seq[-1]]
        if len(cur) == 0 and len(key_seq) > 1:
            _del(key_seq[:-1])

    _set(new_keys, _get(old_keys))
    _del(old_keys)


class _RenameConverter:
    """
    A converter that handles simple rename.
    """

    RENAME: List[Tuple[str, str]] = []  # list of tuples of (old name, new name)

    @classmethod
    def upgrade(cls, cfg: CN) -> None:
        for old, new in cls.RENAME:
            _rename(cfg, old, new)

    @classmethod
    def downgrade(cls, cfg: CN) -> None:
        for old, new in cls.RENAME[::-1]:
            _rename(cfg, new, old)


class ConverterV1(_RenameConverter):
    RENAME = [("MODEL.RPN_HEAD.NAME", "MODEL.RPN.HEAD_NAME")]


class ConverterV2(_RenameConverter):
    """
    A large bulk of rename, before public release.
    """

    RENAME = [
        ("MODEL.WEIGHT", "MODEL.WEIGHTS"),
        ("MODEL.PANOPTIC_FPN.SEMANTIC_LOSS_SCALE", "MODEL.SEM_SEG_HEAD.LOSS_WEIGHT"),
        ("MODEL.PANOPTIC_FPN.RPN_LOSS_SCALE", "MODEL.RPN.LOSS_WEIGHT"),
        ("MODEL.PANOPTIC_FPN.INSTANCE_LOSS_SCALE", "MODEL.PANOPTIC_FPN.INSTANCE_LOSS_WEIGHT"),
        ("MODEL.PANOPTIC_FPN.COMBINE_ON", "MODEL.PANOPTIC_FPN.COMBINE.ENABLED"),
        (
            "MODEL.PANOPTIC_FPN.COMBINE_OVERLAP_THRESHOLD",
            "MODEL.PANOPTIC_FPN.COMBINE.OVERLAP_THRESH",
        ),
        (
            "MODEL.PANOPTIC_FPN.COMBINE_STUFF_AREA_LIMIT",
            "MODEL.PANOPTIC_FPN.COMBINE.STUFF_AREA_LIMIT",
        ),
        (
            "MODEL.PANOPTIC_FPN.COMBINE_INSTANCES_CONFIDENCE_THRESHOLD",
            "MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH",
        ),
        ("MODEL.ROI_HEADS.SCORE_THRESH", "MODEL.ROI_HEADS.SCORE_THRESH_TEST"),
        ("MODEL.ROI_HEADS.NMS", "MODEL.ROI_HEADS.NMS_THRESH_TEST"),
        ("MODEL.RETINANET.INFERENCE_SCORE_THRESHOLD", "MODEL.RETINANET.SCORE_THRESH_TEST"),
        ("MODEL.RETINANET.INFERENCE_TOPK_CANDIDATES", "MODEL.RETINANET.TOPK_CANDIDATES_TEST"),
        ("MODEL.RETINANET.INFERENCE_NMS_THRESHOLD", "MODEL.RETINANET.NMS_THRESH_TEST"),
        ("TEST.DETECTIONS_PER_IMG", "TEST.DETECTIONS_PER_IMAGE"),
        ("TEST.AUG_ON", "TEST.AUG.ENABLED"),
        ("TEST.AUG_MIN_SIZES", "TEST.AUG.MIN_SIZES"),
        ("TEST.AUG_MAX_SIZE", "TEST.AUG.MAX_SIZE"),
        ("TEST.AUG_FLIP", "TEST.AUG.FLIP"),
    ]

    @classmethod
    def upgrade(cls, cfg: CN) -> None:
        super().upgrade(cfg)

        if cfg.MODEL.META_ARCHITECTURE == "RetinaNet":
            _rename(
                cfg, "MODEL.RETINANET.ANCHOR_ASPECT_RATIOS", "MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS"
            )
            _rename(cfg, "MODEL.RETINANET.ANCHOR_SIZES", "MODEL.ANCHOR_GENERATOR.SIZES")
            del cfg["MODEL"]["RPN"]["ANCHOR_SIZES"]
            del cfg["MODEL"]["RPN"]["ANCHOR_ASPECT_RATIOS"]
        else:
            _rename(cfg, "MODEL.RPN.ANCHOR_ASPECT_RATIOS", "MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS")
            _rename(cfg, "MODEL.RPN.ANCHOR_SIZES", "MODEL.ANCHOR_GENERATOR.SIZES")
            del cfg["MODEL"]["RETINANET"]["ANCHOR_SIZES"]
            del cfg["MODEL"]["RETINANET"]["ANCHOR_ASPECT_RATIOS"]
        del cfg["MODEL"]["RETINANET"]["ANCHOR_STRIDES"]

    @classmethod
    def downgrade(cls, cfg: CN) -> None:
        super().downgrade(cfg)

        _rename(cfg, "MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS", "MODEL.RPN.ANCHOR_ASPECT_RATIOS")
        _rename(cfg, "MODEL.ANCHOR_GENERATOR.SIZES", "MODEL.RPN.ANCHOR_SIZES")
        cfg.MODEL.RETINANET.ANCHOR_ASPECT_RATIOS = cfg.MODEL.RPN.ANCHOR_ASPECT_RATIOS
        cfg.MODEL.RETINANET.ANCHOR_SIZES = cfg.MODEL.RPN.ANCHOR_SIZES
        cfg.MODEL.RETINANET.ANCHOR_STRIDES = []  # this is not used anywhere in any version
detectron2/build/lib.linux-x86_64-3.10/detectron2/config/config.py
ADDED
@@ -0,0 +1,265 @@
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates.

import functools
import inspect
import logging
from fvcore.common.config import CfgNode as _CfgNode

from detectron2.utils.file_io import PathManager


class CfgNode(_CfgNode):
    """
    The same as `fvcore.common.config.CfgNode`, but different in:

    1. Use unsafe yaml loading by default.
       Note that this may lead to arbitrary code execution: you must not
       load a config file from untrusted sources before manually inspecting
       the content of the file.
    2. Support config versioning.
       When attempting to merge an old config, it will convert the old config automatically.

    .. automethod:: clone
    .. automethod:: freeze
    .. automethod:: defrost
    .. automethod:: is_frozen
    .. automethod:: load_yaml_with_base
    .. automethod:: merge_from_list
    .. automethod:: merge_from_other_cfg
    """

    @classmethod
    def _open_cfg(cls, filename):
        return PathManager.open(filename, "r")

    # Note that the default value of allow_unsafe is changed to True
    def merge_from_file(self, cfg_filename: str, allow_unsafe: bool = True) -> None:
        """
        Load content from the given config file and merge it into self.

        Args:
            cfg_filename: config filename
            allow_unsafe: allow unsafe yaml syntax
        """
        assert PathManager.isfile(cfg_filename), f"Config file '{cfg_filename}' does not exist!"
        loaded_cfg = self.load_yaml_with_base(cfg_filename, allow_unsafe=allow_unsafe)
        loaded_cfg = type(self)(loaded_cfg)

        # defaults.py needs to import CfgNode
        from .defaults import _C

        latest_ver = _C.VERSION
        assert (
            latest_ver == self.VERSION
        ), "CfgNode.merge_from_file is only allowed on a config object of latest version!"

        logger = logging.getLogger(__name__)

        loaded_ver = loaded_cfg.get("VERSION", None)
        if loaded_ver is None:
            from .compat import guess_version

            loaded_ver = guess_version(loaded_cfg, cfg_filename)
        assert loaded_ver <= self.VERSION, "Cannot merge a v{} config into a v{} config.".format(
            loaded_ver, self.VERSION
        )

        if loaded_ver == self.VERSION:
            self.merge_from_other_cfg(loaded_cfg)
        else:
            # compat.py needs to import CfgNode
            from .compat import upgrade_config, downgrade_config

            logger.warning(
                "Loading an old v{} config file '{}' by automatically upgrading to v{}. "
                "See docs/CHANGELOG.md for instructions to update your files.".format(
                    loaded_ver, cfg_filename, self.VERSION
                )
            )
            # To convert, first obtain a full config at an old version
            old_self = downgrade_config(self, to_version=loaded_ver)
            old_self.merge_from_other_cfg(loaded_cfg)
            new_config = upgrade_config(old_self)
            self.clear()
            self.update(new_config)

    def dump(self, *args, **kwargs):
        """
        Returns:
            str: a yaml string representation of the config
        """
        # to make it show up in docs
        return super().dump(*args, **kwargs)


global_cfg = CfgNode()


def get_cfg() -> CfgNode:
    """
    Get a copy of the default config.

    Returns:
        a detectron2 CfgNode instance.
    """
    from .defaults import _C

    return _C.clone()


def set_global_cfg(cfg: CfgNode) -> None:
    """
    Let the global config point to the given cfg.

    Assume that the given "cfg" has the key "KEY", after calling
    `set_global_cfg(cfg)`, the key can be accessed by:
    ::
        from detectron2.config import global_cfg
        print(global_cfg.KEY)

    By using a hacky global config, you can access these configs anywhere,
    without having to pass the config object or the values deep into the code.
    This is a hacky feature introduced for quick prototyping / research exploration.
    """
    global global_cfg
    global_cfg.clear()
    global_cfg.update(cfg)


def configurable(init_func=None, *, from_config=None):
    """
    Decorate a function or a class's __init__ method so that it can be called
    with a :class:`CfgNode` object using a :func:`from_config` function that translates
    :class:`CfgNode` to arguments.

    Examples:
    ::
        # Usage 1: Decorator on __init__:
        class A:
            @configurable
            def __init__(self, a, b=2, c=3):
                pass

            @classmethod
            def from_config(cls, cfg):   # 'cfg' must be the first argument
                # Returns kwargs to be passed to __init__
                return {"a": cfg.A, "b": cfg.B}

        a1 = A(a=1, b=2)  # regular construction
        a2 = A(cfg)       # construct with a cfg
        a3 = A(cfg, b=3, c=4)  # construct with extra overwrite

        # Usage 2: Decorator on any function. Needs an extra from_config argument:
        @configurable(from_config=lambda cfg: {"a": cfg.A, "b": cfg.B})
        def a_func(a, b=2, c=3):
            pass

        a1 = a_func(a=1, b=2)  # regular call
        a2 = a_func(cfg)       # call with a cfg
        a3 = a_func(cfg, b=3, c=4)  # call with extra overwrite

    Args:
        init_func (callable): a class's ``__init__`` method in usage 1. The
            class must have a ``from_config`` classmethod which takes `cfg` as
            the first argument.
        from_config (callable): the from_config function in usage 2. It must take `cfg`
            as its first argument.
    """

    if init_func is not None:
        assert (
            inspect.isfunction(init_func)
            and from_config is None
            and init_func.__name__ == "__init__"
        ), "Incorrect use of @configurable. Check API documentation for examples."

        @functools.wraps(init_func)
        def wrapped(self, *args, **kwargs):
            try:
                from_config_func = type(self).from_config
            except AttributeError as e:
                raise AttributeError(
                    "Class with @configurable must have a 'from_config' classmethod."
                ) from e
            if not inspect.ismethod(from_config_func):
                raise TypeError("Class with @configurable must have a 'from_config' classmethod.")

            if _called_with_cfg(*args, **kwargs):
                explicit_args = _get_args_from_config(from_config_func, *args, **kwargs)
                init_func(self, **explicit_args)
            else:
                init_func(self, *args, **kwargs)

        return wrapped

    else:
        if from_config is None:
            return configurable  # @configurable() is made equivalent to @configurable
        assert inspect.isfunction(
            from_config
        ), "from_config argument of configurable must be a function!"

        def wrapper(orig_func):
            @functools.wraps(orig_func)
            def wrapped(*args, **kwargs):
                if _called_with_cfg(*args, **kwargs):
                    explicit_args = _get_args_from_config(from_config, *args, **kwargs)
                    return orig_func(**explicit_args)
                else:
                    return orig_func(*args, **kwargs)

            wrapped.from_config = from_config
            return wrapped

        return wrapper


def _get_args_from_config(from_config_func, *args, **kwargs):
    """
    Use `from_config` to obtain explicit arguments.

    Returns:
        dict: arguments to be used for cls.__init__
    """
    signature = inspect.signature(from_config_func)
    if list(signature.parameters.keys())[0] != "cfg":
        if inspect.isfunction(from_config_func):
            name = from_config_func.__name__
        else:
            name = f"{from_config_func.__self__}.from_config"
        raise TypeError(f"{name} must take 'cfg' as the first argument!")
    support_var_arg = any(
        param.kind in [param.VAR_POSITIONAL, param.VAR_KEYWORD]
        for param in signature.parameters.values()
    )
    if support_var_arg:  # forward all arguments to from_config, if from_config accepts them
        ret = from_config_func(*args, **kwargs)
    else:
        # forward supported arguments to from_config
        supported_arg_names = set(signature.parameters.keys())
        extra_kwargs = {}
        for name in list(kwargs.keys()):
            if name not in supported_arg_names:
                extra_kwargs[name] = kwargs.pop(name)
        ret = from_config_func(*args, **kwargs)
        # forward the other arguments to __init__
        ret.update(extra_kwargs)
    return ret


def _called_with_cfg(*args, **kwargs):
    """
    Returns:
        bool: whether the arguments contain CfgNode and should be considered
            forwarded to from_config.
    """
    from omegaconf import DictConfig

    if len(args) and isinstance(args[0], (_CfgNode, DictConfig)):
        return True
    if isinstance(kwargs.pop("cfg", None), (_CfgNode, DictConfig)):
        return True
    # `from_config`'s first argument is forced to be "cfg".
    # So the above check covers all cases.
    return False
detectron2/build/lib.linux-x86_64-3.10/detectron2/config/defaults.py
ADDED
@@ -0,0 +1,646 @@
# Copyright (c) Facebook, Inc. and its affiliates.
from .config import CfgNode as CN

# NOTE: given the new config system
# (https://detectron2.readthedocs.io/en/latest/tutorials/lazyconfigs.html),
# we will stop adding new functionalities to default CfgNode.

# -----------------------------------------------------------------------------
# Convention about Training / Test specific parameters
# -----------------------------------------------------------------------------
# Whenever an argument can be either used for training or for testing, the
# corresponding name will be post-fixed by a _TRAIN for a training parameter,
# or _TEST for a test-specific parameter.
# For example, the number of images during training will be
# IMAGES_PER_BATCH_TRAIN, while the number of images for testing will be
# IMAGES_PER_BATCH_TEST

# -----------------------------------------------------------------------------
# Config definition
# -----------------------------------------------------------------------------

_C = CN()

# The version number, to upgrade from old configs to new ones if any
# changes happen. It's recommended to keep a VERSION in your config file.
_C.VERSION = 2

_C.MODEL = CN()
_C.MODEL.LOAD_PROPOSALS = False
_C.MODEL.MASK_ON = False
_C.MODEL.KEYPOINT_ON = False
_C.MODEL.DEVICE = "cuda"
_C.MODEL.META_ARCHITECTURE = "GeneralizedRCNN"

# Path (a file path, or URL like detectron2://.., https://..) to a checkpoint file
# to be loaded to the model. You can find available models in the model zoo.
_C.MODEL.WEIGHTS = ""

# Values to be used for image normalization (BGR order, since INPUT.FORMAT defaults to BGR).
# To train on images of different number of channels, just set different mean & std.
# Default values are the mean pixel value from ImageNet: [103.53, 116.28, 123.675]
_C.MODEL.PIXEL_MEAN = [103.530, 116.280, 123.675]
# When using pre-trained models in Detectron1 or any MSRA models,
# std has been absorbed into its conv1 weights, so the std needs to be set 1.
# Otherwise, you can use [57.375, 57.120, 58.395] (ImageNet std)
_C.MODEL.PIXEL_STD = [1.0, 1.0, 1.0]


# -----------------------------------------------------------------------------
# INPUT
# -----------------------------------------------------------------------------
_C.INPUT = CN()
# By default, {MIN,MAX}_SIZE options are used in transforms.ResizeShortestEdge.
# Please refer to ResizeShortestEdge for detailed definition.
# Size of the smallest side of the image during training
_C.INPUT.MIN_SIZE_TRAIN = (800,)
# Sample size of smallest side by choice or random selection from range given by
# INPUT.MIN_SIZE_TRAIN
_C.INPUT.MIN_SIZE_TRAIN_SAMPLING = "choice"
# Maximum size of the side of the image during training
_C.INPUT.MAX_SIZE_TRAIN = 1333
# Size of the smallest side of the image during testing. Set to zero to disable resize in testing.
_C.INPUT.MIN_SIZE_TEST = 800
# Maximum size of the side of the image during testing
_C.INPUT.MAX_SIZE_TEST = 1333
# Mode for flipping images used in data augmentation during training
# choose one of ["horizontal", "vertical", "none"]
_C.INPUT.RANDOM_FLIP = "horizontal"

# `True` if cropping is used for data augmentation during training
_C.INPUT.CROP = CN({"ENABLED": False})
# Cropping type. See documentation of `detectron2.data.transforms.RandomCrop` for explanation.
_C.INPUT.CROP.TYPE = "relative_range"
# Size of crop in range (0, 1] if CROP.TYPE is "relative" or "relative_range" and in number of
# pixels if CROP.TYPE is "absolute"
_C.INPUT.CROP.SIZE = [0.9, 0.9]


# Whether the model needs RGB, YUV, HSV etc.
# Should be one of the modes defined here, as we use PIL to read the image:
# https://pillow.readthedocs.io/en/stable/handbook/concepts.html#concept-modes
# with BGR being the one exception. One can set image format to BGR, we will
# internally use RGB for conversion and flip the channels over
_C.INPUT.FORMAT = "BGR"
# The ground truth mask format that the model will use.
# Mask R-CNN supports either "polygon" or "bitmask" as ground truth.
_C.INPUT.MASK_FORMAT = "polygon"  # alternative: "bitmask"


# -----------------------------------------------------------------------------
# Dataset
# -----------------------------------------------------------------------------
_C.DATASETS = CN()
# List of the dataset names for training. Must be registered in DatasetCatalog
# Samples from these datasets will be merged and used as one dataset.
_C.DATASETS.TRAIN = ()
# List of the pre-computed proposal files for training, which must be consistent
# with datasets listed in DATASETS.TRAIN.
_C.DATASETS.PROPOSAL_FILES_TRAIN = ()
# Number of top scoring precomputed proposals to keep for training
_C.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TRAIN = 2000
# List of the dataset names for testing. Must be registered in DatasetCatalog
_C.DATASETS.TEST = ()
# List of the pre-computed proposal files for test, which must be consistent
# with datasets listed in DATASETS.TEST.
_C.DATASETS.PROPOSAL_FILES_TEST = ()
# Number of top scoring precomputed proposals to keep for test
_C.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TEST = 1000

# -----------------------------------------------------------------------------
# DataLoader
# -----------------------------------------------------------------------------
_C.DATALOADER = CN()
# Number of data loading threads
_C.DATALOADER.NUM_WORKERS = 4
# If True, each batch should contain only images for which the aspect ratio
# is compatible. This groups portrait images together, and landscape images
# are not batched with portrait images.
_C.DATALOADER.ASPECT_RATIO_GROUPING = True
# Options: TrainingSampler, RepeatFactorTrainingSampler
_C.DATALOADER.SAMPLER_TRAIN = "TrainingSampler"
# Repeat threshold for RepeatFactorTrainingSampler
_C.DATALOADER.REPEAT_THRESHOLD = 0.0
# If True, when working on datasets that have instance annotations, the
# training dataloader will filter out images without associated annotations
_C.DATALOADER.FILTER_EMPTY_ANNOTATIONS = True

# ---------------------------------------------------------------------------- #
# Backbone options
# ---------------------------------------------------------------------------- #
_C.MODEL.BACKBONE = CN()

_C.MODEL.BACKBONE.NAME = "build_resnet_backbone"
# Freeze the first several stages so they are not trained.
# There are 5 stages in ResNet. The first is a convolution, and the following
# stages are each group of residual blocks.
_C.MODEL.BACKBONE.FREEZE_AT = 2


# ---------------------------------------------------------------------------- #
# FPN options
# ---------------------------------------------------------------------------- #
_C.MODEL.FPN = CN()
# Names of the input feature maps to be used by FPN
# They must have contiguous power of 2 strides
# e.g., ["res2", "res3", "res4", "res5"]
_C.MODEL.FPN.IN_FEATURES = []
_C.MODEL.FPN.OUT_CHANNELS = 256

# Options: "" (no norm), "GN"
_C.MODEL.FPN.NORM = ""

# Types for fusing the FPN top-down and lateral features. Can be either "sum" or "avg"
_C.MODEL.FPN.FUSE_TYPE = "sum"


# ---------------------------------------------------------------------------- #
# Proposal generator options
# ---------------------------------------------------------------------------- #
_C.MODEL.PROPOSAL_GENERATOR = CN()
# Current proposal generators include "RPN", "RRPN" and "PrecomputedProposals"
_C.MODEL.PROPOSAL_GENERATOR.NAME = "RPN"
# Proposal height and width both need to be greater than MIN_SIZE
# (at the scale used during training or inference)
_C.MODEL.PROPOSAL_GENERATOR.MIN_SIZE = 0


# ---------------------------------------------------------------------------- #
# Anchor generator options
# ---------------------------------------------------------------------------- #
_C.MODEL.ANCHOR_GENERATOR = CN()
# The generator can be any name in the ANCHOR_GENERATOR registry
_C.MODEL.ANCHOR_GENERATOR.NAME = "DefaultAnchorGenerator"
# Anchor sizes (i.e. sqrt of area) in absolute pixels w.r.t. the network input.
# Format: list[list[float]]. SIZES[i] specifies the list of sizes to use for
# IN_FEATURES[i]; len(SIZES) must be equal to len(IN_FEATURES) or 1.
# When len(SIZES) == 1, SIZES[0] is used for all IN_FEATURES.
_C.MODEL.ANCHOR_GENERATOR.SIZES = [[32, 64, 128, 256, 512]]
# Anchor aspect ratios. For each area given in `SIZES`, anchors with different aspect
# ratios are generated by an anchor generator.
# Format: list[list[float]]. ASPECT_RATIOS[i] specifies the list of aspect ratios (H/W)
# to use for IN_FEATURES[i]; len(ASPECT_RATIOS) == len(IN_FEATURES) must be true,
# or len(ASPECT_RATIOS) == 1 is true and aspect ratio list ASPECT_RATIOS[0] is used
|
184 |
+
# for all IN_FEATURES.
|
185 |
+
_C.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.5, 1.0, 2.0]]
|
186 |
+
# Anchor angles.
|
187 |
+
# list[list[float]], the angle in degrees, for each input feature map.
|
188 |
+
# ANGLES[i] specifies the list of angles for IN_FEATURES[i].
|
189 |
+
_C.MODEL.ANCHOR_GENERATOR.ANGLES = [[-90, 0, 90]]
|
190 |
+
# Relative offset between the center of the first anchor and the top-left corner of the image
|
191 |
+
# Value has to be in [0, 1). Recommend to use 0.5, which means half stride.
|
192 |
+
# The value is not expected to affect model accuracy.
|
193 |
+
_C.MODEL.ANCHOR_GENERATOR.OFFSET = 0.0
|
194 |
+
|
195 |
+
# ---------------------------------------------------------------------------- #
|
196 |
+
# RPN options
|
197 |
+
# ---------------------------------------------------------------------------- #
|
198 |
+
_C.MODEL.RPN = CN()
|
199 |
+
_C.MODEL.RPN.HEAD_NAME = "StandardRPNHead" # used by RPN_HEAD_REGISTRY
|
200 |
+
|
201 |
+
# Names of the input feature maps to be used by RPN
|
202 |
+
# e.g., ["p2", "p3", "p4", "p5", "p6"] for FPN
|
203 |
+
_C.MODEL.RPN.IN_FEATURES = ["res4"]
|
204 |
+
# Remove RPN anchors that go outside the image by BOUNDARY_THRESH pixels
|
205 |
+
# Set to -1 or a large value, e.g. 100000, to disable pruning anchors
|
206 |
+
_C.MODEL.RPN.BOUNDARY_THRESH = -1
|
207 |
+
# IOU overlap ratios [BG_IOU_THRESHOLD, FG_IOU_THRESHOLD]
|
208 |
+
# Minimum overlap required between an anchor and ground-truth box for the
|
209 |
+
# (anchor, gt box) pair to be a positive example (IoU >= FG_IOU_THRESHOLD
|
210 |
+
# ==> positive RPN example: 1)
|
211 |
+
# Maximum overlap allowed between an anchor and ground-truth box for the
|
212 |
+
# (anchor, gt box) pair to be a negative examples (IoU < BG_IOU_THRESHOLD
|
213 |
+
# ==> negative RPN example: 0)
|
214 |
+
# Anchors with overlap in between (BG_IOU_THRESHOLD <= IoU < FG_IOU_THRESHOLD)
|
215 |
+
# are ignored (-1)
|
216 |
+
_C.MODEL.RPN.IOU_THRESHOLDS = [0.3, 0.7]
|
217 |
+
_C.MODEL.RPN.IOU_LABELS = [0, -1, 1]
|
218 |
+
# Number of regions per image used to train RPN
|
219 |
+
_C.MODEL.RPN.BATCH_SIZE_PER_IMAGE = 256
|
220 |
+
# Target fraction of foreground (positive) examples per RPN minibatch
|
221 |
+
_C.MODEL.RPN.POSITIVE_FRACTION = 0.5
|
222 |
+
# Options are: "smooth_l1", "giou", "diou", "ciou"
|
223 |
+
_C.MODEL.RPN.BBOX_REG_LOSS_TYPE = "smooth_l1"
|
224 |
+
_C.MODEL.RPN.BBOX_REG_LOSS_WEIGHT = 1.0
|
225 |
+
# Weights on (dx, dy, dw, dh) for normalizing RPN anchor regression targets
|
226 |
+
_C.MODEL.RPN.BBOX_REG_WEIGHTS = (1.0, 1.0, 1.0, 1.0)
|
227 |
+
# The transition point from L1 to L2 loss. Set to 0.0 to make the loss simply L1.
|
228 |
+
_C.MODEL.RPN.SMOOTH_L1_BETA = 0.0
|
229 |
+
_C.MODEL.RPN.LOSS_WEIGHT = 1.0
|
230 |
+
# Number of top scoring RPN proposals to keep before applying NMS
|
231 |
+
# When FPN is used, this is *per FPN level* (not total)
|
232 |
+
_C.MODEL.RPN.PRE_NMS_TOPK_TRAIN = 12000
|
233 |
+
_C.MODEL.RPN.PRE_NMS_TOPK_TEST = 6000
|
234 |
+
# Number of top scoring RPN proposals to keep after applying NMS
|
235 |
+
# When FPN is used, this limit is applied per level and then again to the union
|
236 |
+
# of proposals from all levels
|
237 |
+
# NOTE: When FPN is used, the meaning of this config is different from Detectron1.
|
238 |
+
# It means per-batch topk in Detectron1, but per-image topk here.
|
239 |
+
# See the "find_top_rpn_proposals" function for details.
|
240 |
+
_C.MODEL.RPN.POST_NMS_TOPK_TRAIN = 2000
|
241 |
+
_C.MODEL.RPN.POST_NMS_TOPK_TEST = 1000
|
242 |
+
# NMS threshold used on RPN proposals
|
243 |
+
_C.MODEL.RPN.NMS_THRESH = 0.7
|
244 |
+
# Set this to -1 to use the same number of output channels as input channels.
|
245 |
+
_C.MODEL.RPN.CONV_DIMS = [-1]
|
246 |
+
|
247 |
+
# ---------------------------------------------------------------------------- #
|
248 |
+
# ROI HEADS options
|
249 |
+
# ---------------------------------------------------------------------------- #
|
250 |
+
_C.MODEL.ROI_HEADS = CN()
|
251 |
+
_C.MODEL.ROI_HEADS.NAME = "Res5ROIHeads"
|
252 |
+
# Number of foreground classes
|
253 |
+
_C.MODEL.ROI_HEADS.NUM_CLASSES = 80
|
254 |
+
# Names of the input feature maps to be used by ROI heads
|
255 |
+
# Currently all heads (box, mask, ...) use the same input feature map list
|
256 |
+
# e.g., ["p2", "p3", "p4", "p5"] is commonly used for FPN
|
257 |
+
_C.MODEL.ROI_HEADS.IN_FEATURES = ["res4"]
|
258 |
+
# IOU overlap ratios [IOU_THRESHOLD]
|
259 |
+
# Overlap threshold for an RoI to be considered background (if < IOU_THRESHOLD)
|
260 |
+
# Overlap threshold for an RoI to be considered foreground (if >= IOU_THRESHOLD)
|
261 |
+
_C.MODEL.ROI_HEADS.IOU_THRESHOLDS = [0.5]
|
262 |
+
_C.MODEL.ROI_HEADS.IOU_LABELS = [0, 1]
|
263 |
+
# RoI minibatch size *per image* (number of regions of interest [ROIs]) during training
|
264 |
+
# Total number of RoIs per training minibatch =
|
265 |
+
# ROI_HEADS.BATCH_SIZE_PER_IMAGE * SOLVER.IMS_PER_BATCH
|
266 |
+
# E.g., a common configuration is: 512 * 16 = 8192
|
267 |
+
_C.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512
|
268 |
+
# Target fraction of RoI minibatch that is labeled foreground (i.e. class > 0)
|
269 |
+
_C.MODEL.ROI_HEADS.POSITIVE_FRACTION = 0.25
|
270 |
+
|
271 |
+
# Only used on test mode
|
272 |
+
|
273 |
+
# Minimum score threshold (assuming scores in a [0, 1] range); a value chosen to
|
274 |
+
# balance obtaining high recall with not having too many low precision
|
275 |
+
# detections that will slow down inference post processing steps (like NMS)
|
276 |
+
# A default threshold of 0.0 increases AP by ~0.2-0.3 but significantly slows down
|
277 |
+
# inference.
|
278 |
+
_C.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.05
|
279 |
+
# Overlap threshold used for non-maximum suppression (suppress boxes with
|
280 |
+
# IoU >= this threshold)
|
281 |
+
_C.MODEL.ROI_HEADS.NMS_THRESH_TEST = 0.5
|
282 |
+
# If True, augment proposals with ground-truth boxes before sampling proposals to
|
283 |
+
# train ROI heads.
|
284 |
+
_C.MODEL.ROI_HEADS.PROPOSAL_APPEND_GT = True
|
285 |
+
|
286 |
+
# ---------------------------------------------------------------------------- #
|
287 |
+
# Box Head
|
288 |
+
# ---------------------------------------------------------------------------- #
|
289 |
+
_C.MODEL.ROI_BOX_HEAD = CN()
|
290 |
+
# C4 don't use head name option
|
291 |
+
# Options for non-C4 models: FastRCNNConvFCHead,
|
292 |
+
_C.MODEL.ROI_BOX_HEAD.NAME = ""
|
293 |
+
# Options are: "smooth_l1", "giou", "diou", "ciou"
|
294 |
+
_C.MODEL.ROI_BOX_HEAD.BBOX_REG_LOSS_TYPE = "smooth_l1"
|
295 |
+
# The final scaling coefficient on the box regression loss, used to balance the magnitude of its
|
296 |
+
# gradients with other losses in the model. See also `MODEL.ROI_KEYPOINT_HEAD.LOSS_WEIGHT`.
|
297 |
+
_C.MODEL.ROI_BOX_HEAD.BBOX_REG_LOSS_WEIGHT = 1.0
|
298 |
+
# Default weights on (dx, dy, dw, dh) for normalizing bbox regression targets
|
299 |
+
# These are empirically chosen to approximately lead to unit variance targets
|
300 |
+
_C.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10.0, 10.0, 5.0, 5.0)
|
301 |
+
# The transition point from L1 to L2 loss. Set to 0.0 to make the loss simply L1.
|
302 |
+
_C.MODEL.ROI_BOX_HEAD.SMOOTH_L1_BETA = 0.0
|
303 |
+
_C.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION = 14
|
304 |
+
_C.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO = 0
|
305 |
+
# Type of pooling operation applied to the incoming feature map for each RoI
|
306 |
+
_C.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignV2"
|
307 |
+
|
308 |
+
_C.MODEL.ROI_BOX_HEAD.NUM_FC = 0
|
309 |
+
# Hidden layer dimension for FC layers in the RoI box head
|
310 |
+
_C.MODEL.ROI_BOX_HEAD.FC_DIM = 1024
|
311 |
+
_C.MODEL.ROI_BOX_HEAD.NUM_CONV = 0
|
312 |
+
# Channel dimension for Conv layers in the RoI box head
|
313 |
+
_C.MODEL.ROI_BOX_HEAD.CONV_DIM = 256
|
314 |
+
# Normalization method for the convolution layers.
|
315 |
+
# Options: "" (no norm), "GN", "SyncBN".
|
316 |
+
_C.MODEL.ROI_BOX_HEAD.NORM = ""
|
317 |
+
# Whether to use class agnostic for bbox regression
|
318 |
+
_C.MODEL.ROI_BOX_HEAD.CLS_AGNOSTIC_BBOX_REG = False
|
319 |
+
# If true, RoI heads use bounding boxes predicted by the box head rather than proposal boxes.
|
320 |
+
_C.MODEL.ROI_BOX_HEAD.TRAIN_ON_PRED_BOXES = False
|
321 |
+
|
322 |
+
# Federated loss can be used to improve the training of LVIS
|
323 |
+
_C.MODEL.ROI_BOX_HEAD.USE_FED_LOSS = False
|
324 |
+
# Sigmoid cross entrophy is used with federated loss
|
325 |
+
_C.MODEL.ROI_BOX_HEAD.USE_SIGMOID_CE = False
|
326 |
+
# The power value applied to image_count when calcualting frequency weight
|
327 |
+
_C.MODEL.ROI_BOX_HEAD.FED_LOSS_FREQ_WEIGHT_POWER = 0.5
|
328 |
+
# Number of classes to keep in total
|
329 |
+
_C.MODEL.ROI_BOX_HEAD.FED_LOSS_NUM_CLASSES = 50
|
330 |
+
|
331 |
+
# ---------------------------------------------------------------------------- #
|
332 |
+
# Cascaded Box Head
|
333 |
+
# ---------------------------------------------------------------------------- #
|
334 |
+
_C.MODEL.ROI_BOX_CASCADE_HEAD = CN()
|
335 |
+
# The number of cascade stages is implicitly defined by the length of the following two configs.
|
336 |
+
_C.MODEL.ROI_BOX_CASCADE_HEAD.BBOX_REG_WEIGHTS = (
|
337 |
+
(10.0, 10.0, 5.0, 5.0),
|
338 |
+
(20.0, 20.0, 10.0, 10.0),
|
339 |
+
(30.0, 30.0, 15.0, 15.0),
|
340 |
+
)
|
341 |
+
_C.MODEL.ROI_BOX_CASCADE_HEAD.IOUS = (0.5, 0.6, 0.7)
|
342 |
+
|
343 |
+
|
344 |
+
# ---------------------------------------------------------------------------- #
|
345 |
+
# Mask Head
|
346 |
+
# ---------------------------------------------------------------------------- #
|
347 |
+
_C.MODEL.ROI_MASK_HEAD = CN()
|
348 |
+
_C.MODEL.ROI_MASK_HEAD.NAME = "MaskRCNNConvUpsampleHead"
|
349 |
+
_C.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION = 14
|
350 |
+
_C.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO = 0
|
351 |
+
_C.MODEL.ROI_MASK_HEAD.NUM_CONV = 0 # The number of convs in the mask head
|
352 |
+
_C.MODEL.ROI_MASK_HEAD.CONV_DIM = 256
|
353 |
+
# Normalization method for the convolution layers.
|
354 |
+
# Options: "" (no norm), "GN", "SyncBN".
|
355 |
+
_C.MODEL.ROI_MASK_HEAD.NORM = ""
|
356 |
+
# Whether to use class agnostic for mask prediction
|
357 |
+
_C.MODEL.ROI_MASK_HEAD.CLS_AGNOSTIC_MASK = False
|
358 |
+
# Type of pooling operation applied to the incoming feature map for each RoI
|
359 |
+
_C.MODEL.ROI_MASK_HEAD.POOLER_TYPE = "ROIAlignV2"
|
360 |
+
|
361 |
+
|
362 |
+
# ---------------------------------------------------------------------------- #
|
363 |
+
# Keypoint Head
|
364 |
+
# ---------------------------------------------------------------------------- #
|
365 |
+
_C.MODEL.ROI_KEYPOINT_HEAD = CN()
|
366 |
+
_C.MODEL.ROI_KEYPOINT_HEAD.NAME = "KRCNNConvDeconvUpsampleHead"
|
367 |
+
_C.MODEL.ROI_KEYPOINT_HEAD.POOLER_RESOLUTION = 14
|
368 |
+
_C.MODEL.ROI_KEYPOINT_HEAD.POOLER_SAMPLING_RATIO = 0
|
369 |
+
_C.MODEL.ROI_KEYPOINT_HEAD.CONV_DIMS = tuple(512 for _ in range(8))
|
370 |
+
_C.MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS = 17 # 17 is the number of keypoints in COCO.
|
371 |
+
|
372 |
+
# Images with too few (or no) keypoints are excluded from training.
|
373 |
+
_C.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE = 1
|
374 |
+
# Normalize by the total number of visible keypoints in the minibatch if True.
|
375 |
+
# Otherwise, normalize by the total number of keypoints that could ever exist
|
376 |
+
# in the minibatch.
|
377 |
+
# The keypoint softmax loss is only calculated on visible keypoints.
|
378 |
+
# Since the number of visible keypoints can vary significantly between
|
379 |
+
# minibatches, this has the effect of up-weighting the importance of
|
380 |
+
# minibatches with few visible keypoints. (Imagine the extreme case of
|
381 |
+
# only one visible keypoint versus N: in the case of N, each one
|
382 |
+
# contributes 1/N to the gradient compared to the single keypoint
|
383 |
+
# determining the gradient direction). Instead, we can normalize the
|
384 |
+
# loss by the total number of keypoints, if it were the case that all
|
385 |
+
# keypoints were visible in a full minibatch. (Returning to the example,
|
386 |
+
# this means that the one visible keypoint contributes as much as each
|
387 |
+
# of the N keypoints.)
|
388 |
+
_C.MODEL.ROI_KEYPOINT_HEAD.NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS = True
|
389 |
+
# Multi-task loss weight to use for keypoints
|
390 |
+
# Recommended values:
|
391 |
+
# - use 1.0 if NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS is True
|
392 |
+
# - use 4.0 if NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS is False
|
393 |
+
_C.MODEL.ROI_KEYPOINT_HEAD.LOSS_WEIGHT = 1.0
|
394 |
+
# Type of pooling operation applied to the incoming feature map for each RoI
|
395 |
+
_C.MODEL.ROI_KEYPOINT_HEAD.POOLER_TYPE = "ROIAlignV2"
|
396 |
+
|
397 |
+
# ---------------------------------------------------------------------------- #
|
398 |
+
# Semantic Segmentation Head
|
399 |
+
# ---------------------------------------------------------------------------- #
|
400 |
+
_C.MODEL.SEM_SEG_HEAD = CN()
|
401 |
+
_C.MODEL.SEM_SEG_HEAD.NAME = "SemSegFPNHead"
|
402 |
+
_C.MODEL.SEM_SEG_HEAD.IN_FEATURES = ["p2", "p3", "p4", "p5"]
|
403 |
+
# Label in the semantic segmentation ground truth that is ignored, i.e., no loss is calculated for
|
404 |
+
# the correposnding pixel.
|
405 |
+
_C.MODEL.SEM_SEG_HEAD.IGNORE_VALUE = 255
|
406 |
+
# Number of classes in the semantic segmentation head
|
407 |
+
_C.MODEL.SEM_SEG_HEAD.NUM_CLASSES = 54
|
408 |
+
# Number of channels in the 3x3 convs inside semantic-FPN heads.
|
409 |
+
_C.MODEL.SEM_SEG_HEAD.CONVS_DIM = 128
|
410 |
+
# Outputs from semantic-FPN heads are up-scaled to the COMMON_STRIDE stride.
|
411 |
+
_C.MODEL.SEM_SEG_HEAD.COMMON_STRIDE = 4
|
412 |
+
# Normalization method for the convolution layers. Options: "" (no norm), "GN".
|
413 |
+
_C.MODEL.SEM_SEG_HEAD.NORM = "GN"
|
414 |
+
_C.MODEL.SEM_SEG_HEAD.LOSS_WEIGHT = 1.0
|
415 |
+
|
416 |
+
_C.MODEL.PANOPTIC_FPN = CN()
|
417 |
+
# Scaling of all losses from instance detection / segmentation head.
|
418 |
+
_C.MODEL.PANOPTIC_FPN.INSTANCE_LOSS_WEIGHT = 1.0
|
419 |
+
|
420 |
+
# options when combining instance & semantic segmentation outputs
|
421 |
+
_C.MODEL.PANOPTIC_FPN.COMBINE = CN({"ENABLED": True}) # "COMBINE.ENABLED" is deprecated & not used
|
422 |
+
_C.MODEL.PANOPTIC_FPN.COMBINE.OVERLAP_THRESH = 0.5
|
423 |
+
_C.MODEL.PANOPTIC_FPN.COMBINE.STUFF_AREA_LIMIT = 4096
|
424 |
+
_C.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH = 0.5
|
425 |
+
|
426 |
+
|
427 |
+
# ---------------------------------------------------------------------------- #
|
428 |
+
# RetinaNet Head
|
429 |
+
# ---------------------------------------------------------------------------- #
|
430 |
+
_C.MODEL.RETINANET = CN()
|
431 |
+
|
432 |
+
# This is the number of foreground classes.
|
433 |
+
_C.MODEL.RETINANET.NUM_CLASSES = 80
|
434 |
+
|
435 |
+
_C.MODEL.RETINANET.IN_FEATURES = ["p3", "p4", "p5", "p6", "p7"]
|
436 |
+
|
437 |
+
# Convolutions to use in the cls and bbox tower
|
438 |
+
# NOTE: this doesn't include the last conv for logits
|
439 |
+
_C.MODEL.RETINANET.NUM_CONVS = 4
|
440 |
+
|
441 |
+
# IoU overlap ratio [bg, fg] for labeling anchors.
|
442 |
+
# Anchors with < bg are labeled negative (0)
|
443 |
+
# Anchors with >= bg and < fg are ignored (-1)
|
444 |
+
# Anchors with >= fg are labeled positive (1)
|
445 |
+
_C.MODEL.RETINANET.IOU_THRESHOLDS = [0.4, 0.5]
|
446 |
+
_C.MODEL.RETINANET.IOU_LABELS = [0, -1, 1]
|
447 |
+
|
448 |
+
# Prior prob for rare case (i.e. foreground) at the beginning of training.
|
449 |
+
# This is used to set the bias for the logits layer of the classifier subnet.
|
450 |
+
# This improves training stability in the case of heavy class imbalance.
|
451 |
+
_C.MODEL.RETINANET.PRIOR_PROB = 0.01
|
452 |
+
|
453 |
+
# Inference cls score threshold, only anchors with score > INFERENCE_TH are
|
454 |
+
# considered for inference (to improve speed)
|
455 |
+
_C.MODEL.RETINANET.SCORE_THRESH_TEST = 0.05
|
456 |
+
# Select topk candidates before NMS
|
457 |
+
_C.MODEL.RETINANET.TOPK_CANDIDATES_TEST = 1000
|
458 |
+
_C.MODEL.RETINANET.NMS_THRESH_TEST = 0.5
|
459 |
+
|
460 |
+
# Weights on (dx, dy, dw, dh) for normalizing Retinanet anchor regression targets
|
461 |
+
_C.MODEL.RETINANET.BBOX_REG_WEIGHTS = (1.0, 1.0, 1.0, 1.0)
|
462 |
+
|
463 |
+
# Loss parameters
|
464 |
+
_C.MODEL.RETINANET.FOCAL_LOSS_GAMMA = 2.0
|
465 |
+
_C.MODEL.RETINANET.FOCAL_LOSS_ALPHA = 0.25
|
466 |
+
_C.MODEL.RETINANET.SMOOTH_L1_LOSS_BETA = 0.1
|
467 |
+
# Options are: "smooth_l1", "giou", "diou", "ciou"
|
468 |
+
_C.MODEL.RETINANET.BBOX_REG_LOSS_TYPE = "smooth_l1"
|
469 |
+
|
470 |
+
# One of BN, SyncBN, FrozenBN, GN
|
471 |
+
# Only supports GN until unshared norm is implemented
|
472 |
+
_C.MODEL.RETINANET.NORM = ""
|
473 |
+
|
474 |
+
|
475 |
+
# ---------------------------------------------------------------------------- #
|
476 |
+
# ResNe[X]t options (ResNets = {ResNet, ResNeXt}
|
477 |
+
# Note that parts of a resnet may be used for both the backbone and the head
|
478 |
+
# These options apply to both
|
479 |
+
# ---------------------------------------------------------------------------- #
|
480 |
+
_C.MODEL.RESNETS = CN()
|
481 |
+
|
482 |
+
_C.MODEL.RESNETS.DEPTH = 50
|
483 |
+
_C.MODEL.RESNETS.OUT_FEATURES = ["res4"] # res4 for C4 backbone, res2..5 for FPN backbone
|
484 |
+
|
485 |
+
# Number of groups to use; 1 ==> ResNet; > 1 ==> ResNeXt
|
486 |
+
_C.MODEL.RESNETS.NUM_GROUPS = 1
|
487 |
+
|
488 |
+
# Options: FrozenBN, GN, "SyncBN", "BN"
|
489 |
+
_C.MODEL.RESNETS.NORM = "FrozenBN"
|
490 |
+
|
491 |
+
# Baseline width of each group.
|
492 |
+
# Scaling this parameters will scale the width of all bottleneck layers.
|
493 |
+
_C.MODEL.RESNETS.WIDTH_PER_GROUP = 64
|
494 |
+
|
495 |
+
# Place the stride 2 conv on the 1x1 filter
|
496 |
+
# Use True only for the original MSRA ResNet; use False for C2 and Torch models
|
497 |
+
_C.MODEL.RESNETS.STRIDE_IN_1X1 = True
|
498 |
+
|
499 |
+
# Apply dilation in stage "res5"
|
500 |
+
_C.MODEL.RESNETS.RES5_DILATION = 1
|
501 |
+
|
502 |
+
# Output width of res2. Scaling this parameters will scale the width of all 1x1 convs in ResNet
|
503 |
+
# For R18 and R34, this needs to be set to 64
|
504 |
+
_C.MODEL.RESNETS.RES2_OUT_CHANNELS = 256
|
505 |
+
_C.MODEL.RESNETS.STEM_OUT_CHANNELS = 64
|
506 |
+
|
507 |
+
# Apply Deformable Convolution in stages
|
508 |
+
# Specify if apply deform_conv on Res2, Res3, Res4, Res5
|
509 |
+
_C.MODEL.RESNETS.DEFORM_ON_PER_STAGE = [False, False, False, False]
|
510 |
+
# Use True to use modulated deform_conv (DeformableV2, https://arxiv.org/abs/1811.11168);
|
511 |
+
# Use False for DeformableV1.
|
512 |
+
_C.MODEL.RESNETS.DEFORM_MODULATED = False
|
513 |
+
# Number of groups in deformable conv.
|
514 |
+
_C.MODEL.RESNETS.DEFORM_NUM_GROUPS = 1
|
515 |
+
|
516 |
+
|
517 |
+
# ---------------------------------------------------------------------------- #
|
518 |
+
# Solver
|
519 |
+
# ---------------------------------------------------------------------------- #
|
520 |
+
_C.SOLVER = CN()
|
521 |
+
|
522 |
+
# Options: WarmupMultiStepLR, WarmupCosineLR.
|
523 |
+
# See detectron2/solver/build.py for definition.
|
524 |
+
_C.SOLVER.LR_SCHEDULER_NAME = "WarmupMultiStepLR"
|
525 |
+
|
526 |
+
_C.SOLVER.MAX_ITER = 40000
|
527 |
+
|
528 |
+
_C.SOLVER.BASE_LR = 0.001
|
529 |
+
# The end lr, only used by WarmupCosineLR
|
530 |
+
_C.SOLVER.BASE_LR_END = 0.0
|
531 |
+
|
532 |
+
_C.SOLVER.MOMENTUM = 0.9
|
533 |
+
|
534 |
+
_C.SOLVER.NESTEROV = False
|
535 |
+
|
536 |
+
_C.SOLVER.WEIGHT_DECAY = 0.0001
|
537 |
+
# The weight decay that's applied to parameters of normalization layers
|
538 |
+
# (typically the affine transformation)
|
539 |
+
_C.SOLVER.WEIGHT_DECAY_NORM = 0.0
|
540 |
+
|
541 |
+
_C.SOLVER.GAMMA = 0.1
|
542 |
+
# The iteration number to decrease learning rate by GAMMA.
|
543 |
+
_C.SOLVER.STEPS = (30000,)
|
544 |
+
|
545 |
+
_C.SOLVER.WARMUP_FACTOR = 1.0 / 1000
|
546 |
+
_C.SOLVER.WARMUP_ITERS = 1000
|
547 |
+
_C.SOLVER.WARMUP_METHOD = "linear"
|
548 |
+
|
549 |
+
# Save a checkpoint after every this number of iterations
|
550 |
+
_C.SOLVER.CHECKPOINT_PERIOD = 5000
|
551 |
+
|
552 |
+
# Number of images per batch across all machines. This is also the number
|
553 |
+
# of training images per step (i.e. per iteration). If we use 16 GPUs
|
554 |
+
# and IMS_PER_BATCH = 32, each GPU will see 2 images per batch.
|
555 |
+
# May be adjusted automatically if REFERENCE_WORLD_SIZE is set.
|
556 |
+
_C.SOLVER.IMS_PER_BATCH = 16
|
557 |
+
|
558 |
+
# The reference number of workers (GPUs) this config is meant to train with.
|
559 |
+
# It takes no effect when set to 0.
|
560 |
+
# With a non-zero value, it will be used by DefaultTrainer to compute a desired
|
561 |
+
# per-worker batch size, and then scale the other related configs (total batch size,
|
562 |
+
# learning rate, etc) to match the per-worker batch size.
|
563 |
+
# See documentation of `DefaultTrainer.auto_scale_workers` for details:
|
564 |
+
_C.SOLVER.REFERENCE_WORLD_SIZE = 0
|
565 |
+
|
566 |
+
# Detectron v1 (and previous detection code) used a 2x higher LR and 0 WD for
|
567 |
+
# biases. This is not useful (at least for recent models). You should avoid
|
568 |
+
# changing these and they exist only to reproduce Detectron v1 training if
|
569 |
+
# desired.
|
570 |
+
_C.SOLVER.BIAS_LR_FACTOR = 1.0
|
571 |
+
_C.SOLVER.WEIGHT_DECAY_BIAS = None # None means following WEIGHT_DECAY
|
572 |
+
|
573 |
+
# Gradient clipping
|
574 |
+
_C.SOLVER.CLIP_GRADIENTS = CN({"ENABLED": False})
|
575 |
+
# Type of gradient clipping, currently 2 values are supported:
|
576 |
+
# - "value": the absolute values of elements of each gradients are clipped
|
577 |
+
# - "norm": the norm of the gradient for each parameter is clipped thus
|
578 |
+
# affecting all elements in the parameter
|
579 |
+
_C.SOLVER.CLIP_GRADIENTS.CLIP_TYPE = "value"
|
580 |
+
# Maximum absolute value used for clipping gradients
|
581 |
+
_C.SOLVER.CLIP_GRADIENTS.CLIP_VALUE = 1.0
|
582 |
+
# Floating point number p for L-p norm to be used with the "norm"
|
583 |
+
# gradient clipping type; for L-inf, please specify .inf
|
584 |
+
_C.SOLVER.CLIP_GRADIENTS.NORM_TYPE = 2.0
|
585 |
+
|
586 |
+
# Enable automatic mixed precision for training
|
587 |
+
# Note that this does not change model's inference behavior.
|
588 |
+
# To use AMP in inference, run inference under autocast()
|
589 |
+
_C.SOLVER.AMP = CN({"ENABLED": False})
|
590 |
+
|
591 |
+
# ---------------------------------------------------------------------------- #
|
592 |
+
# Specific test options
|
593 |
+
# ---------------------------------------------------------------------------- #
|
594 |
+
_C.TEST = CN()
|
595 |
+
# For end-to-end tests to verify the expected accuracy.
|
596 |
+
# Each item is [task, metric, value, tolerance]
|
597 |
+
# e.g.: [['bbox', 'AP', 38.5, 0.2]]
|
598 |
+
_C.TEST.EXPECTED_RESULTS = []
|
599 |
+
# The period (in terms of steps) to evaluate the model during training.
|
600 |
+
# Set to 0 to disable.
|
601 |
+
_C.TEST.EVAL_PERIOD = 0
|
602 |
+
# The sigmas used to calculate keypoint OKS. See http://cocodataset.org/#keypoints-eval
|
603 |
+
# When empty, it will use the defaults in COCO.
|
604 |
+
# Otherwise it should be a list[float] with the same length as ROI_KEYPOINT_HEAD.NUM_KEYPOINTS.
|
605 |
+
_C.TEST.KEYPOINT_OKS_SIGMAS = []
|
606 |
+
# Maximum number of detections to return per image during inference (100 is
|
607 |
+
# based on the limit established for the COCO dataset).
|
608 |
+
_C.TEST.DETECTIONS_PER_IMAGE = 100
|
609 |
+
|
610 |
+
_C.TEST.AUG = CN({"ENABLED": False})
|
611 |
+
_C.TEST.AUG.MIN_SIZES = (400, 500, 600, 700, 800, 900, 1000, 1100, 1200)
|
612 |
+
_C.TEST.AUG.MAX_SIZE = 4000
|
613 |
+
_C.TEST.AUG.FLIP = True
|
614 |
+
|
615 |
+
_C.TEST.PRECISE_BN = CN({"ENABLED": False})
|
616 |
+
_C.TEST.PRECISE_BN.NUM_ITER = 200
|
617 |
+
|
618 |
+
# ---------------------------------------------------------------------------- #
|
619 |
+
# Misc options
|
620 |
+
# ---------------------------------------------------------------------------- #
|
621 |
+
# Directory where output files are written
|
622 |
+
_C.OUTPUT_DIR = "./output"
|
623 |
+
# Set seed to negative to fully randomize everything.
|
624 |
+
# Set seed to positive to use a fixed seed. Note that a fixed seed increases
|
625 |
+
# reproducibility but does not guarantee fully deterministic behavior.
|
626 |
+
# Disabling all parallelism further increases reproducibility.
|
627 |
+
_C.SEED = -1
|
628 |
+
# Benchmark different cudnn algorithms.
|
629 |
+
# If input images have very different sizes, this option will have large overhead
|
630 |
+
# for about 10k iterations. It usually hurts total time, but can benefit for certain models.
|
631 |
+
# If input images have the same or similar sizes, benchmark is often helpful.
|
632 |
+
_C.CUDNN_BENCHMARK = False
|
633 |
+
# The period (in terms of steps) for minibatch visualization at train time.
|
634 |
+
# Set to 0 to disable.
|
635 |
+
_C.VIS_PERIOD = 0
|
636 |
+
|
637 |
+
# global config is for quick hack purposes.
|
638 |
+
# You can set them in command line or config files,
|
639 |
+
# and access it with:
|
640 |
+
#
|
641 |
+
# from detectron2.config import global_cfg
|
642 |
+
# print(global_cfg.HACK)
|
643 |
+
#
|
644 |
+
# Do not commit any configs into it.
|
645 |
+
_C.GLOBAL = CN()
|
646 |
+
_C.GLOBAL.HACK = 1.0
|
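As a quick orientation for how these defaults are consumed (a minimal sketch, not part of defaults.py): detectron2's get_cfg() returns a clone of the _C tree above, and individual values can then be overridden from a key/value list or a project YAML before the config is frozen. The override values below are illustrative, not recommendations from this repo.

# Sketch: consuming the defaults defined above.
from detectron2.config import get_cfg

cfg = get_cfg()  # returns a clone of the _C tree defined in this file
cfg.merge_from_list(["SOLVER.IMS_PER_BATCH", 8, "SOLVER.BASE_LR", 0.0005])
cfg.freeze()
print(cfg.MODEL.ROI_HEADS.NUM_CLASSES)  # 80 unless overridden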
detectron2/build/lib.linux-x86_64-3.10/detectron2/config/instantiate.py
ADDED
@@ -0,0 +1,83 @@
# Copyright (c) Facebook, Inc. and its affiliates.

import collections.abc as abc
import dataclasses
import logging
from typing import Any

from detectron2.utils.registry import _convert_target_to_string, locate

__all__ = ["dump_dataclass", "instantiate"]


def dump_dataclass(obj: Any):
    """
    Dump a dataclass recursively into a dict that can be later instantiated.

    Args:
        obj: a dataclass object

    Returns:
        dict
    """
    assert dataclasses.is_dataclass(obj) and not isinstance(
        obj, type
    ), "dump_dataclass() requires an instance of a dataclass."
    ret = {"_target_": _convert_target_to_string(type(obj))}
    for f in dataclasses.fields(obj):
        v = getattr(obj, f.name)
        if dataclasses.is_dataclass(v):
            v = dump_dataclass(v)
        if isinstance(v, (list, tuple)):
            v = [dump_dataclass(x) if dataclasses.is_dataclass(x) else x for x in v]
        ret[f.name] = v
    return ret


def instantiate(cfg):
    """
    Recursively instantiate objects defined in dictionaries by
    "_target_" and arguments.

    Args:
        cfg: a dict-like object with "_target_" that defines the caller, and
            other keys that define the arguments

    Returns:
        object instantiated by cfg
    """
    from omegaconf import ListConfig

    if isinstance(cfg, ListConfig):
        lst = [instantiate(x) for x in cfg]
        return ListConfig(lst, flags={"allow_objects": True})
    if isinstance(cfg, list):
        # Specialize for list, because many classes take
        # list[objects] as arguments, such as ResNet, DatasetMapper
        return [instantiate(x) for x in cfg]

    if isinstance(cfg, abc.Mapping) and "_target_" in cfg:
        # conceptually equivalent to hydra.utils.instantiate(cfg) with _convert_=all,
        # but faster: https://github.com/facebookresearch/hydra/issues/1200
        cfg = {k: instantiate(v) for k, v in cfg.items()}
        cls = cfg.pop("_target_")
        cls = instantiate(cls)

        if isinstance(cls, str):
            cls_name = cls
            cls = locate(cls_name)
            assert cls is not None, cls_name
        else:
            try:
                cls_name = cls.__module__ + "." + cls.__qualname__
            except Exception:
                # target could be anything, so the above could fail
                cls_name = str(cls)
        assert callable(cls), f"_target_ {cls} does not define a callable object"
        try:
            return cls(**cfg)
        except TypeError:
            logger = logging.getLogger(__name__)
            logger.error(f"Error when instantiating {cls_name}!")
            raise
    return cfg  # return as-is if we don't know what to do
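A minimal usage sketch for the "_target_" convention implemented above (the layer and its arguments are only illustrative):

# Sketch: describe a call lazily as a dict-like config, then build it with instantiate().
import torch.nn as nn
from detectron2.config import LazyCall, instantiate

conv_cfg = LazyCall(nn.Conv2d)(in_channels=3, out_channels=16, kernel_size=3)
conv_cfg.out_channels = 32   # still just config; nothing has been constructed yet
conv = instantiate(conv_cfg)  # the nn.Conv2d module is built here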
detectron2/build/lib.linux-x86_64-3.10/detectron2/config/lazy.py
ADDED
@@ -0,0 +1,400 @@
# Copyright (c) Facebook, Inc. and its affiliates.

import ast
import builtins
import collections.abc as abc
import importlib
import inspect
import logging
import os
import uuid
from contextlib import contextmanager
from copy import deepcopy
from dataclasses import is_dataclass
from typing import List, Tuple, Union
import cloudpickle
import yaml
from omegaconf import DictConfig, ListConfig, OmegaConf

from detectron2.utils.file_io import PathManager
from detectron2.utils.registry import _convert_target_to_string

__all__ = ["LazyCall", "LazyConfig"]


class LazyCall:
    """
    Wrap a callable so that when it's called, the call will not be executed,
    but returns a dict that describes the call.

    LazyCall object has to be called with only keyword arguments. Positional
    arguments are not yet supported.

    Examples:
    ::
        from detectron2.config import instantiate, LazyCall

        layer_cfg = LazyCall(nn.Conv2d)(in_channels=32, out_channels=32)
        layer_cfg.out_channels = 64    # can edit it afterwards
        layer = instantiate(layer_cfg)
    """

    def __init__(self, target):
        if not (callable(target) or isinstance(target, (str, abc.Mapping))):
            raise TypeError(
                f"target of LazyCall must be a callable or defines a callable! Got {target}"
            )
        self._target = target

    def __call__(self, **kwargs):
        if is_dataclass(self._target):
            # omegaconf object cannot hold dataclass type
            # https://github.com/omry/omegaconf/issues/784
            target = _convert_target_to_string(self._target)
        else:
            target = self._target
        kwargs["_target_"] = target

        return DictConfig(content=kwargs, flags={"allow_objects": True})


def _visit_dict_config(cfg, func):
    """
    Apply func recursively to all DictConfig in cfg.
    """
    if isinstance(cfg, DictConfig):
        func(cfg)
        for v in cfg.values():
            _visit_dict_config(v, func)
    elif isinstance(cfg, ListConfig):
        for v in cfg:
            _visit_dict_config(v, func)


def _validate_py_syntax(filename):
    # see also https://github.com/open-mmlab/mmcv/blob/master/mmcv/utils/config.py
    with PathManager.open(filename, "r") as f:
        content = f.read()
    try:
        ast.parse(content)
    except SyntaxError as e:
        raise SyntaxError(f"Config file {filename} has syntax error!") from e


def _cast_to_config(obj):
    # if given a dict, return DictConfig instead
    if isinstance(obj, dict):
        return DictConfig(obj, flags={"allow_objects": True})
    return obj


_CFG_PACKAGE_NAME = "detectron2._cfg_loader"
"""
A namespace to put all imported config into.
"""


def _random_package_name(filename):
    # generate a random package name when loading config files
    return _CFG_PACKAGE_NAME + str(uuid.uuid4())[:4] + "." + os.path.basename(filename)


@contextmanager
def _patch_import():
    """
    Enhance relative import statements in config files, so that they:
    1. locate files purely based on relative location, regardless of packages.
       e.g. you can import file without having __init__
    2. do not cache modules globally; modifications of module states has no side effect
    3. support other storage system through PathManager
    4. imported dict are turned into omegaconf.DictConfig automatically
    """
    old_import = builtins.__import__

    def find_relative_file(original_file, relative_import_path, level):
        cur_file = os.path.dirname(original_file)
        for _ in range(level - 1):
            cur_file = os.path.dirname(cur_file)
        cur_name = relative_import_path.lstrip(".")
        for part in cur_name.split("."):
            cur_file = os.path.join(cur_file, part)
        # NOTE: directory import is not handled. Because then it's unclear
        # if such import should produce python module or DictConfig. This can
        # be discussed further if needed.
        if not cur_file.endswith(".py"):
            cur_file += ".py"
        if not PathManager.isfile(cur_file):
            raise ImportError(
                f"Cannot import name {relative_import_path} from "
                f"{original_file}: {cur_file} has to exist."
            )
        return cur_file

    def new_import(name, globals=None, locals=None, fromlist=(), level=0):
        if (
            # Only deal with relative imports inside config files
            level != 0
            and globals is not None
            and (globals.get("__package__", "") or "").startswith(_CFG_PACKAGE_NAME)
        ):
            cur_file = find_relative_file(globals["__file__"], name, level)
            _validate_py_syntax(cur_file)
            spec = importlib.machinery.ModuleSpec(
                _random_package_name(cur_file), None, origin=cur_file
            )
            module = importlib.util.module_from_spec(spec)
            module.__file__ = cur_file
            with PathManager.open(cur_file) as f:
                content = f.read()
            exec(compile(content, cur_file, "exec"), module.__dict__)
            for name in fromlist:  # turn imported dict into DictConfig automatically
                val = _cast_to_config(module.__dict__[name])
                module.__dict__[name] = val
            return module
        return old_import(name, globals, locals, fromlist=fromlist, level=level)

    builtins.__import__ = new_import
    yield new_import
    builtins.__import__ = old_import


class LazyConfig:
    """
    Provide methods to save, load, and override an omegaconf config object
    which may contain definition of lazily-constructed objects.
    """

    @staticmethod
    def load_rel(filename: str, keys: Union[None, str, Tuple[str, ...]] = None):
        """
        Similar to :meth:`load()`, but load path relative to the caller's
        source file.

        This has the same functionality as a relative import, except that this method
        accepts filename as a string, so more characters are allowed in the filename.
        """
        caller_frame = inspect.stack()[1]
        caller_fname = caller_frame[0].f_code.co_filename
        assert caller_fname != "<string>", "load_rel Unable to find caller"
        caller_dir = os.path.dirname(caller_fname)
        filename = os.path.join(caller_dir, filename)
        return LazyConfig.load(filename, keys)

    @staticmethod
    def load(filename: str, keys: Union[None, str, Tuple[str, ...]] = None):
        """
        Load a config file.

        Args:
            filename: absolute path or relative path w.r.t. the current working directory
            keys: keys to load and return. If not given, return all keys
                (whose values are config objects) in a dict.
        """
        has_keys = keys is not None
        filename = filename.replace("/./", "/")  # redundant
        if os.path.splitext(filename)[1] not in [".py", ".yaml", ".yml"]:
            raise ValueError(f"Config file {filename} has to be a python or yaml file.")
        if filename.endswith(".py"):
            _validate_py_syntax(filename)

            with _patch_import():
                # Record the filename
                module_namespace = {
                    "__file__": filename,
                    "__package__": _random_package_name(filename),
                }
                with PathManager.open(filename) as f:
                    content = f.read()
                # Compile first with filename to:
                # 1. make filename appears in stacktrace
                # 2. make load_rel able to find its parent's (possibly remote) location
                exec(compile(content, filename, "exec"), module_namespace)

            ret = module_namespace
        else:
            with PathManager.open(filename) as f:
                obj = yaml.unsafe_load(f)
            ret = OmegaConf.create(obj, flags={"allow_objects": True})

        if has_keys:
            if isinstance(keys, str):
                return _cast_to_config(ret[keys])
            else:
                return tuple(_cast_to_config(ret[a]) for a in keys)
        else:
            if filename.endswith(".py"):
                # when not specified, only load those that are config objects
                ret = DictConfig(
                    {
                        name: _cast_to_config(value)
                        for name, value in ret.items()
                        if isinstance(value, (DictConfig, ListConfig, dict))
                        and not name.startswith("_")
                    },
                    flags={"allow_objects": True},
                )
            return ret

    @staticmethod
    def save(cfg, filename: str):
        """
        Save a config object to a yaml file.
        Note that when the config dictionary contains complex objects (e.g. lambda),
        it can't be saved to yaml. In that case we will print an error and
        attempt to save to a pkl file instead.

        Args:
            cfg: an omegaconf config object
            filename: yaml file name to save the config file
        """
        logger = logging.getLogger(__name__)
        try:
            cfg = deepcopy(cfg)
        except Exception:
            pass
        else:
            # if it's deep-copyable, then...
            def _replace_type_by_name(x):
                if "_target_" in x and callable(x._target_):
                    try:
                        x._target_ = _convert_target_to_string(x._target_)
                    except AttributeError:
                        pass

            # not necessary, but makes yaml looks nicer
            _visit_dict_config(cfg, _replace_type_by_name)

        save_pkl = False
        try:
            dict = OmegaConf.to_container(cfg, resolve=False)
            dumped = yaml.dump(dict, default_flow_style=None, allow_unicode=True, width=9999)
            with PathManager.open(filename, "w") as f:
                f.write(dumped)

            try:
                _ = yaml.unsafe_load(dumped)  # test that it is loadable
            except Exception:
                logger.warning(
                    "The config contains objects that cannot serialize to a valid yaml. "
                    f"{filename} is human-readable but cannot be loaded."
                )
                save_pkl = True
        except Exception:
            logger.exception("Unable to serialize the config to yaml. Error:")
            save_pkl = True

        if save_pkl:
            new_filename = filename + ".pkl"
            try:
                # retry by pickle
                with PathManager.open(new_filename, "wb") as f:
                    cloudpickle.dump(cfg, f)
                logger.warning(f"Config is saved using cloudpickle at {new_filename}.")
            except Exception:
                pass

    @staticmethod
    def apply_overrides(cfg, overrides: List[str]):
        """
        In-place override contents of cfg.

        Args:
            cfg: an omegaconf config object
            overrides: list of strings in the format of "a=b" to override configs.
                See https://hydra.cc/docs/next/advanced/override_grammar/basic/
                for syntax.

        Returns:
            the cfg object
        """

        def safe_update(cfg, key, value):
            parts = key.split(".")
            for idx in range(1, len(parts)):
                prefix = ".".join(parts[:idx])
                v = OmegaConf.select(cfg, prefix, default=None)
                if v is None:
                    break
                if not OmegaConf.is_config(v):
                    raise KeyError(
                        f"Trying to update key {key}, but {prefix} "
                        f"is not a config, but has type {type(v)}."
                    )
            OmegaConf.update(cfg, key, value, merge=True)

        from hydra.core.override_parser.overrides_parser import OverridesParser

        parser = OverridesParser.create()
        overrides = parser.parse_overrides(overrides)
        for o in overrides:
            key = o.key_or_group
            value = o.value()
            if o.is_delete():
                # TODO support this
                raise NotImplementedError("deletion is not yet a supported override")
            safe_update(cfg, key, value)
        return cfg

    @staticmethod
    def to_py(cfg, prefix: str = "cfg."):
        """
        Try to convert a config object into Python-like pseudo code.

        Note that perfect conversion is not always possible. So the returned
        results are mainly meant to be human-readable, and not meant to be executed.

        Args:
            cfg: an omegaconf config object
            prefix: root name for the resulting code (default: "cfg.")

        Returns:
            str of formatted Python code
        """
        import black

        cfg = OmegaConf.to_container(cfg, resolve=True)

        def _to_str(obj, prefix=None, inside_call=False):
            if prefix is None:
                prefix = []
            if isinstance(obj, abc.Mapping) and "_target_" in obj:
                # Dict representing a function call
                target = _convert_target_to_string(obj.pop("_target_"))
                args = []
                for k, v in sorted(obj.items()):
                    args.append(f"{k}={_to_str(v, inside_call=True)}")
                args = ", ".join(args)
                call = f"{target}({args})"
                return "".join(prefix) + call
            elif isinstance(obj, abc.Mapping) and not inside_call:
                # Dict that is not inside a call is a list of top-level config objects that we
                # render as one object per line with dot separated prefixes
                key_list = []
                for k, v in sorted(obj.items()):
                    if isinstance(v, abc.Mapping) and "_target_" not in v:
                        key_list.append(_to_str(v, prefix=prefix + [k + "."]))
                    else:
                        key = "".join(prefix) + k
                        key_list.append(f"{key}={_to_str(v)}")
                return "\n".join(key_list)
            elif isinstance(obj, abc.Mapping):
                # Dict that is inside a call is rendered as a regular dict
                return (
                    "{"
                    + ",".join(
                        f"{repr(k)}: {_to_str(v, inside_call=inside_call)}"
                        for k, v in sorted(obj.items())
                    )
                    + "}"
                )
            elif isinstance(obj, list):
                return "[" + ",".join(_to_str(x, inside_call=inside_call) for x in obj) + "]"
            else:
                return repr(obj)

        py_str = _to_str(cfg, prefix=[prefix])
        try:
            return black.format_str(py_str, mode=black.Mode())
        except black.InvalidInput:
            return py_str
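A minimal sketch of the LazyConfig round trip described above; the config path and the `train.max_iter` / `model` keys are placeholders for whatever a lazily-evaluated Python config actually defines:

# Sketch: load a Python config lazily, override it, save it, and instantiate a node.
from detectron2.config import LazyConfig, instantiate

cfg = LazyConfig.load("configs/my_lazy_config.py")            # hypothetical config file
cfg = LazyConfig.apply_overrides(cfg, ["train.max_iter=90000"])  # assumes a `train` node exists
LazyConfig.save(cfg, "output/config.yaml")
model = instantiate(cfg.model)                                 # assumes the file defines `model`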
detectron2/build/lib.linux-x86_64-3.10/detectron2/data/__init__.py
ADDED
@@ -0,0 +1,19 @@
# Copyright (c) Facebook, Inc. and its affiliates.
from . import transforms  # isort:skip

from .build import (
    build_batch_data_loader,
    build_detection_test_loader,
    build_detection_train_loader,
    get_detection_dataset_dicts,
    load_proposals_into_dataset,
    print_instances_class_histogram,
)
from .catalog import DatasetCatalog, MetadataCatalog, Metadata
from .common import DatasetFromList, MapDataset, ToIterableDataset
from .dataset_mapper import DatasetMapper

# ensure the builtin datasets are registered
from . import datasets, samplers  # isort:skip

__all__ = [k for k in globals().keys() if not k.startswith("_")]
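For orientation, a minimal sketch of the catalog API re-exported here; the dataset name, file name, and loader function are placeholders, not part of this repo:

# Sketch: register a toy dataset and look it up through the catalogs.
from detectron2.data import DatasetCatalog, MetadataCatalog

def load_my_dicts():
    # placeholder loader; returns dicts in detectron2's standard dataset format
    return [{"file_name": "img.jpg", "height": 480, "width": 640, "image_id": 0, "annotations": []}]

DatasetCatalog.register("my_dataset_train", load_my_dicts)
MetadataCatalog.get("my_dataset_train").thing_classes = ["object"]
dicts = DatasetCatalog.get("my_dataset_train")  # calls load_my_dicts()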
detectron2/build/lib.linux-x86_64-3.10/detectron2/data/benchmark.py
ADDED
@@ -0,0 +1,225 @@
1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
2 |
+
import logging
|
3 |
+
import numpy as np
|
4 |
+
from itertools import count
|
5 |
+
from typing import List, Tuple
|
6 |
+
import torch
|
7 |
+
import tqdm
|
8 |
+
from fvcore.common.timer import Timer
|
9 |
+
|
10 |
+
from detectron2.utils import comm
|
11 |
+
|
12 |
+
from .build import build_batch_data_loader
|
13 |
+
from .common import DatasetFromList, MapDataset
|
14 |
+
from .samplers import TrainingSampler
|
15 |
+
|
16 |
+
logger = logging.getLogger(__name__)
|
17 |
+
|
18 |
+
|
19 |
+
class _EmptyMapDataset(torch.utils.data.Dataset):
|
20 |
+
"""
|
21 |
+
Map anything to emptiness.
|
22 |
+
"""
|
23 |
+
|
24 |
+
def __init__(self, dataset):
|
25 |
+
self.ds = dataset
|
26 |
+
|
27 |
+
def __len__(self):
|
28 |
+
return len(self.ds)
|
29 |
+
|
30 |
+
def __getitem__(self, idx):
|
31 |
+
_ = self.ds[idx]
|
32 |
+
return [0]
|
33 |
+
|
34 |
+
|
35 |
+
def iter_benchmark(
|
36 |
+
iterator, num_iter: int, warmup: int = 5, max_time_seconds: float = 60
|
37 |
+
) -> Tuple[float, List[float]]:
|
38 |
+
"""
|
39 |
+
Benchmark an iterator/iterable for `num_iter` iterations with an extra
|
40 |
+
`warmup` iterations of warmup.
|
41 |
+
End early if `max_time_seconds` time is spent on iterations.
|
42 |
+
|
43 |
+
Returns:
|
44 |
+
float: average time (seconds) per iteration
|
45 |
+
list[float]: time spent on each iteration. Sometimes useful for further analysis.
|
46 |
+
"""
|
47 |
+
num_iter, warmup = int(num_iter), int(warmup)
|
48 |
+
|
49 |
+
iterator = iter(iterator)
|
50 |
+
for _ in range(warmup):
|
51 |
+
next(iterator)
|
52 |
+
timer = Timer()
|
53 |
+
all_times = []
|
54 |
+
for curr_iter in tqdm.trange(num_iter):
|
55 |
+
start = timer.seconds()
|
56 |
+
if start > max_time_seconds:
|
57 |
+
num_iter = curr_iter
|
58 |
+
break
|
59 |
+
next(iterator)
|
60 |
+
all_times.append(timer.seconds() - start)
|
61 |
+
avg = timer.seconds() / num_iter
|
62 |
+
return avg, all_times
|
63 |
+
|

class DataLoaderBenchmark:
    """
    Some common benchmarks that help understand perf bottleneck of a standard dataloader
    made of dataset, mapper and sampler.
    """

    def __init__(
        self,
        dataset,
        *,
        mapper,
        sampler=None,
        total_batch_size,
        num_workers=0,
        max_time_seconds: int = 90,
    ):
        """
        Args:
            max_time_seconds (int): maximum time to spend for each benchmark
            other args: same as in `build.py:build_detection_train_loader`
        """
        if isinstance(dataset, list):
            dataset = DatasetFromList(dataset, copy=False, serialize=True)
        if sampler is None:
            sampler = TrainingSampler(len(dataset))

        self.dataset = dataset
        self.mapper = mapper
        self.sampler = sampler
        self.total_batch_size = total_batch_size
        self.num_workers = num_workers
        self.per_gpu_batch_size = self.total_batch_size // comm.get_world_size()

        self.max_time_seconds = max_time_seconds

    def _benchmark(self, iterator, num_iter, warmup, msg=None):
        avg, all_times = iter_benchmark(iterator, num_iter, warmup, self.max_time_seconds)
        if msg is not None:
            self._log_time(msg, avg, all_times)
        return avg, all_times

    def _log_time(self, msg, avg, all_times, distributed=False):
        percentiles = [np.percentile(all_times, k, interpolation="nearest") for k in [1, 5, 95, 99]]
        if not distributed:
            logger.info(
                f"{msg}: avg={1.0/avg:.1f} it/s, "
                f"p1={percentiles[0]:.2g}s, p5={percentiles[1]:.2g}s, "
                f"p95={percentiles[2]:.2g}s, p99={percentiles[3]:.2g}s."
            )
            return
        avg_per_gpu = comm.all_gather(avg)
        percentiles_per_gpu = comm.all_gather(percentiles)
        if comm.get_rank() > 0:
            return
        for idx, avg, percentiles in zip(count(), avg_per_gpu, percentiles_per_gpu):
            logger.info(
                f"GPU{idx} {msg}: avg={1.0/avg:.1f} it/s, "
                f"p1={percentiles[0]:.2g}s, p5={percentiles[1]:.2g}s, "
                f"p95={percentiles[2]:.2g}s, p99={percentiles[3]:.2g}s."
            )

    def benchmark_dataset(self, num_iter, warmup=5):
        """
        Benchmark the speed of taking raw samples from the dataset.
        """

        def loader():
            while True:
                for k in self.sampler:
                    yield self.dataset[k]

        self._benchmark(loader(), num_iter, warmup, "Dataset Alone")

    def benchmark_mapper(self, num_iter, warmup=5):
        """
        Benchmark the speed of taking raw samples from the dataset and map
        them in a single process.
        """

        def loader():
            while True:
                for k in self.sampler:
                    yield self.mapper(self.dataset[k])

        self._benchmark(loader(), num_iter, warmup, "Single Process Mapper (sec/sample)")

    def benchmark_workers(self, num_iter, warmup=10):
        """
        Benchmark the dataloader by tuning num_workers to [0, 1, self.num_workers].
        """
        candidates = [0, 1]
        if self.num_workers not in candidates:
            candidates.append(self.num_workers)

        dataset = MapDataset(self.dataset, self.mapper)
        for n in candidates:
            loader = build_batch_data_loader(
                dataset,
                self.sampler,
                self.total_batch_size,
                num_workers=n,
            )
            self._benchmark(
                iter(loader),
                num_iter * max(n, 1),
                warmup * max(n, 1),
                f"DataLoader ({n} workers, bs={self.per_gpu_batch_size})",
            )
            del loader

    def benchmark_IPC(self, num_iter, warmup=10):
        """
        Benchmark the dataloader where each worker outputs nothing. This
        eliminates the IPC overhead compared to the regular dataloader.

        PyTorch multiprocessing's IPC only optimizes for torch tensors.
        Large numpy arrays or other data structure may incur large IPC overhead.
        """
        n = self.num_workers
        dataset = _EmptyMapDataset(MapDataset(self.dataset, self.mapper))
        loader = build_batch_data_loader(
            dataset, self.sampler, self.total_batch_size, num_workers=n
        )
        self._benchmark(
            iter(loader),
            num_iter * max(n, 1),
            warmup * max(n, 1),
            f"DataLoader ({n} workers, bs={self.per_gpu_batch_size}) w/o comm",
        )

    def benchmark_distributed(self, num_iter, warmup=10):
        """
        Benchmark the dataloader in each distributed worker, and log results of
        all workers. This helps understand the final performance as well as
        the variances among workers.

        It also prints startup time (first iter) of the dataloader.
        """
        gpu = comm.get_world_size()
        dataset = MapDataset(self.dataset, self.mapper)
        n = self.num_workers
        loader = build_batch_data_loader(
            dataset, self.sampler, self.total_batch_size, num_workers=n
        )

        timer = Timer()
        loader = iter(loader)
        next(loader)
        startup_time = timer.seconds()
        logger.info("Dataloader startup time: {:.2f} seconds".format(startup_time))

        comm.synchronize()

        avg, all_times = self._benchmark(loader, num_iter * max(n, 1), warmup * max(n, 1))
        del loader
        self._log_time(
            f"DataLoader ({gpu} GPUs x {n} workers, total bs={self.total_batch_size})",
            avg,
            all_times,
            True,
        )
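A minimal usage sketch of the DataLoaderBenchmark class above (not part of the committed file). It assumes detectron2 is importable, the COCO 2017 training set is registered and on disk, and that a default config is acceptable for the mapper; the dataset name and iteration counts are illustrative.

# Sketch: drive the benchmarks above against a registered dataset.
from detectron2.config import get_cfg
from detectron2.data import DatasetMapper, get_detection_dataset_dicts
from detectron2.data.benchmark import DataLoaderBenchmark

cfg = get_cfg()
dicts = get_detection_dataset_dicts("coco_2017_train")  # assumed to be registered
benchmark = DataLoaderBenchmark(
    dicts,
    mapper=DatasetMapper(cfg, is_train=True),
    total_batch_size=2,
    num_workers=2,
)
benchmark.benchmark_dataset(100)   # raw sample reading speed
benchmark.benchmark_mapper(100)    # dataset + mapper, single process
benchmark.benchmark_workers(100)   # full dataloader with 0 / 1 / num_workers workers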
detectron2/build/lib.linux-x86_64-3.10/detectron2/data/build.py
ADDED
@@ -0,0 +1,556 @@
# Copyright (c) Facebook, Inc. and its affiliates.
import itertools
import logging
import numpy as np
import operator
import pickle
from typing import Any, Callable, Dict, List, Optional, Union
import torch
import torch.utils.data as torchdata
from tabulate import tabulate
from termcolor import colored

from detectron2.config import configurable
from detectron2.structures import BoxMode
from detectron2.utils.comm import get_world_size
from detectron2.utils.env import seed_all_rng
from detectron2.utils.file_io import PathManager
from detectron2.utils.logger import _log_api_usage, log_first_n

from .catalog import DatasetCatalog, MetadataCatalog
from .common import AspectRatioGroupedDataset, DatasetFromList, MapDataset, ToIterableDataset
from .dataset_mapper import DatasetMapper
from .detection_utils import check_metadata_consistency
from .samplers import (
    InferenceSampler,
    RandomSubsetTrainingSampler,
    RepeatFactorTrainingSampler,
    TrainingSampler,
)

"""
This file contains the default logic to build a dataloader for training or testing.
"""

__all__ = [
    "build_batch_data_loader",
    "build_detection_train_loader",
    "build_detection_test_loader",
    "get_detection_dataset_dicts",
    "load_proposals_into_dataset",
    "print_instances_class_histogram",
]


def filter_images_with_only_crowd_annotations(dataset_dicts):
    """
    Filter out images with none annotations or only crowd annotations
    (i.e., images without non-crowd annotations).
    A common training-time preprocessing on COCO dataset.

    Args:
        dataset_dicts (list[dict]): annotations in Detectron2 Dataset format.

    Returns:
        list[dict]: the same format, but filtered.
    """
    num_before = len(dataset_dicts)

    def valid(anns):
        for ann in anns:
            if ann.get("iscrowd", 0) == 0:
                return True
        return False

    dataset_dicts = [x for x in dataset_dicts if valid(x["annotations"])]
    num_after = len(dataset_dicts)
    logger = logging.getLogger(__name__)
    logger.info(
        "Removed {} images with no usable annotations. {} images left.".format(
            num_before - num_after, num_after
        )
    )
    return dataset_dicts


def filter_images_with_few_keypoints(dataset_dicts, min_keypoints_per_image):
    """
    Filter out images with too few number of keypoints.

    Args:
        dataset_dicts (list[dict]): annotations in Detectron2 Dataset format.

    Returns:
        list[dict]: the same format as dataset_dicts, but filtered.
    """
    num_before = len(dataset_dicts)

    def visible_keypoints_in_image(dic):
        # Each keypoints field has the format [x1, y1, v1, ...], where v is visibility
        annotations = dic["annotations"]
        return sum(
            (np.array(ann["keypoints"][2::3]) > 0).sum()
            for ann in annotations
            if "keypoints" in ann
        )

    dataset_dicts = [
        x for x in dataset_dicts if visible_keypoints_in_image(x) >= min_keypoints_per_image
    ]
    num_after = len(dataset_dicts)
    logger = logging.getLogger(__name__)
    logger.info(
        "Removed {} images with fewer than {} keypoints.".format(
            num_before - num_after, min_keypoints_per_image
        )
    )
    return dataset_dicts


def load_proposals_into_dataset(dataset_dicts, proposal_file):
    """
    Load precomputed object proposals into the dataset.

    The proposal file should be a pickled dict with the following keys:

    - "ids": list[int] or list[str], the image ids
    - "boxes": list[np.ndarray], each is an Nx4 array of boxes corresponding to the image id
    - "objectness_logits": list[np.ndarray], each is an N sized array of objectness scores
      corresponding to the boxes.
    - "bbox_mode": the BoxMode of the boxes array. Defaults to ``BoxMode.XYXY_ABS``.

    Args:
        dataset_dicts (list[dict]): annotations in Detectron2 Dataset format.
        proposal_file (str): file path of pre-computed proposals, in pkl format.

    Returns:
        list[dict]: the same format as dataset_dicts, but added proposal field.
    """
    logger = logging.getLogger(__name__)
    logger.info("Loading proposals from: {}".format(proposal_file))

    with PathManager.open(proposal_file, "rb") as f:
        proposals = pickle.load(f, encoding="latin1")

    # Rename the key names in D1 proposal files
    rename_keys = {"indexes": "ids", "scores": "objectness_logits"}
    for key in rename_keys:
        if key in proposals:
            proposals[rename_keys[key]] = proposals.pop(key)

    # Fetch the indexes of all proposals that are in the dataset
    # Convert image_id to str since they could be int.
    img_ids = set({str(record["image_id"]) for record in dataset_dicts})
    id_to_index = {str(id): i for i, id in enumerate(proposals["ids"]) if str(id) in img_ids}

    # Assuming default bbox_mode of precomputed proposals are 'XYXY_ABS'
    bbox_mode = BoxMode(proposals["bbox_mode"]) if "bbox_mode" in proposals else BoxMode.XYXY_ABS

    for record in dataset_dicts:
        # Get the index of the proposal
        i = id_to_index[str(record["image_id"])]

        boxes = proposals["boxes"][i]
        objectness_logits = proposals["objectness_logits"][i]
        # Sort the proposals in descending order of the scores
        inds = objectness_logits.argsort()[::-1]
        record["proposal_boxes"] = boxes[inds]
        record["proposal_objectness_logits"] = objectness_logits[inds]
        record["proposal_bbox_mode"] = bbox_mode

    return dataset_dicts


def print_instances_class_histogram(dataset_dicts, class_names):
    """
    Args:
        dataset_dicts (list[dict]): list of dataset dicts.
        class_names (list[str]): list of class names (zero-indexed).
    """
    num_classes = len(class_names)
    hist_bins = np.arange(num_classes + 1)
    histogram = np.zeros((num_classes,), dtype=np.int)
    for entry in dataset_dicts:
        annos = entry["annotations"]
        classes = np.asarray(
            [x["category_id"] for x in annos if not x.get("iscrowd", 0)], dtype=np.int
        )
        if len(classes):
            assert classes.min() >= 0, f"Got an invalid category_id={classes.min()}"
            assert (
                classes.max() < num_classes
            ), f"Got an invalid category_id={classes.max()} for a dataset of {num_classes} classes"
        histogram += np.histogram(classes, bins=hist_bins)[0]

    N_COLS = min(6, len(class_names) * 2)

    def short_name(x):
        # make long class names shorter. useful for lvis
        if len(x) > 13:
            return x[:11] + ".."
        return x

    data = list(
        itertools.chain(*[[short_name(class_names[i]), int(v)] for i, v in enumerate(histogram)])
    )
    total_num_instances = sum(data[1::2])
    data.extend([None] * (N_COLS - (len(data) % N_COLS)))
    if num_classes > 1:
        data.extend(["total", total_num_instances])
    data = itertools.zip_longest(*[data[i::N_COLS] for i in range(N_COLS)])
    table = tabulate(
        data,
        headers=["category", "#instances"] * (N_COLS // 2),
        tablefmt="pipe",
        numalign="left",
        stralign="center",
    )
    log_first_n(
        logging.INFO,
        "Distribution of instances among all {} categories:\n".format(num_classes)
        + colored(table, "cyan"),
        key="message",
    )


def get_detection_dataset_dicts(
    names,
    filter_empty=True,
    min_keypoints=0,
    proposal_files=None,
    check_consistency=True,
):
    """
    Load and prepare dataset dicts for instance detection/segmentation and semantic segmentation.

    Args:
        names (str or list[str]): a dataset name or a list of dataset names
        filter_empty (bool): whether to filter out images without instance annotations
        min_keypoints (int): filter out images with fewer keypoints than
            `min_keypoints`. Set to 0 to do nothing.
        proposal_files (list[str]): if given, a list of object proposal files
            that match each dataset in `names`.
        check_consistency (bool): whether to check if datasets have consistent metadata.

    Returns:
        list[dict]: a list of dicts following the standard dataset dict format.
    """
    if isinstance(names, str):
        names = [names]
    assert len(names), names
    dataset_dicts = [DatasetCatalog.get(dataset_name) for dataset_name in names]

    if isinstance(dataset_dicts[0], torchdata.Dataset):
        if len(dataset_dicts) > 1:
            # ConcatDataset does not work for iterable style dataset.
            # We could support concat for iterable as well, but it's often
            # not a good idea to concat iterables anyway.
            return torchdata.ConcatDataset(dataset_dicts)
        return dataset_dicts[0]

    for dataset_name, dicts in zip(names, dataset_dicts):
        assert len(dicts), "Dataset '{}' is empty!".format(dataset_name)

    if proposal_files is not None:
        assert len(names) == len(proposal_files)
        # load precomputed proposals from proposal files
        dataset_dicts = [
            load_proposals_into_dataset(dataset_i_dicts, proposal_file)
            for dataset_i_dicts, proposal_file in zip(dataset_dicts, proposal_files)
        ]

    dataset_dicts = list(itertools.chain.from_iterable(dataset_dicts))

    has_instances = "annotations" in dataset_dicts[0]
    if filter_empty and has_instances:
        dataset_dicts = filter_images_with_only_crowd_annotations(dataset_dicts)
    if min_keypoints > 0 and has_instances:
        dataset_dicts = filter_images_with_few_keypoints(dataset_dicts, min_keypoints)

    if check_consistency and has_instances:
        try:
            class_names = MetadataCatalog.get(names[0]).thing_classes
            check_metadata_consistency("thing_classes", names)
            print_instances_class_histogram(dataset_dicts, class_names)
        except AttributeError:  # class names are not available for this dataset
            pass

    assert len(dataset_dicts), "No valid data found in {}.".format(",".join(names))
    return dataset_dicts


def build_batch_data_loader(
    dataset,
    sampler,
    total_batch_size,
    *,
    aspect_ratio_grouping=False,
    num_workers=0,
    collate_fn=None,
):
    """
    Build a batched dataloader. The main differences from `torch.utils.data.DataLoader` are:
    1. support aspect ratio grouping options
    2. use no "batch collation", because this is common for detection training

    Args:
        dataset (torch.utils.data.Dataset): a pytorch map-style or iterable dataset.
        sampler (torch.utils.data.sampler.Sampler or None): a sampler that produces indices.
            Must be provided iff. ``dataset`` is a map-style dataset.
        total_batch_size, aspect_ratio_grouping, num_workers, collate_fn: see
            :func:`build_detection_train_loader`.

    Returns:
        iterable[list]. Length of each list is the batch size of the current
        GPU. Each element in the list comes from the dataset.
    """
    world_size = get_world_size()
    assert (
        total_batch_size > 0 and total_batch_size % world_size == 0
    ), "Total batch size ({}) must be divisible by the number of gpus ({}).".format(
        total_batch_size, world_size
    )
    batch_size = total_batch_size // world_size

    if isinstance(dataset, torchdata.IterableDataset):
        assert sampler is None, "sampler must be None if dataset is IterableDataset"
    else:
        dataset = ToIterableDataset(dataset, sampler)

    if aspect_ratio_grouping:
        data_loader = torchdata.DataLoader(
            dataset,
            num_workers=num_workers,
            collate_fn=operator.itemgetter(0),  # don't batch, but yield individual elements
            worker_init_fn=worker_init_reset_seed,
        )  # yield individual mapped dict
        data_loader = AspectRatioGroupedDataset(data_loader, batch_size)
        if collate_fn is None:
            return data_loader
        return MapDataset(data_loader, collate_fn)
    else:
        return torchdata.DataLoader(
            dataset,
            batch_size=batch_size,
            drop_last=True,
            num_workers=num_workers,
            collate_fn=trivial_batch_collator if collate_fn is None else collate_fn,
            worker_init_fn=worker_init_reset_seed,
        )


def _train_loader_from_config(cfg, mapper=None, *, dataset=None, sampler=None):
    if dataset is None:
        dataset = get_detection_dataset_dicts(
            cfg.DATASETS.TRAIN,
            filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS,
            min_keypoints=cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
            if cfg.MODEL.KEYPOINT_ON
            else 0,
            proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN if cfg.MODEL.LOAD_PROPOSALS else None,
        )
    _log_api_usage("dataset." + cfg.DATASETS.TRAIN[0])

    if mapper is None:
        mapper = DatasetMapper(cfg, True)

    if sampler is None:
        sampler_name = cfg.DATALOADER.SAMPLER_TRAIN
        logger = logging.getLogger(__name__)
        if isinstance(dataset, torchdata.IterableDataset):
            logger.info("Not using any sampler since the dataset is IterableDataset.")
            sampler = None
        else:
            logger.info("Using training sampler {}".format(sampler_name))
            if sampler_name == "TrainingSampler":
                sampler = TrainingSampler(len(dataset))
            elif sampler_name == "RepeatFactorTrainingSampler":
                repeat_factors = RepeatFactorTrainingSampler.repeat_factors_from_category_frequency(
                    dataset, cfg.DATALOADER.REPEAT_THRESHOLD
                )
                sampler = RepeatFactorTrainingSampler(repeat_factors)
            elif sampler_name == "RandomSubsetTrainingSampler":
                sampler = RandomSubsetTrainingSampler(
                    len(dataset), cfg.DATALOADER.RANDOM_SUBSET_RATIO
                )
            else:
                raise ValueError("Unknown training sampler: {}".format(sampler_name))

    return {
        "dataset": dataset,
        "sampler": sampler,
        "mapper": mapper,
        "total_batch_size": cfg.SOLVER.IMS_PER_BATCH,
        "aspect_ratio_grouping": cfg.DATALOADER.ASPECT_RATIO_GROUPING,
        "num_workers": cfg.DATALOADER.NUM_WORKERS,
    }


@configurable(from_config=_train_loader_from_config)
def build_detection_train_loader(
    dataset,
    *,
    mapper,
    sampler=None,
    total_batch_size,
    aspect_ratio_grouping=True,
    num_workers=0,
    collate_fn=None,
):
    """
    Build a dataloader for object detection with some default features.

    Args:
        dataset (list or torch.utils.data.Dataset): a list of dataset dicts,
            or a pytorch dataset (either map-style or iterable). It can be obtained
            by using :func:`DatasetCatalog.get` or :func:`get_detection_dataset_dicts`.
        mapper (callable): a callable which takes a sample (dict) from dataset and
            returns the format to be consumed by the model.
            When using cfg, the default choice is ``DatasetMapper(cfg, is_train=True)``.
        sampler (torch.utils.data.sampler.Sampler or None): a sampler that produces
            indices to be applied on ``dataset``.
            If ``dataset`` is map-style, the default sampler is a :class:`TrainingSampler`,
            which coordinates an infinite random shuffle sequence across all workers.
            Sampler must be None if ``dataset`` is iterable.
        total_batch_size (int): total batch size across all workers.
        aspect_ratio_grouping (bool): whether to group images with similar
            aspect ratio for efficiency. When enabled, it requires each
            element in dataset be a dict with keys "width" and "height".
        num_workers (int): number of parallel data loading workers
        collate_fn: a function that determines how to do batching, same as the argument of
            `torch.utils.data.DataLoader`. Defaults to do no collation and return a list of
            data. No collation is OK for small batch size and simple data structures.
            If your batch size is large and each sample contains too many small tensors,
            it's more efficient to collate them in data loader.

    Returns:
        torch.utils.data.DataLoader:
            a dataloader. Each output from it is a ``list[mapped_element]`` of length
            ``total_batch_size / num_workers``, where ``mapped_element`` is produced
            by the ``mapper``.
    """
    if isinstance(dataset, list):
        dataset = DatasetFromList(dataset, copy=False)
    if mapper is not None:
        dataset = MapDataset(dataset, mapper)

    if isinstance(dataset, torchdata.IterableDataset):
        assert sampler is None, "sampler must be None if dataset is IterableDataset"
    else:
        if sampler is None:
            sampler = TrainingSampler(len(dataset))
        assert isinstance(sampler, torchdata.Sampler), f"Expect a Sampler but got {type(sampler)}"
    return build_batch_data_loader(
        dataset,
        sampler,
        total_batch_size,
        aspect_ratio_grouping=aspect_ratio_grouping,
        num_workers=num_workers,
        collate_fn=collate_fn,
    )


def _test_loader_from_config(cfg, dataset_name, mapper=None):
    """
    Uses the given `dataset_name` argument (instead of the names in cfg), because the
    standard practice is to evaluate each test set individually (not combining them).
    """
    if isinstance(dataset_name, str):
        dataset_name = [dataset_name]

    dataset = get_detection_dataset_dicts(
        dataset_name,
        filter_empty=False,
        proposal_files=[
            cfg.DATASETS.PROPOSAL_FILES_TEST[list(cfg.DATASETS.TEST).index(x)] for x in dataset_name
        ]
        if cfg.MODEL.LOAD_PROPOSALS
        else None,
    )
    if mapper is None:
        mapper = DatasetMapper(cfg, False)
    return {
        "dataset": dataset,
        "mapper": mapper,
        "num_workers": cfg.DATALOADER.NUM_WORKERS,
        "sampler": InferenceSampler(len(dataset))
        if not isinstance(dataset, torchdata.IterableDataset)
        else None,
    }


@configurable(from_config=_test_loader_from_config)
def build_detection_test_loader(
    dataset: Union[List[Any], torchdata.Dataset],
    *,
    mapper: Callable[[Dict[str, Any]], Any],
    sampler: Optional[torchdata.Sampler] = None,
    batch_size: int = 1,
    num_workers: int = 0,
    collate_fn: Optional[Callable[[List[Any]], Any]] = None,
) -> torchdata.DataLoader:
    """
    Similar to `build_detection_train_loader`, with default batch size = 1,
    and sampler = :class:`InferenceSampler`. This sampler coordinates all workers
    to produce the exact set of all samples.

    Args:
        dataset: a list of dataset dicts,
            or a pytorch dataset (either map-style or iterable). They can be obtained
            by using :func:`DatasetCatalog.get` or :func:`get_detection_dataset_dicts`.
        mapper: a callable which takes a sample (dict) from dataset
            and returns the format to be consumed by the model.
            When using cfg, the default choice is ``DatasetMapper(cfg, is_train=False)``.
        sampler: a sampler that produces
            indices to be applied on ``dataset``. Default to :class:`InferenceSampler`,
            which splits the dataset across all workers. Sampler must be None
            if `dataset` is iterable.
        batch_size: the batch size of the data loader to be created.
            Default to 1 image per worker since this is the standard when reporting
            inference time in papers.
        num_workers: number of parallel data loading workers
        collate_fn: same as the argument of `torch.utils.data.DataLoader`.
            Defaults to do no collation and return a list of data.

    Returns:
        DataLoader: a torch DataLoader, that loads the given detection
        dataset, with test-time transformation and batching.

    Examples:
    ::
        data_loader = build_detection_test_loader(
            DatasetRegistry.get("my_test"),
            mapper=DatasetMapper(...))

        # or, instantiate with a CfgNode:
        data_loader = build_detection_test_loader(cfg, "my_test")
    """
    if isinstance(dataset, list):
        dataset = DatasetFromList(dataset, copy=False)
    if mapper is not None:
        dataset = MapDataset(dataset, mapper)
    if isinstance(dataset, torchdata.IterableDataset):
        assert sampler is None, "sampler must be None if dataset is IterableDataset"
    else:
        if sampler is None:
            sampler = InferenceSampler(len(dataset))
    return torchdata.DataLoader(
        dataset,
        batch_size=batch_size,
        sampler=sampler,
        drop_last=False,
        num_workers=num_workers,
        collate_fn=trivial_batch_collator if collate_fn is None else collate_fn,
    )


def trivial_batch_collator(batch):
    """
    A batch collator that does nothing.
    """
    return batch


def worker_init_reset_seed(worker_id):
    initial_seed = torch.initial_seed() % 2 ** 31
    seed_all_rng(initial_seed + worker_id)
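A minimal sketch (not part of the committed file) of the two ways build_detection_train_loader above can be called, following the signatures shown: config-driven via _train_loader_from_config, or with an explicit dataset, mapper and batch size. It assumes cfg.DATASETS.TRAIN names a dataset that is registered and available on disk; the batch size and worker count are illustrative.

# Sketch: the two calling conventions of build_detection_train_loader.
from detectron2.config import get_cfg
from detectron2.data import (
    DatasetMapper,
    build_detection_train_loader,
    get_detection_dataset_dicts,
)

cfg = get_cfg()  # assumed to point at a registered training set via cfg.DATASETS.TRAIN

# 1) config-driven: the remaining arguments are filled in by _train_loader_from_config
loader = build_detection_train_loader(cfg)

# 2) explicit: pass dataset dicts, a mapper and the total batch size directly
dicts = get_detection_dataset_dicts(cfg.DATASETS.TRAIN)
loader = build_detection_train_loader(
    dicts,
    mapper=DatasetMapper(cfg, is_train=True),
    total_batch_size=16,
    num_workers=4,
)
for batch in loader:  # each batch is a list[dict], batch size per GPU = 16 / world_size
    break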
detectron2/build/lib.linux-x86_64-3.10/detectron2/data/catalog.py
ADDED
@@ -0,0 +1,236 @@
# Copyright (c) Facebook, Inc. and its affiliates.
import copy
import logging
import types
from collections import UserDict
from typing import List

from detectron2.utils.logger import log_first_n

__all__ = ["DatasetCatalog", "MetadataCatalog", "Metadata"]


class _DatasetCatalog(UserDict):
    """
    A global dictionary that stores information about the datasets and how to obtain them.

    It contains a mapping from strings
    (which are names that identify a dataset, e.g. "coco_2014_train")
    to a function which parses the dataset and returns the samples in the
    format of `list[dict]`.

    The returned dicts should be in Detectron2 Dataset format (See DATASETS.md for details)
    if used with the data loader functionalities in `data/build.py,data/detection_transform.py`.

    The purpose of having this catalog is to make it easy to choose
    different datasets, by just using the strings in the config.
    """

    def register(self, name, func):
        """
        Args:
            name (str): the name that identifies a dataset, e.g. "coco_2014_train".
            func (callable): a callable which takes no arguments and returns a list of dicts.
                It must return the same results if called multiple times.
        """
        assert callable(func), "You must register a function with `DatasetCatalog.register`!"
        assert name not in self, "Dataset '{}' is already registered!".format(name)
        self[name] = func

    def get(self, name):
        """
        Call the registered function and return its results.

        Args:
            name (str): the name that identifies a dataset, e.g. "coco_2014_train".

        Returns:
            list[dict]: dataset annotations.
        """
        try:
            f = self[name]
        except KeyError as e:
            raise KeyError(
                "Dataset '{}' is not registered! Available datasets are: {}".format(
                    name, ", ".join(list(self.keys()))
                )
            ) from e
        return f()

    def list(self) -> List[str]:
        """
        List all registered datasets.

        Returns:
            list[str]
        """
        return list(self.keys())

    def remove(self, name):
        """
        Alias of ``pop``.
        """
        self.pop(name)

    def __str__(self):
        return "DatasetCatalog(registered datasets: {})".format(", ".join(self.keys()))

    __repr__ = __str__


DatasetCatalog = _DatasetCatalog()
DatasetCatalog.__doc__ = (
    _DatasetCatalog.__doc__
    + """
    .. automethod:: detectron2.data.catalog.DatasetCatalog.register
    .. automethod:: detectron2.data.catalog.DatasetCatalog.get
"""
)


class Metadata(types.SimpleNamespace):
    """
    A class that supports simple attribute setter/getter.
    It is intended for storing metadata of a dataset and make it accessible globally.

    Examples:
    ::
        # somewhere when you load the data:
        MetadataCatalog.get("mydataset").thing_classes = ["person", "dog"]

        # somewhere when you print statistics or visualize:
        classes = MetadataCatalog.get("mydataset").thing_classes
    """

    # the name of the dataset
    # set default to N/A so that `self.name` in the errors will not trigger getattr again
    name: str = "N/A"

    _RENAMED = {
        "class_names": "thing_classes",
        "dataset_id_to_contiguous_id": "thing_dataset_id_to_contiguous_id",
        "stuff_class_names": "stuff_classes",
    }

    def __getattr__(self, key):
        if key in self._RENAMED:
            log_first_n(
                logging.WARNING,
                "Metadata '{}' was renamed to '{}'!".format(key, self._RENAMED[key]),
                n=10,
            )
            return getattr(self, self._RENAMED[key])

        # "name" exists in every metadata
        if len(self.__dict__) > 1:
            raise AttributeError(
                "Attribute '{}' does not exist in the metadata of dataset '{}'. Available "
                "keys are {}.".format(key, self.name, str(self.__dict__.keys()))
            )
        else:
            raise AttributeError(
                f"Attribute '{key}' does not exist in the metadata of dataset '{self.name}': "
                "metadata is empty."
            )

    def __setattr__(self, key, val):
        if key in self._RENAMED:
            log_first_n(
                logging.WARNING,
                "Metadata '{}' was renamed to '{}'!".format(key, self._RENAMED[key]),
                n=10,
            )
            setattr(self, self._RENAMED[key], val)

        # Ensure that metadata of the same name stays consistent
        try:
            oldval = getattr(self, key)
            assert oldval == val, (
                "Attribute '{}' in the metadata of '{}' cannot be set "
                "to a different value!\n{} != {}".format(key, self.name, oldval, val)
            )
        except AttributeError:
            super().__setattr__(key, val)

    def as_dict(self):
        """
        Returns all the metadata as a dict.
        Note that modifications to the returned dict will not reflect on the Metadata object.
        """
        return copy.copy(self.__dict__)

    def set(self, **kwargs):
        """
        Set multiple metadata with kwargs.
        """
        for k, v in kwargs.items():
            setattr(self, k, v)
        return self

    def get(self, key, default=None):
        """
        Access an attribute and return its value if exists.
        Otherwise return default.
        """
        try:
            return getattr(self, key)
        except AttributeError:
            return default


class _MetadataCatalog(UserDict):
    """
    MetadataCatalog is a global dictionary that provides access to
    :class:`Metadata` of a given dataset.

    The metadata associated with a certain name is a singleton: once created, the
    metadata will stay alive and will be returned by future calls to ``get(name)``.

    It's like global variables, so don't abuse it.
    It's meant for storing knowledge that's constant and shared across the execution
    of the program, e.g.: the class names in COCO.
    """

    def get(self, name):
        """
        Args:
            name (str): name of a dataset (e.g. coco_2014_train).

        Returns:
            Metadata: The :class:`Metadata` instance associated with this name,
            or create an empty one if none is available.
        """
        assert len(name)
        r = super().get(name, None)
        if r is None:
            r = self[name] = Metadata(name=name)
        return r

    def list(self):
        """
        List all registered metadata.

        Returns:
            list[str]: keys (names of datasets) of all registered metadata
        """
        return list(self.keys())

    def remove(self, name):
        """
        Alias of ``pop``.
        """
        self.pop(name)

    def __str__(self):
        return "MetadataCatalog(registered metadata: {})".format(", ".join(self.keys()))

    __repr__ = __str__


MetadataCatalog = _MetadataCatalog()
MetadataCatalog.__doc__ = (
    _MetadataCatalog.__doc__
    + """
    .. automethod:: detectron2.data.catalog.MetadataCatalog.get
"""
)
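A short sketch (not part of the committed file) of how the two catalogs above are typically used together: a loader function registered under a name, metadata attached to the same name, then both retrieved elsewhere. The dataset name "my_dataset" and the returned dicts are illustrative.

# Sketch: register a dataset and its metadata, then fetch both by name.
from detectron2.data import DatasetCatalog, MetadataCatalog

def load_my_dataset():
    # must return list[dict] in Detectron2 Dataset format
    return [
        {"file_name": "img0.jpg", "image_id": 0, "height": 480, "width": 640, "annotations": []},
    ]

DatasetCatalog.register("my_dataset", load_my_dataset)
MetadataCatalog.get("my_dataset").thing_classes = ["person", "dog"]

dicts = DatasetCatalog.get("my_dataset")                     # calls load_my_dataset()
classes = MetadataCatalog.get("my_dataset").thing_classes    # ["person", "dog"]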
detectron2/build/lib.linux-x86_64-3.10/detectron2/data/common.py
ADDED
@@ -0,0 +1,244 @@
# Copyright (c) Facebook, Inc. and its affiliates.
import copy
import itertools
import logging
import numpy as np
import pickle
import random
import torch.utils.data as data
from torch.utils.data.sampler import Sampler

from detectron2.utils.serialize import PicklableWrapper

__all__ = ["MapDataset", "DatasetFromList", "AspectRatioGroupedDataset", "ToIterableDataset"]


def _shard_iterator_dataloader_worker(iterable):
    # Shard the iterable if we're currently inside pytorch dataloader worker.
    worker_info = data.get_worker_info()
    if worker_info is None or worker_info.num_workers == 1:
        # do nothing
        yield from iterable
    else:
        yield from itertools.islice(iterable, worker_info.id, None, worker_info.num_workers)


class _MapIterableDataset(data.IterableDataset):
    """
    Map a function over elements in an IterableDataset.

    Similar to pytorch's MapIterDataPipe, but support filtering when map_func
    returns None.

    This class is not public-facing. Will be called by `MapDataset`.
    """

    def __init__(self, dataset, map_func):
        self._dataset = dataset
        self._map_func = PicklableWrapper(map_func)  # wrap so that a lambda will work

    def __len__(self):
        return len(self._dataset)

    def __iter__(self):
        for x in map(self._map_func, self._dataset):
            if x is not None:
                yield x


class MapDataset(data.Dataset):
    """
    Map a function over the elements in a dataset.
    """

    def __init__(self, dataset, map_func):
        """
        Args:
            dataset: a dataset where map function is applied. Can be either
                map-style or iterable dataset. When given an iterable dataset,
                the returned object will also be an iterable dataset.
            map_func: a callable which maps the element in dataset. map_func can
                return None to skip the data (e.g. in case of errors).
                How None is handled depends on the style of `dataset`.
                If `dataset` is map-style, it randomly tries other elements.
                If `dataset` is iterable, it skips the data and tries the next.
        """
        self._dataset = dataset
        self._map_func = PicklableWrapper(map_func)  # wrap so that a lambda will work

        self._rng = random.Random(42)
        self._fallback_candidates = set(range(len(dataset)))

    def __new__(cls, dataset, map_func):
        is_iterable = isinstance(dataset, data.IterableDataset)
        if is_iterable:
            return _MapIterableDataset(dataset, map_func)
        else:
            return super().__new__(cls)

    def __getnewargs__(self):
        return self._dataset, self._map_func

    def __len__(self):
        return len(self._dataset)

    def __getitem__(self, idx):
        retry_count = 0
        cur_idx = int(idx)

        while True:
            data = self._map_func(self._dataset[cur_idx])
            if data is not None:
                self._fallback_candidates.add(cur_idx)
                return data

            # _map_func fails for this idx, use a random new index from the pool
            retry_count += 1
            self._fallback_candidates.discard(cur_idx)
            cur_idx = self._rng.sample(self._fallback_candidates, k=1)[0]

            if retry_count >= 3:
                logger = logging.getLogger(__name__)
                logger.warning(
                    "Failed to apply `_map_func` for idx: {}, retry count: {}".format(
                        idx, retry_count
                    )
                )


class DatasetFromList(data.Dataset):
    """
    Wrap a list to a torch Dataset. It produces elements of the list as data.
    """

    def __init__(self, lst: list, copy: bool = True, serialize: bool = True):
        """
        Args:
            lst (list): a list which contains elements to produce.
            copy (bool): whether to deepcopy the element when producing it,
                so that the result can be modified in place without affecting the
                source in the list.
            serialize (bool): whether to hold memory using serialized objects, when
                enabled, data loader workers can use shared RAM from master
                process instead of making a copy.
        """
        self._lst = lst
        self._copy = copy
        self._serialize = serialize

        def _serialize(data):
            buffer = pickle.dumps(data, protocol=-1)
            return np.frombuffer(buffer, dtype=np.uint8)

        if self._serialize:
            logger = logging.getLogger(__name__)
            logger.info(
                "Serializing {} elements to byte tensors and concatenating them all ...".format(
                    len(self._lst)
                )
            )
            self._lst = [_serialize(x) for x in self._lst]
            self._addr = np.asarray([len(x) for x in self._lst], dtype=np.int64)
            self._addr = np.cumsum(self._addr)
            self._lst = np.concatenate(self._lst)
            logger.info("Serialized dataset takes {:.2f} MiB".format(len(self._lst) / 1024 ** 2))

    def __len__(self):
        if self._serialize:
            return len(self._addr)
        else:
            return len(self._lst)

    def __getitem__(self, idx):
        if self._serialize:
            start_addr = 0 if idx == 0 else self._addr[idx - 1].item()
            end_addr = self._addr[idx].item()
            bytes = memoryview(self._lst[start_addr:end_addr])
            return pickle.loads(bytes)
        elif self._copy:
            return copy.deepcopy(self._lst[idx])
        else:
            return self._lst[idx]


class ToIterableDataset(data.IterableDataset):
    """
    Convert an old indices-based (also called map-style) dataset
    to an iterable-style dataset.
    """

    def __init__(self, dataset: data.Dataset, sampler: Sampler, shard_sampler: bool = True):
        """
        Args:
            dataset: an old-style dataset with ``__getitem__``
            sampler: a cheap iterable that produces indices to be applied on ``dataset``.
            shard_sampler: whether to shard the sampler based on the current pytorch data loader
                worker id. When an IterableDataset is forked by pytorch's DataLoader into multiple
                workers, it is responsible for sharding its data based on worker id so that workers
                don't produce identical data.

                Most samplers (like our TrainingSampler) do not shard based on dataloader worker id
                and this argument should be set to True. But certain samplers may be already
                sharded, in that case this argument should be set to False.
        """
        assert not isinstance(dataset, data.IterableDataset), dataset
        assert isinstance(sampler, Sampler), sampler
        self.dataset = dataset
        self.sampler = sampler
        self.shard_sampler = shard_sampler

    def __iter__(self):
        if not self.shard_sampler:
            sampler = self.sampler
        else:
            # With map-style dataset, `DataLoader(dataset, sampler)` runs the
            # sampler in main process only. But `DataLoader(ToIterableDataset(dataset, sampler))`
            # will run sampler in every of the N worker. So we should only keep 1/N of the ids on
            # each worker. The assumption is that sampler is cheap to iterate so it's fine to
            # discard ids in workers.
            sampler = _shard_iterator_dataloader_worker(self.sampler)
        for idx in sampler:
            yield self.dataset[idx]

    def __len__(self):
        return len(self.sampler)


class AspectRatioGroupedDataset(data.IterableDataset):
    """
    Batch data that have similar aspect ratio together.
    In this implementation, images whose aspect ratio < (or >) 1 will
    be batched together.
    This improves training speed because the images then need less padding
    to form a batch.

    It assumes the underlying dataset produces dicts with "width" and "height" keys.
    It will then produce a list of original dicts with length = batch_size,
    all with similar aspect ratios.
    """

    def __init__(self, dataset, batch_size):
        """
        Args:
            dataset: an iterable. Each element must be a dict with keys
                "width" and "height", which will be used to batch data.
            batch_size (int):
        """
        self.dataset = dataset
        self.batch_size = batch_size
        self._buckets = [[] for _ in range(2)]
        # Hard-coded two aspect ratio groups: w > h and w < h.
        # Can add support for more aspect ratio groups, but doesn't seem useful

    def __iter__(self):
        for d in self.dataset:
            w, h = d["width"], d["height"]
            bucket_id = 0 if w > h else 1
            bucket = self._buckets[bucket_id]
            bucket.append(d)
            if len(bucket) == self.batch_size:
                data = bucket[:]
                # Clear bucket first, because code after yield is not
                # guaranteed to execute
                del bucket[:]
                yield data
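A small sketch (not part of the committed file) of how the wrappers above compose, which mirrors what build_batch_data_loader does internally: DatasetFromList serializes a list of dicts, MapDataset applies a mapper lazily, ToIterableDataset streams indices from a sampler, and AspectRatioGroupedDataset groups the stream into batches. The sample dicts and the identity mapper are illustrative.

# Sketch: composing the dataset wrappers defined above by hand.
from detectron2.data.common import (
    AspectRatioGroupedDataset,
    DatasetFromList,
    MapDataset,
    ToIterableDataset,
)
from detectron2.data.samplers import TrainingSampler

samples = [{"width": 640, "height": 480, "value": i} for i in range(8)]
dataset = DatasetFromList(samples, copy=False, serialize=True)   # pickled into one byte array
dataset = MapDataset(dataset, lambda d: d)                        # identity mapper, for illustration
stream = ToIterableDataset(dataset, TrainingSampler(len(dataset), shuffle=False))
batched = AspectRatioGroupedDataset(stream, batch_size=2)
first_batch = next(iter(batched))   # list of 2 dicts that fall into the same aspect-ratio bucket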
detectron2/build/lib.linux-x86_64-3.10/detectron2/data/dataset_mapper.py
ADDED
@@ -0,0 +1,191 @@
# Copyright (c) Facebook, Inc. and its affiliates.
import copy
import logging
import numpy as np
from typing import List, Optional, Union
import torch

from detectron2.config import configurable

from . import detection_utils as utils
from . import transforms as T

"""
This file contains the default mapping that's applied to "dataset dicts".
"""

__all__ = ["DatasetMapper"]


class DatasetMapper:
    """
    A callable which takes a dataset dict in Detectron2 Dataset format,
    and map it into a format used by the model.

    This is the default callable to be used to map your dataset dict into training data.
    You may need to follow it to implement your own one for customized logic,
    such as a different way to read or transform images.
    See :doc:`/tutorials/data_loading` for details.

    The callable currently does the following:

    1. Read the image from "file_name"
    2. Applies cropping/geometric transforms to the image and annotations
    3. Prepare data and annotations to Tensor and :class:`Instances`
    """

    @configurable
    def __init__(
        self,
        is_train: bool,
        *,
        augmentations: List[Union[T.Augmentation, T.Transform]],
        image_format: str,
        use_instance_mask: bool = False,
        use_keypoint: bool = False,
        instance_mask_format: str = "polygon",
        keypoint_hflip_indices: Optional[np.ndarray] = None,
        precomputed_proposal_topk: Optional[int] = None,
        recompute_boxes: bool = False,
    ):
        """
        NOTE: this interface is experimental.

        Args:
            is_train: whether it's used in training or inference
            augmentations: a list of augmentations or deterministic transforms to apply
            image_format: an image format supported by :func:`detection_utils.read_image`.
            use_instance_mask: whether to process instance segmentation annotations, if available
            use_keypoint: whether to process keypoint annotations if available
            instance_mask_format: one of "polygon" or "bitmask". Process instance segmentation
                masks into this format.
            keypoint_hflip_indices: see :func:`detection_utils.create_keypoint_hflip_indices`
            precomputed_proposal_topk: if given, will load pre-computed
                proposals from dataset_dict and keep the top k proposals for each image.
            recompute_boxes: whether to overwrite bounding box annotations
                by computing tight bounding boxes from instance mask annotations.
        """
        if recompute_boxes:
            assert use_instance_mask, "recompute_boxes requires instance masks"
        # fmt: off
        self.is_train               = is_train
        self.augmentations          = T.AugmentationList(augmentations)
        self.image_format           = image_format
        self.use_instance_mask      = use_instance_mask
        self.instance_mask_format   = instance_mask_format
        self.use_keypoint           = use_keypoint
        self.keypoint_hflip_indices = keypoint_hflip_indices
        self.proposal_topk          = precomputed_proposal_topk
        self.recompute_boxes        = recompute_boxes
        # fmt: on
        logger = logging.getLogger(__name__)
        mode = "training" if is_train else "inference"
        logger.info(f"[DatasetMapper] Augmentations used in {mode}: {augmentations}")

    @classmethod
    def from_config(cls, cfg, is_train: bool = True):
        augs = utils.build_augmentation(cfg, is_train)
        if cfg.INPUT.CROP.ENABLED and is_train:
            augs.insert(0, T.RandomCrop(cfg.INPUT.CROP.TYPE, cfg.INPUT.CROP.SIZE))
            recompute_boxes = cfg.MODEL.MASK_ON
        else:
            recompute_boxes = False

        ret = {
            "is_train": is_train,
            "augmentations": augs,
            "image_format": cfg.INPUT.FORMAT,
            "use_instance_mask": cfg.MODEL.MASK_ON,
            "instance_mask_format": cfg.INPUT.MASK_FORMAT,
            "use_keypoint": cfg.MODEL.KEYPOINT_ON,
            "recompute_boxes": recompute_boxes,
        }

        if cfg.MODEL.KEYPOINT_ON:
            ret["keypoint_hflip_indices"] = utils.create_keypoint_hflip_indices(cfg.DATASETS.TRAIN)

        if cfg.MODEL.LOAD_PROPOSALS:
            ret["precomputed_proposal_topk"] = (
                cfg.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TRAIN
                if is_train
                else cfg.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TEST
            )
        return ret

    def _transform_annotations(self, dataset_dict, transforms, image_shape):
        # USER: Modify this if you want to keep them for some reason.
        for anno in dataset_dict["annotations"]:
            if not self.use_instance_mask:
                anno.pop("segmentation", None)
            if not self.use_keypoint:
                anno.pop("keypoints", None)

        # USER: Implement additional transformations if you have other types of data
        annos = [
            utils.transform_instance_annotations(
                obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices
            )
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = utils.annotations_to_instances(
            annos, image_shape, mask_format=self.instance_mask_format
        )

        # After transforms such as cropping are applied, the bounding box may no longer
        # tightly bound the object. As an example, imagine a triangle object
        # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight
        # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to
        # the intersection of original bounding box and the cropping box.
        if self.recompute_boxes:
            instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
        dataset_dict["instances"] = utils.filter_empty_instances(instances)

    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
        # USER: Write your own image loading if it's not from a file
        image = utils.read_image(dataset_dict["file_name"], format=self.image_format)
        utils.check_image_size(dataset_dict, image)

        # USER: Remove if you don't do semantic/panoptic segmentation.
        if "sem_seg_file_name" in dataset_dict:
            sem_seg_gt = utils.read_image(dataset_dict.pop("sem_seg_file_name"), "L").squeeze(2)
        else:
            sem_seg_gt = None

        aug_input = T.AugInput(image, sem_seg=sem_seg_gt)
        transforms = self.augmentations(aug_input)
        image, sem_seg_gt = aug_input.image, aug_input.sem_seg

        image_shape = image.shape[:2]  # h, w
        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))
        if sem_seg_gt is not None:
            dataset_dict["sem_seg"] = torch.as_tensor(sem_seg_gt.astype("long"))

        # USER: Remove if you don't use pre-computed proposals.
        # Most users would not need this feature.
        if self.proposal_topk is not None:
            utils.transform_proposals(
                dataset_dict, image_shape, transforms, proposal_topk=self.proposal_topk
            )

        if not self.is_train:
            # USER: Modify this if you want to keep them for some reason.
            dataset_dict.pop("annotations", None)
            dataset_dict.pop("sem_seg_file_name", None)
            return dataset_dict

        if "annotations" in dataset_dict:
            self._transform_annotations(dataset_dict, transforms, image_shape)

        return dataset_dict
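A brief sketch (not part of the committed file) of constructing the mapper above without a cfg, using the explicit keyword interface shown in __init__, and applying it to one dataset dict. The blank temporary image, its path, and the augmentation parameters are illustrative assumptions.

# Sketch: build a DatasetMapper directly and map a single dataset dict.
import numpy as np
from PIL import Image
from detectron2.data import DatasetMapper
import detectron2.data.transforms as T

# Write a blank image so the example dict points at a real file (illustrative only).
Image.fromarray(np.zeros((480, 640, 3), dtype=np.uint8)).save("/tmp/blank.jpg")

mapper = DatasetMapper(
    is_train=True,
    augmentations=[T.ResizeShortestEdge(short_edge_length=800, max_size=1333)],
    image_format="BGR",
)
example = {"file_name": "/tmp/blank.jpg", "image_id": 0, "annotations": []}
out = mapper(example)
# out["image"] is a CHW uint8 tensor after resizing;
# out["instances"] holds the (here empty) transformed ground-truth instances.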
detectron2/build/lib.linux-x86_64-3.10/detectron2/data/datasets/__init__.py
ADDED
@@ -0,0 +1,9 @@
# Copyright (c) Facebook, Inc. and its affiliates.
from .coco import load_coco_json, load_sem_seg, register_coco_instances, convert_to_coco_json
from .coco_panoptic import register_coco_panoptic, register_coco_panoptic_separated
from .lvis import load_lvis_json, register_lvis_instances, get_lvis_instances_meta
from .pascal_voc import load_voc_instances, register_pascal_voc
from . import builtin as _builtin  # ensure the builtin datasets are registered


__all__ = [k for k in globals().keys() if not k.startswith("_")]
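A final sketch (not part of the committed file): the most common entry point re-exported above, registering a custom dataset in COCO json format so it can be named in cfg.DATASETS.TRAIN. The dataset name and paths are illustrative.

# Sketch: register a COCO-format dataset through the re-exported helper.
from detectron2.data.datasets import register_coco_instances

register_coco_instances(
    "my_coco_train",                    # name to use in cfg.DATASETS.TRAIN
    {},                                 # extra metadata (can be empty)
    "path/to/annotations/train.json",   # COCO-format annotation file (illustrative path)
    "path/to/images/train",             # image root directory (illustrative path)
)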