HubHop committed 412c852 (parent: f82f114): update

This view is limited to 50 files because it contains too many changes.
- README copy.md +13 -0
- app.py +251 -0
- configs/_base_/datasets/ade20k.py +68 -0
- configs/_base_/datasets/ade20k_640x640.py +68 -0
- configs/_base_/datasets/bdd100k.py +70 -0
- configs/_base_/datasets/chase_db1.py +75 -0
- configs/_base_/datasets/cityscapes.py +67 -0
- configs/_base_/datasets/cityscapes_1024x1024.py +29 -0
- configs/_base_/datasets/cityscapes_768x768.py +29 -0
- configs/_base_/datasets/cityscapes_769x769.py +29 -0
- configs/_base_/datasets/cityscapes_832x832.py +29 -0
- configs/_base_/datasets/coco-stuff10k.py +69 -0
- configs/_base_/datasets/coco-stuff164k.py +67 -0
- configs/_base_/datasets/drive.py +73 -0
- configs/_base_/datasets/hrf.py +73 -0
- configs/_base_/datasets/isaid.py +73 -0
- configs/_base_/datasets/levir_256x256.py +59 -0
- configs/_base_/datasets/loveda.py +66 -0
- configs/_base_/datasets/mapillary_v1.py +68 -0
- configs/_base_/datasets/mapillary_v1_65.py +37 -0
- configs/_base_/datasets/mapillary_v2.py +68 -0
- configs/_base_/datasets/nyu.py +67 -0
- configs/_base_/datasets/nyu_512x512.py +72 -0
- configs/_base_/datasets/pascal_context.py +56 -0
- configs/_base_/datasets/pascal_context_59.py +72 -0
- configs/_base_/datasets/pascal_voc12.py +69 -0
- configs/_base_/datasets/pascal_voc12_aug.py +81 -0
- configs/_base_/datasets/potsdam.py +66 -0
- configs/_base_/datasets/refuge.py +90 -0
- configs/_base_/datasets/stare.py +73 -0
- configs/_base_/datasets/synapse.py +41 -0
- configs/_base_/datasets/vaihingen.py +66 -0
- configs/_base_/default_runtime.py +15 -0
- configs/_base_/models/ann_r50-d8.py +54 -0
- configs/_base_/models/apcnet_r50-d8.py +52 -0
- configs/_base_/models/bisenetv1_r18-d32.py +76 -0
- configs/_base_/models/bisenetv2.py +88 -0
- configs/_base_/models/ccnet_r50-d8.py +52 -0
- configs/_base_/models/cgnet.py +43 -0
- configs/_base_/models/danet_r50-d8.py +52 -0
- configs/_base_/models/deeplabv3_r50-d8.py +52 -0
- configs/_base_/models/deeplabv3_unet_s5-d16.py +58 -0
- configs/_base_/models/deeplabv3plus_r50-d8.py +54 -0
- configs/_base_/models/dmnet_r50-d8.py +52 -0
- configs/_base_/models/dnl_r50-d8.py +54 -0
- configs/_base_/models/dpt_vit-b16.py +39 -0
- configs/_base_/models/emanet_r50-d8.py +55 -0
- configs/_base_/models/encnet_r50-d8.py +56 -0
- configs/_base_/models/erfnet_fcn.py +40 -0
- configs/_base_/models/fast_scnn.py +65 -0
README copy.md
ADDED
@@ -0,0 +1,13 @@
+---
+title: Snnetv2 Semantic Segmentation
+emoji: 🐨
+colorFrom: green
+colorTo: red
+sdk: gradio
+sdk_version: 4.14.0
+app_file: app.py
+pinned: false
+license: apache-2.0
+---
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py
ADDED
@@ -0,0 +1,251 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from argparse import ArgumentParser
+
+import cv2
+from mmengine.model.utils import revert_sync_batchnorm
+
+from mmseg.apis import inference_model, init_model
+from mmseg.apis.inference import show_result_pyplot
+import torch
+import time
+import gradio as gr
+import plotly.express as px
+import json
+
+def main():
+    parser = ArgumentParser()
+    parser.add_argument('--config', default='configs/snnet/setr_naive_512x512_160k_b16_ade20k_deit_3_s_l_224_snnetv2.py', help='Config file')
+    parser.add_argument('--checkpoint', help='Checkpoint file', default='setr_naive_512x512_160k_b16_ade20k_snnetv2_deit3_s_l_lora_16_iter_160000.pth')
+    # parser.add_argument('--video', help='Video file or webcam id')
+
+    parser.add_argument(
+        '--device', default='cuda:0', help='Device used for inference')
+    parser.add_argument(
+        '--palette',
+        default='cityscapes',
+        help='Color palette used for segmentation map')
+    parser.add_argument(
+        '--show', action='store_true', help='Whether to show draw result')
+    parser.add_argument(
+        '--show-wait-time', default=1, type=int, help='Wait time after imshow')
+    parser.add_argument(
+        '--output-file', default=None, type=str, help='Output video file path')
+    parser.add_argument(
+        '--output-fourcc',
+        default='MJPG',
+        type=str,
+        help='Fourcc of the output video')
+    parser.add_argument(
+        '--output-fps', default=30, type=int, help='FPS of the output video')
+    parser.add_argument(
+        '--output-height',
+        default=-1,
+        type=int,
+        help='Frame height of the output video')
+    parser.add_argument(
+        '--output-width',
+        default=-1,
+        type=int,
+        help='Frame width of the output video')
+    parser.add_argument(
+        '--opacity',
+        type=float,
+        default=0.5,
+        help='Opacity of painted segmentation map. In (0, 1] range.')
+    args = parser.parse_args()
+
+    # build the model from a config file and a checkpoint file
+    model = init_model(args.config, args.checkpoint, device=args.device)
+    if args.device == 'cpu':
+        model = revert_sync_batchnorm(model)
+
+    # enumerate all stitch configurations between the two anchors
+    # (12-block DeiT3-S and 24-block DeiT3-L)
+    from mmseg.models.backbones.snnet import get_stitch_configs_bidirection
+    stitch_configs_info, _, _, anchor_ids, sl_ids, ls_ids, lsl_ids, sls_ids = get_stitch_configs_bidirection([12, 24])
+
+    stitch_configs_info = {i: cfg for i, cfg in enumerate(stitch_configs_info)}
+
+    # precomputed FLOPs and single-scale mIoU for every stitch config
+    with open('./model_flops/snnet_flops_setr_naive_512x512_160k_b16_ade20k_deit_3_s_l_224_snnetv2.json', 'r') as f:
+        flops_params = json.load(f)
+
+    with open('./results/eval_single_scale_20230507_235400.json', 'r') as f:
+        results = json.load(f)
+
+    config_ids = list(results.keys())
+    flops_res = {}
+    eval_res = {}
+    total_data = {}
+    for i, cfg_id in enumerate(config_ids):
+        flops = flops_params[cfg_id]
+        miou_res = results[cfg_id]['metric']['mIoU'] * 100
+        eval_res[int(cfg_id)] = miou_res
+        flops_res[int(cfg_id)] = flops / 1e9
+        total_data[int(cfg_id)] = [flops // 1e9, miou_res]
+
+    def visualize_stitch_pos(stitch_id):
+        if stitch_id == 13:
+            # 13 is equivalent to 0
+            stitch_id = 0
+
+        names = [f'ID {key}' for key in flops_res.keys()]
+
+        fig = px.scatter(x=flops_res.values(), y=eval_res.values(), hover_name=names)
+        fig.update_layout(
+            title=f"SN-Netv2 - Stitch ID - {stitch_id}",
+            title_x=0.5,
+            xaxis_title="GFLOPs",
+            yaxis_title="mIoU",
+            font=dict(
+                family="Courier New, monospace",
+                size=18,
+                color="RebeccaPurple"
+            ),
+            legend=dict(
+                yanchor="bottom",
+                y=0.99,
+                xanchor="left",
+                x=0.01),
+        )
+        # continent, DarkSlateGrey
+        fig.update_traces(marker=dict(size=10,
+                                      line=dict(width=2)),
+                          selector=dict(mode='markers'))
+
+        fig.add_scatter(x=[flops_res[stitch_id]], y=[eval_res[stitch_id]], mode='markers', marker=dict(size=15), name='Current Stitch')
+        return fig
+
+    def segment_video(video, stitch_id):
+
+        if stitch_id == 13:
+            # 13 is equivalent to 0
+            stitch_id = 0
+
+        model.backbone.reset_stitch_id(stitch_id)
+        output_video_path = './temp_video.avi'
+        cap = cv2.VideoCapture(video)
+        assert (cap.isOpened())
+        input_height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
+        input_width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
+        input_fps = cap.get(cv2.CAP_PROP_FPS)
+
+        fourcc = cv2.VideoWriter_fourcc(*args.output_fourcc)
+        output_fps = args.output_fps if args.output_fps > 0 else input_fps
+        output_height = args.output_height if args.output_height > 0 else int(
+            input_height)
+        output_width = args.output_width if args.output_width > 0 else int(
+            input_width)
+        writer = cv2.VideoWriter(output_video_path, fourcc, output_fps,
+                                 (output_width, output_height), True)
+
+        try:
+            while True:
+                start_time = time.time()
+                flag, frame = cap.read()
+                if not flag:
+                    break
+
+                # test a single image
+                result = inference_model(model, frame)
+
+                # blend raw image and prediction
+                draw_img = show_result_pyplot(model, frame, result,
+                                              show=False,
+                                              with_labels=False,
+                                              )
+
+                if draw_img.shape[0] != output_height or draw_img.shape[
+                        1] != output_width:
+                    draw_img = cv2.resize(draw_img,
+                                          (output_width, output_height))
+                writer.write(draw_img)
+        finally:
+            if writer:
+                writer.release()
+            cap.release()
+
+        fig = visualize_stitch_pos(stitch_id)
+
+        return output_video_path, fig
+
+    def segment_image(image, stitch_id):
+        if stitch_id == 13:
+            # 13 is equivalent to 0
+            stitch_id = 0
+
+        model.backbone.reset_stitch_id(stitch_id)
+        result = inference_model(model, image)
+        draw_img = show_result_pyplot(model, image, result,
+                                      show=False,
+                                      with_labels=True,
+                                      )
+        fig = visualize_stitch_pos(stitch_id)
+        return draw_img, fig
+
+    with gr.Blocks() as image_demo:
+        with gr.Row():
+            with gr.Column():
+                image_input = gr.Image(label='Input Image')
+                stitch_slider = gr.Slider(minimum=0, maximum=134, step=1, label="Stitch ID")
+                with gr.Row():
+                    clear_button = gr.ClearButton()
+                    submit_button = gr.Button()
+
+            with gr.Column():
+                image_output = gr.Image(label='Segmentation Results')
+                stitch_plot = gr.Plot(label='Stitch Position')
+
+        submit_button.click(
+            fn=segment_image,
+            inputs=[image_input, stitch_slider],
+            outputs=[image_output, stitch_plot],
+        )
+
+        stitch_slider.change(
+            fn=visualize_stitch_pos,
+            inputs=[stitch_slider],
+            outputs=[stitch_plot],
+            show_progress=False
+        )
+
+        clear_button.click(
+            lambda: [None, 0, None, None],
+            outputs=[image_input, stitch_slider, image_output, stitch_plot],
+        )
+
+        gr.Examples(
+            [
+                ['./demo_1.jpg', 0],
+                ['./demo_2.jpg', 1],
+                ['./demo_3.jpg', 93],
+                ['./demo_4.jpg', 3],
+            ],
+            inputs=[
+                image_input,
+                stitch_slider
+            ],
+            outputs=[
+                image_input,
+                stitch_plot
+            ],
+        )
+
+    with gr.Blocks() as demo:
+        with gr.Column():
+            gr.HTML("""
+            <h1 align="center" style=" display: flex; flex-direction: row; justify-content: center; font-size: 25pt; ">Stitched ViTs are Flexible Vision Backbones</h1>
+            <div align="center"> <img align="center" src='file/gradio_banner.png' width="70%"> </div>
+            <h3 align="center" >This is the segmentation demo page of SN-Netv2, a flexible vision backbone that allows for 100+ runtime speed and performance trade-offs. You can also run this gradio demo on your local GPUs at <a href="https://github.com/ziplab/SN-Netv2">https://github.com/ziplab/SN-Netv2</a>, Paper link: <a href="https://arxiv.org/abs/2307.00154">https://arxiv.org/abs/2307.00154</a>.</h3>
+            """)
+        tabbed_page = gr.TabbedInterface([image_demo,], ['Image'])
+
+    demo.launch(allowed_paths=['./'])
+
+
+if __name__ == '__main__':
+    main()
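For reference, the app above can presumably also be launched outside the Space by cloning the repo and running the script directly; the commands below are a hypothetical local launch (the package list and pinned Gradio version are assumptions based on the README front matter, not part of this commit):

    # assumed dependencies, then start the Gradio demo on CPU
    pip install "gradio==4.14.0" plotly mmengine mmsegmentation
    python app.py --device cpu    # triggers the revert_sync_batchnorm() path above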
configs/_base_/datasets/ade20k.py
ADDED
@@ -0,0 +1,68 @@
+# dataset settings
+dataset_type = 'ADE20KDataset'
+data_root = 'data/ade/ADEChallengeData2016'
+crop_size = (512, 512)
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations', reduce_zero_label=True),
+    dict(
+        type='RandomResize',
+        scale=(2048, 512),
+        ratio_range=(0.5, 2.0),
+        keep_ratio=True),
+    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+    dict(type='RandomFlip', prob=0.5),
+    dict(type='PhotoMetricDistortion'),
+    dict(type='PackSegInputs')
+]
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='Resize', scale=(2048, 512), keep_ratio=True),
+    # add loading annotation after ``Resize`` because ground truth
+    # does not need to do resize data transform
+    dict(type='LoadAnnotations', reduce_zero_label=True),
+    dict(type='PackSegInputs')
+]
+img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
+tta_pipeline = [
+    dict(type='LoadImageFromFile', backend_args=None),
+    dict(
+        type='TestTimeAug',
+        transforms=[
+            [
+                dict(type='Resize', scale_factor=r, keep_ratio=True)
+                for r in img_ratios
+            ],
+            [
+                dict(type='RandomFlip', prob=0., direction='horizontal'),
+                dict(type='RandomFlip', prob=1., direction='horizontal')
+            ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
+        ])
+]
+train_dataloader = dict(
+    batch_size=4,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='InfiniteSampler', shuffle=True),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_prefix=dict(
+            img_path='images/training', seg_map_path='annotations/training'),
+        pipeline=train_pipeline))
+val_dataloader = dict(
+    batch_size=1,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_prefix=dict(
+            img_path='images/validation',
+            seg_map_path='annotations/validation'),
+        pipeline=test_pipeline))
+test_dataloader = val_dataloader
+
+val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
+test_evaluator = val_evaluator
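The files under configs/_base_/ are building blocks rather than runnable experiment configs: a top-level config inherits and overrides them through the `_base_` mechanism (exactly as cityscapes_1024x1024.py further down does for cityscapes.py). A minimal hypothetical example combining this dataset with a model file from the same commit (the file name snnet_example.py is illustrative, not part of this diff):

    # configs/snnet/snnet_example.py -- hypothetical, for illustration only
    _base_ = [
        '../_base_/models/deeplabv3_r50-d8.py',  # a model config added in this commit
        '../_base_/datasets/ade20k.py',          # the dataset config shown above
        '../_base_/default_runtime.py',          # logging/checkpoint defaults
    ]
    crop_size = (512, 512)  # values set here override the inherited _base_ values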
configs/_base_/datasets/ade20k_640x640.py
ADDED
@@ -0,0 +1,68 @@
+# dataset settings
+dataset_type = 'ADE20KDataset'
+data_root = 'data/ade/ADEChallengeData2016'
+crop_size = (640, 640)
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations', reduce_zero_label=True),
+    dict(
+        type='RandomResize',
+        scale=(2560, 640),
+        ratio_range=(0.5, 2.0),
+        keep_ratio=True),
+    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+    dict(type='RandomFlip', prob=0.5),
+    dict(type='PhotoMetricDistortion'),
+    dict(type='PackSegInputs')
+]
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='Resize', scale=(2560, 640), keep_ratio=True),
+    # add loading annotation after ``Resize`` because ground truth
+    # does not need to do resize data transform
+    dict(type='LoadAnnotations', reduce_zero_label=True),
+    dict(type='PackSegInputs')
+]
+img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
+tta_pipeline = [
+    dict(type='LoadImageFromFile', backend_args=None),
+    dict(
+        type='TestTimeAug',
+        transforms=[
+            [
+                dict(type='Resize', scale_factor=r, keep_ratio=True)
+                for r in img_ratios
+            ],
+            [
+                dict(type='RandomFlip', prob=0., direction='horizontal'),
+                dict(type='RandomFlip', prob=1., direction='horizontal')
+            ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
+        ])
+]
+train_dataloader = dict(
+    batch_size=4,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='InfiniteSampler', shuffle=True),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_prefix=dict(
+            img_path='images/training', seg_map_path='annotations/training'),
+        pipeline=train_pipeline))
+val_dataloader = dict(
+    batch_size=1,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_prefix=dict(
+            img_path='images/validation',
+            seg_map_path='annotations/validation'),
+        pipeline=test_pipeline))
+test_dataloader = val_dataloader
+
+val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
+test_evaluator = val_evaluator
configs/_base_/datasets/bdd100k.py
ADDED
@@ -0,0 +1,70 @@
+# dataset settings
+dataset_type = 'BDD100KDataset'
+data_root = 'data/bdd100k/'
+
+crop_size = (512, 1024)
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations'),
+    dict(
+        type='RandomResize',
+        scale=(2048, 1024),
+        ratio_range=(0.5, 2.0),
+        keep_ratio=True),
+    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+    dict(type='RandomFlip', prob=0.5),
+    dict(type='PhotoMetricDistortion'),
+    dict(type='PackSegInputs')
+]
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='Resize', scale=(2048, 1024), keep_ratio=True),
+    # add loading annotation after ``Resize`` because ground truth
+    # does not need to do resize data transform
+    dict(type='LoadAnnotations'),
+    dict(type='PackSegInputs')
+]
+img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
+tta_pipeline = [
+    dict(type='LoadImageFromFile', backend_args=None),
+    dict(
+        type='TestTimeAug',
+        transforms=[
+            [
+                dict(type='Resize', scale_factor=r, keep_ratio=True)
+                for r in img_ratios
+            ],
+            [
+                dict(type='RandomFlip', prob=0., direction='horizontal'),
+                dict(type='RandomFlip', prob=1., direction='horizontal')
+            ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
+        ])
+]
+train_dataloader = dict(
+    batch_size=2,
+    num_workers=2,
+    persistent_workers=True,
+    sampler=dict(type='InfiniteSampler', shuffle=True),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_prefix=dict(
+            img_path='images/10k/train',
+            seg_map_path='labels/sem_seg/masks/train'),
+        pipeline=train_pipeline))
+val_dataloader = dict(
+    batch_size=1,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_prefix=dict(
+            img_path='images/10k/val',
+            seg_map_path='labels/sem_seg/masks/val'),
+        pipeline=test_pipeline))
+test_dataloader = val_dataloader
+
+val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
+test_evaluator = val_evaluator
configs/_base_/datasets/chase_db1.py
ADDED
@@ -0,0 +1,75 @@
+# dataset settings
+dataset_type = 'ChaseDB1Dataset'
+data_root = 'data/CHASE_DB1'
+img_scale = (960, 999)
+crop_size = (128, 128)
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations'),
+    dict(
+        type='RandomResize',
+        scale=img_scale,
+        ratio_range=(0.5, 2.0),
+        keep_ratio=True),
+    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+    dict(type='RandomFlip', prob=0.5),
+    dict(type='PhotoMetricDistortion'),
+    dict(type='PackSegInputs')
+]
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='Resize', scale=img_scale, keep_ratio=True),
+    # add loading annotation after ``Resize`` because ground truth
+    # does not need to do resize data transform
+    dict(type='LoadAnnotations'),
+    dict(type='PackSegInputs')
+]
+img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
+tta_pipeline = [
+    dict(type='LoadImageFromFile', backend_args=None),
+    dict(
+        type='TestTimeAug',
+        transforms=[
+            [
+                dict(type='Resize', scale_factor=r, keep_ratio=True)
+                for r in img_ratios
+            ],
+            [
+                dict(type='RandomFlip', prob=0., direction='horizontal'),
+                dict(type='RandomFlip', prob=1., direction='horizontal')
+            ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
+        ])
+]
+
+train_dataloader = dict(
+    batch_size=4,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='InfiniteSampler', shuffle=True),
+    dataset=dict(
+        type='RepeatDataset',
+        times=40000,
+        dataset=dict(
+            type=dataset_type,
+            data_root=data_root,
+            data_prefix=dict(
+                img_path='images/training',
+                seg_map_path='annotations/training'),
+            pipeline=train_pipeline)))
+
+val_dataloader = dict(
+    batch_size=1,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_prefix=dict(
+            img_path='images/validation',
+            seg_map_path='annotations/validation'),
+        pipeline=test_pipeline))
+test_dataloader = val_dataloader
+
+val_evaluator = dict(type='IoUMetric', iou_metrics=['mDice'])
+test_evaluator = val_evaluator
configs/_base_/datasets/cityscapes.py
ADDED
@@ -0,0 +1,67 @@
+# dataset settings
+dataset_type = 'CityscapesDataset'
+data_root = 'data/cityscapes/'
+crop_size = (512, 1024)
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations'),
+    dict(
+        type='RandomResize',
+        scale=(2048, 1024),
+        ratio_range=(0.5, 2.0),
+        keep_ratio=True),
+    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+    dict(type='RandomFlip', prob=0.5),
+    dict(type='PhotoMetricDistortion'),
+    dict(type='PackSegInputs')
+]
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='Resize', scale=(2048, 1024), keep_ratio=True),
+    # add loading annotation after ``Resize`` because ground truth
+    # does not need to do resize data transform
+    dict(type='LoadAnnotations'),
+    dict(type='PackSegInputs')
+]
+img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
+tta_pipeline = [
+    dict(type='LoadImageFromFile', backend_args=None),
+    dict(
+        type='TestTimeAug',
+        transforms=[
+            [
+                dict(type='Resize', scale_factor=r, keep_ratio=True)
+                for r in img_ratios
+            ],
+            [
+                dict(type='RandomFlip', prob=0., direction='horizontal'),
+                dict(type='RandomFlip', prob=1., direction='horizontal')
+            ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
+        ])
+]
+train_dataloader = dict(
+    batch_size=2,
+    num_workers=2,
+    persistent_workers=True,
+    sampler=dict(type='InfiniteSampler', shuffle=True),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_prefix=dict(
+            img_path='leftImg8bit/train', seg_map_path='gtFine/train'),
+        pipeline=train_pipeline))
+val_dataloader = dict(
+    batch_size=1,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_prefix=dict(
+            img_path='leftImg8bit/val', seg_map_path='gtFine/val'),
+        pipeline=test_pipeline))
+test_dataloader = val_dataloader
+
+val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
+test_evaluator = val_evaluator
configs/_base_/datasets/cityscapes_1024x1024.py
ADDED
@@ -0,0 +1,29 @@
+_base_ = './cityscapes.py'
+crop_size = (1024, 1024)
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations'),
+    dict(
+        type='RandomResize',
+        scale=(2048, 1024),
+        ratio_range=(0.5, 2.0),
+        keep_ratio=True),
+    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+    dict(type='RandomFlip', prob=0.5),
+    dict(type='PhotoMetricDistortion'),
+    dict(type='PackSegInputs')
+]
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='Resize', scale=(2048, 1024), keep_ratio=True),
+    # add loading annotation after ``Resize`` because ground truth
+    # does not need to do resize data transform
+    dict(type='LoadAnnotations'),
+    dict(type='PackSegInputs')
+]
+train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
+val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+test_dataloader = val_dataloader
+
+val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
+test_evaluator = val_evaluator
configs/_base_/datasets/cityscapes_768x768.py
ADDED
@@ -0,0 +1,29 @@
+_base_ = './cityscapes.py'
+crop_size = (768, 768)
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations'),
+    dict(
+        type='RandomResize',
+        scale=(2049, 1025),
+        ratio_range=(0.5, 2.0),
+        keep_ratio=True),
+    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+    dict(type='RandomFlip', prob=0.5),
+    dict(type='PhotoMetricDistortion'),
+    dict(type='PackSegInputs')
+]
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='Resize', scale=(2049, 1025), keep_ratio=True),
+    # add loading annotation after ``Resize`` because ground truth
+    # does not need to do resize data transform
+    dict(type='LoadAnnotations'),
+    dict(type='PackSegInputs')
+]
+train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
+val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+test_dataloader = val_dataloader
+
+val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
+test_evaluator = val_evaluator
configs/_base_/datasets/cityscapes_769x769.py
ADDED
@@ -0,0 +1,29 @@
+_base_ = './cityscapes.py'
+crop_size = (769, 769)
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations'),
+    dict(
+        type='RandomResize',
+        scale=(2049, 1025),
+        ratio_range=(0.5, 2.0),
+        keep_ratio=True),
+    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+    dict(type='RandomFlip', prob=0.5),
+    dict(type='PhotoMetricDistortion'),
+    dict(type='PackSegInputs')
+]
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='Resize', scale=(2049, 1025), keep_ratio=True),
+    # add loading annotation after ``Resize`` because ground truth
+    # does not need to do resize data transform
+    dict(type='LoadAnnotations'),
+    dict(type='PackSegInputs')
+]
+train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
+val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+test_dataloader = val_dataloader
+
+val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
+test_evaluator = val_evaluator
configs/_base_/datasets/cityscapes_832x832.py
ADDED
@@ -0,0 +1,29 @@
+_base_ = './cityscapes.py'
+crop_size = (832, 832)
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations'),
+    dict(
+        type='RandomResize',
+        scale=(2048, 1024),
+        ratio_range=(0.5, 2.0),
+        keep_ratio=True),
+    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+    dict(type='RandomFlip', prob=0.5),
+    dict(type='PhotoMetricDistortion'),
+    dict(type='PackSegInputs')
+]
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='Resize', scale=(2048, 1024), keep_ratio=True),
+    # add loading annotation after ``Resize`` because ground truth
+    # does not need to do resize data transform
+    dict(type='LoadAnnotations'),
+    dict(type='PackSegInputs')
+]
+train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
+val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+test_dataloader = val_dataloader
+
+val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
+test_evaluator = val_evaluator
configs/_base_/datasets/coco-stuff10k.py
ADDED
@@ -0,0 +1,69 @@
+# dataset settings
+dataset_type = 'COCOStuffDataset'
+data_root = 'data/coco_stuff10k'
+crop_size = (512, 512)
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations', reduce_zero_label=True),
+    dict(
+        type='RandomResize',
+        scale=(2048, 512),
+        ratio_range=(0.5, 2.0),
+        keep_ratio=True),
+    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+    dict(type='RandomFlip', prob=0.5),
+    dict(type='PhotoMetricDistortion'),
+    dict(type='PackSegInputs')
+]
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='Resize', scale=(2048, 512), keep_ratio=True),
+    # add loading annotation after ``Resize`` because ground truth
+    # does not need to do resize data transform
+    dict(type='LoadAnnotations', reduce_zero_label=True),
+    dict(type='PackSegInputs')
+]
+img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
+tta_pipeline = [
+    dict(type='LoadImageFromFile', backend_args=None),
+    dict(
+        type='TestTimeAug',
+        transforms=[
+            [
+                dict(type='Resize', scale_factor=r, keep_ratio=True)
+                for r in img_ratios
+            ],
+            [
+                dict(type='RandomFlip', prob=0., direction='horizontal'),
+                dict(type='RandomFlip', prob=1., direction='horizontal')
+            ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
+        ])
+]
+train_dataloader = dict(
+    batch_size=4,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='InfiniteSampler', shuffle=True),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        reduce_zero_label=True,
+        data_prefix=dict(
+            img_path='images/train2014', seg_map_path='annotations/train2014'),
+        pipeline=train_pipeline))
+val_dataloader = dict(
+    batch_size=1,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        reduce_zero_label=True,
+        data_prefix=dict(
+            img_path='images/test2014', seg_map_path='annotations/test2014'),
+        pipeline=test_pipeline))
+test_dataloader = val_dataloader
+
+val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
+test_evaluator = val_evaluator
configs/_base_/datasets/coco-stuff164k.py
ADDED
@@ -0,0 +1,67 @@
+# dataset settings
+dataset_type = 'COCOStuffDataset'
+data_root = 'data/coco_stuff164k'
+crop_size = (512, 512)
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations'),
+    dict(
+        type='RandomResize',
+        scale=(2048, 512),
+        ratio_range=(0.5, 2.0),
+        keep_ratio=True),
+    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+    dict(type='RandomFlip', prob=0.5),
+    dict(type='PhotoMetricDistortion'),
+    dict(type='PackSegInputs')
+]
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='Resize', scale=(2048, 512), keep_ratio=True),
+    # add loading annotation after ``Resize`` because ground truth
+    # does not need to do resize data transform
+    dict(type='LoadAnnotations'),
+    dict(type='PackSegInputs')
+]
+img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
+tta_pipeline = [
+    dict(type='LoadImageFromFile', backend_args=None),
+    dict(
+        type='TestTimeAug',
+        transforms=[
+            [
+                dict(type='Resize', scale_factor=r, keep_ratio=True)
+                for r in img_ratios
+            ],
+            [
+                dict(type='RandomFlip', prob=0., direction='horizontal'),
+                dict(type='RandomFlip', prob=1., direction='horizontal')
+            ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
+        ])
+]
+train_dataloader = dict(
+    batch_size=4,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='InfiniteSampler', shuffle=True),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_prefix=dict(
+            img_path='images/train2017', seg_map_path='annotations/train2017'),
+        pipeline=train_pipeline))
+val_dataloader = dict(
+    batch_size=1,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_prefix=dict(
+            img_path='images/val2017', seg_map_path='annotations/val2017'),
+        pipeline=test_pipeline))
+test_dataloader = val_dataloader
+
+val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
+test_evaluator = val_evaluator
configs/_base_/datasets/drive.py
ADDED
@@ -0,0 +1,73 @@
+# dataset settings
+dataset_type = 'DRIVEDataset'
+data_root = 'data/DRIVE'
+img_scale = (584, 565)
+crop_size = (64, 64)
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations'),
+    dict(
+        type='RandomResize',
+        scale=img_scale,
+        ratio_range=(0.5, 2.0),
+        keep_ratio=True),
+    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+    dict(type='RandomFlip', prob=0.5),
+    dict(type='PhotoMetricDistortion'),
+    dict(type='PackSegInputs')
+]
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='Resize', scale=img_scale, keep_ratio=True),
+    # add loading annotation after ``Resize`` because ground truth
+    # does not need to do resize data transform
+    dict(type='LoadAnnotations'),
+    dict(type='PackSegInputs')
+]
+img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
+tta_pipeline = [
+    dict(type='LoadImageFromFile', backend_args=None),
+    dict(
+        type='TestTimeAug',
+        transforms=[
+            [
+                dict(type='Resize', scale_factor=r, keep_ratio=True)
+                for r in img_ratios
+            ],
+            [
+                dict(type='RandomFlip', prob=0., direction='horizontal'),
+                dict(type='RandomFlip', prob=1., direction='horizontal')
+            ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
+        ])
+]
+train_dataloader = dict(
+    batch_size=4,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='InfiniteSampler', shuffle=True),
+    dataset=dict(
+        type='RepeatDataset',
+        times=40000,
+        dataset=dict(
+            type=dataset_type,
+            data_root=data_root,
+            data_prefix=dict(
+                img_path='images/training',
+                seg_map_path='annotations/training'),
+            pipeline=train_pipeline)))
+val_dataloader = dict(
+    batch_size=1,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_prefix=dict(
+            img_path='images/validation',
+            seg_map_path='annotations/validation'),
+        pipeline=test_pipeline))
+test_dataloader = val_dataloader
+
+val_evaluator = dict(type='IoUMetric', iou_metrics=['mDice'])
+test_evaluator = val_evaluator
configs/_base_/datasets/hrf.py
ADDED
@@ -0,0 +1,73 @@
+# dataset settings
+dataset_type = 'HRFDataset'
+data_root = 'data/HRF'
+img_scale = (2336, 3504)
+crop_size = (256, 256)
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations'),
+    dict(
+        type='RandomResize',
+        scale=img_scale,
+        ratio_range=(0.5, 2.0),
+        keep_ratio=True),
+    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+    dict(type='RandomFlip', prob=0.5),
+    dict(type='PhotoMetricDistortion'),
+    dict(type='PackSegInputs')
+]
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='Resize', scale=img_scale, keep_ratio=True),
+    # add loading annotation after ``Resize`` because ground truth
+    # does not need to do resize data transform
+    dict(type='LoadAnnotations'),
+    dict(type='PackSegInputs')
+]
+img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
+tta_pipeline = [
+    dict(type='LoadImageFromFile', backend_args=None),
+    dict(
+        type='TestTimeAug',
+        transforms=[
+            [
+                dict(type='Resize', scale_factor=r, keep_ratio=True)
+                for r in img_ratios
+            ],
+            [
+                dict(type='RandomFlip', prob=0., direction='horizontal'),
+                dict(type='RandomFlip', prob=1., direction='horizontal')
+            ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
+        ])
+]
+train_dataloader = dict(
+    batch_size=4,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='InfiniteSampler', shuffle=True),
+    dataset=dict(
+        type='RepeatDataset',
+        times=40000,
+        dataset=dict(
+            type=dataset_type,
+            data_root=data_root,
+            data_prefix=dict(
+                img_path='images/training',
+                seg_map_path='annotations/training'),
+            pipeline=train_pipeline)))
+val_dataloader = dict(
+    batch_size=1,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_prefix=dict(
+            img_path='images/validation',
+            seg_map_path='annotations/validation'),
+        pipeline=test_pipeline))
+test_dataloader = val_dataloader
+
+val_evaluator = dict(type='IoUMetric', iou_metrics=['mDice'])
+test_evaluator = val_evaluator
configs/_base_/datasets/isaid.py
ADDED
@@ -0,0 +1,73 @@
+# dataset settings
+dataset_type = 'iSAIDDataset'
+data_root = 'data/iSAID'
+"""
+This crop_size setting is followed by the implementation of
+`PointFlow: Flowing Semantics Through Points for Aerial Image
+Segmentation <https://arxiv.org/pdf/2103.06564.pdf>`_.
+"""
+
+crop_size = (896, 896)
+
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations'),
+    dict(
+        type='RandomResize',
+        scale=(896, 896),
+        ratio_range=(0.5, 2.0),
+        keep_ratio=True),
+    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+    dict(type='RandomFlip', prob=0.5),
+    dict(type='PhotoMetricDistortion'),
+    dict(type='PackSegInputs')
+]
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='Resize', scale=(896, 896), keep_ratio=True),
+    # add loading annotation after ``Resize`` because ground truth
+    # does not need to do resize data transform
+    dict(type='LoadAnnotations'),
+    dict(type='PackSegInputs')
+]
+img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
+tta_pipeline = [
+    dict(type='LoadImageFromFile', backend_args=None),
+    dict(
+        type='TestTimeAug',
+        transforms=[
+            [
+                dict(type='Resize', scale_factor=r, keep_ratio=True)
+                for r in img_ratios
+            ],
+            [
+                dict(type='RandomFlip', prob=0., direction='horizontal'),
+                dict(type='RandomFlip', prob=1., direction='horizontal')
+            ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
+        ])
+]
+train_dataloader = dict(
+    batch_size=4,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='InfiniteSampler', shuffle=True),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_prefix=dict(
+            img_path='img_dir/train', seg_map_path='ann_dir/train'),
+        pipeline=train_pipeline))
+val_dataloader = dict(
+    batch_size=1,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_prefix=dict(img_path='img_dir/val', seg_map_path='ann_dir/val'),
+        pipeline=test_pipeline))
+test_dataloader = val_dataloader
+
+val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
+test_evaluator = val_evaluator
configs/_base_/datasets/levir_256x256.py
ADDED
@@ -0,0 +1,59 @@
+# dataset settings
+dataset_type = 'LEVIRCDDataset'
+data_root = r'data/LEVIRCD'
+
+albu_train_transforms = [
+    dict(type='RandomBrightnessContrast', p=0.2),
+    dict(type='HorizontalFlip', p=0.5),
+    dict(type='VerticalFlip', p=0.5)
+]
+
+train_pipeline = [
+    dict(type='LoadMultipleRSImageFromFile'),
+    dict(type='LoadAnnotations'),
+    dict(type='Albu', transforms=albu_train_transforms),
+    dict(type='ConcatCDInput'),
+    dict(type='PackSegInputs')
+]
+test_pipeline = [
+    dict(type='LoadMultipleRSImageFromFile'),
+    dict(type='LoadAnnotations'),
+    dict(type='ConcatCDInput'),
+    dict(type='PackSegInputs')
+]
+
+tta_pipeline = [
+    dict(type='LoadMultipleRSImageFromFile'),
+    dict(
+        type='TestTimeAug',
+        transforms=[[dict(type='LoadAnnotations')],
+                    [dict(type='ConcatCDInput')],
+                    [dict(type='PackSegInputs')]])
+]
+train_dataloader = dict(
+    batch_size=4,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='InfiniteSampler', shuffle=True),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_prefix=dict(
+            img_path='train/A',
+            img_path2='train/B',
+            seg_map_path='train/label'),
+        pipeline=train_pipeline))
+val_dataloader = dict(
+    batch_size=1,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_prefix=dict(
+            img_path='test/A', img_path2='test/B', seg_map_path='test/label'),
+        pipeline=test_pipeline))
+test_dataloader = val_dataloader
+val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
+test_evaluator = val_evaluator
configs/_base_/datasets/loveda.py
ADDED
@@ -0,0 +1,66 @@
+# dataset settings
+dataset_type = 'LoveDADataset'
+data_root = 'data/loveDA'
+crop_size = (512, 512)
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations', reduce_zero_label=True),
+    dict(
+        type='RandomResize',
+        scale=(2048, 512),
+        ratio_range=(0.5, 2.0),
+        keep_ratio=True),
+    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+    dict(type='RandomFlip', prob=0.5),
+    dict(type='PhotoMetricDistortion'),
+    dict(type='PackSegInputs')
+]
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='Resize', scale=(1024, 1024), keep_ratio=True),
+    # add loading annotation after ``Resize`` because ground truth
+    # does not need to do resize data transform
+    dict(type='LoadAnnotations', reduce_zero_label=True),
+    dict(type='PackSegInputs')
+]
+img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
+tta_pipeline = [
+    dict(type='LoadImageFromFile', backend_args=None),
+    dict(
+        type='TestTimeAug',
+        transforms=[
+            [
+                dict(type='Resize', scale_factor=r, keep_ratio=True)
+                for r in img_ratios
+            ],
+            [
+                dict(type='RandomFlip', prob=0., direction='horizontal'),
+                dict(type='RandomFlip', prob=1., direction='horizontal')
+            ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
+        ])
+]
+train_dataloader = dict(
+    batch_size=4,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='InfiniteSampler', shuffle=True),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_prefix=dict(
+            img_path='img_dir/train', seg_map_path='ann_dir/train'),
+        pipeline=train_pipeline))
+val_dataloader = dict(
+    batch_size=1,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_prefix=dict(img_path='img_dir/val', seg_map_path='ann_dir/val'),
+        pipeline=test_pipeline))
+test_dataloader = val_dataloader
+
+val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
+test_evaluator = val_evaluator
configs/_base_/datasets/mapillary_v1.py
ADDED
@@ -0,0 +1,68 @@
+# dataset settings
+dataset_type = 'MapillaryDataset_v1'
+data_root = 'data/mapillary/'
+crop_size = (512, 1024)
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations'),
+    dict(
+        type='RandomResize',
+        scale=(2048, 1024),
+        ratio_range=(0.5, 2.0),
+        keep_ratio=True),
+    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+    dict(type='RandomFlip', prob=0.5),
+    dict(type='PhotoMetricDistortion'),
+    dict(type='PackSegInputs')
+]
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='Resize', scale=(2048, 1024), keep_ratio=True),
+    # add loading annotation after ``Resize`` because ground truth
+    # does not need to do resize data transform
+    dict(type='LoadAnnotations'),
+    dict(type='PackSegInputs')
+]
+img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
+tta_pipeline = [
+    dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')),
+    dict(
+        type='TestTimeAug',
+        transforms=[
+            [
+                dict(type='Resize', scale_factor=r, keep_ratio=True)
+                for r in img_ratios
+            ],
+            [
+                dict(type='RandomFlip', prob=0., direction='horizontal'),
+                dict(type='RandomFlip', prob=1., direction='horizontal')
+            ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
+        ])
+]
+train_dataloader = dict(
+    batch_size=2,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='InfiniteSampler', shuffle=True),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_prefix=dict(
+            img_path='training/images', seg_map_path='training/v1.2/labels'),
+        pipeline=train_pipeline))
+val_dataloader = dict(
+    batch_size=1,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_prefix=dict(
+            img_path='validation/images',
+            seg_map_path='validation/v1.2/labels'),
+        pipeline=test_pipeline))
+test_dataloader = val_dataloader
+
+val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
+test_evaluator = val_evaluator
configs/_base_/datasets/mapillary_v1_65.py
ADDED
@@ -0,0 +1,37 @@
# dataset settings
_base_ = './mapillary_v1.py'
metainfo = dict(
    classes=('Bird', 'Ground Animal', 'Curb', 'Fence', 'Guard Rail', 'Barrier',
             'Wall', 'Bike Lane', 'Crosswalk - Plain', 'Curb Cut', 'Parking',
             'Pedestrian Area', 'Rail Track', 'Road', 'Service Lane',
             'Sidewalk', 'Bridge', 'Building', 'Tunnel', 'Person', 'Bicyclist',
             'Motorcyclist', 'Other Rider', 'Lane Marking - Crosswalk',
             'Lane Marking - General', 'Mountain', 'Sand', 'Sky', 'Snow',
             'Terrain', 'Vegetation', 'Water', 'Banner', 'Bench', 'Bike Rack',
             'Billboard', 'Catch Basin', 'CCTV Camera', 'Fire Hydrant',
             'Junction Box', 'Mailbox', 'Manhole', 'Phone Booth', 'Pothole',
             'Street Light', 'Pole', 'Traffic Sign Frame', 'Utility Pole',
             'Traffic Light', 'Traffic Sign (Back)', 'Traffic Sign (Front)',
             'Trash Can', 'Bicycle', 'Boat', 'Bus', 'Car', 'Caravan',
             'Motorcycle', 'On Rails', 'Other Vehicle', 'Trailer', 'Truck',
             'Wheeled Slow', 'Car Mount', 'Ego Vehicle'),
    palette=[[165, 42, 42], [0, 192, 0], [196, 196, 196], [190, 153, 153],
             [180, 165, 180], [90, 120, 150], [102, 102, 156], [128, 64, 255],
             [140, 140, 200], [170, 170, 170], [250, 170, 160], [96, 96, 96],
             [230, 150, 140], [128, 64, 128], [110, 110, 110], [244, 35, 232],
             [150, 100, 100], [70, 70, 70], [150, 120, 90], [220, 20, 60],
             [255, 0, 0], [255, 0, 100], [255, 0, 200], [200, 128, 128],
             [255, 255, 255], [64, 170, 64], [230, 160, 50], [70, 130, 180],
             [190, 255, 255], [152, 251, 152], [107, 142, 35], [0, 170, 30],
             [255, 255, 128], [250, 0, 30], [100, 140, 180], [220, 220, 220],
             [220, 128, 128], [222, 40, 40], [100, 170, 30], [40, 40, 40],
             [33, 33, 33], [100, 128, 160], [142, 0, 0], [70, 100, 150],
             [210, 170, 100], [153, 153, 153], [128, 128, 128], [0, 0, 80],
             [250, 170, 30], [192, 192, 192], [220, 220, 0], [140, 140, 20],
             [119, 11, 32], [150, 0, 255], [0, 60, 100], [0, 0, 142],
             [0, 0, 90], [0, 0, 230], [0, 80, 100], [128, 64, 64], [0, 0, 110],
             [0, 0, 70], [0, 0, 192], [32, 32, 32], [120, 10, 10]])

train_dataloader = dict(dataset=dict(metainfo=metainfo))
val_dataloader = dict(dataset=dict(metainfo=metainfo))
test_dataloader = val_dataloader
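This file only overrides `metainfo` on top of the `mapillary_v1.py` base; pipelines and dataloaders are inherited. A hedged sketch of inspecting the composed config with mmengine (assuming mmengine is installed and this runs from the repository root; the count follows from the 65-entry classes tuple above):

```python
# Hedged sketch: load the composed config with mmengine.
from mmengine.config import Config

cfg = Config.fromfile('configs/_base_/datasets/mapillary_v1_65.py')
# Fields from the _base_ file are merged in; only ``metainfo`` is
# overridden here, with 65 classes in total.
print(len(cfg.train_dataloader.dataset.metainfo['classes']))  # expect 65
```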
configs/_base_/datasets/mapillary_v2.py
ADDED
@@ -0,0 +1,68 @@
# dataset settings
dataset_type = 'MapillaryDataset_v2'
data_root = 'data/mapillary/'
crop_size = (512, 1024)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(
        type='RandomResize',
        scale=(2048, 1024),
        ratio_range=(0.5, 2.0),
        keep_ratio=True),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='PackSegInputs')
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='Resize', scale=(2048, 1024), keep_ratio=True),
    # add loading annotation after ``Resize`` because ground truth
    # does not need to do resize data transform
    dict(type='LoadAnnotations'),
    dict(type='PackSegInputs')
]
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
    dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')),
    dict(
        type='TestTimeAug',
        transforms=[
            [
                dict(type='Resize', scale_factor=r, keep_ratio=True)
                for r in img_ratios
            ],
            [
                dict(type='RandomFlip', prob=0., direction='horizontal'),
                dict(type='RandomFlip', prob=1., direction='horizontal')
            ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
        ])
]
train_dataloader = dict(
    batch_size=2,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='InfiniteSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            img_path='training/images', seg_map_path='training/v2.0/labels'),
        pipeline=train_pipeline))
val_dataloader = dict(
    batch_size=1,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            img_path='validation/images',
            seg_map_path='validation/v2.0/labels'),
        pipeline=test_pipeline))
test_dataloader = val_dataloader

val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
test_evaluator = val_evaluator
configs/_base_/datasets/nyu.py
ADDED
@@ -0,0 +1,67 @@
# dataset settings
dataset_type = 'NYUDataset'
data_root = 'data/nyu'

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadDepthAnnotation', depth_rescale_factor=1e-3),
    dict(type='RandomDepthMix', prob=0.25),
    dict(type='RandomFlip', prob=0.5),
    dict(type='RandomCrop', crop_size=(480, 480)),
    dict(
        type='Albu',
        transforms=[
            dict(type='RandomBrightnessContrast'),
            dict(type='RandomGamma'),
            dict(type='HueSaturationValue'),
        ]),
    dict(
        type='PackSegInputs',
        meta_keys=('img_path', 'depth_map_path', 'ori_shape', 'img_shape',
                   'pad_shape', 'scale_factor', 'flip', 'flip_direction',
                   'category_id')),
]

test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='Resize', scale=(2000, 480), keep_ratio=True),
    dict(type='LoadDepthAnnotation', depth_rescale_factor=1e-3),
    dict(
        type='PackSegInputs',
        meta_keys=('img_path', 'depth_map_path', 'ori_shape', 'img_shape',
                   'pad_shape', 'scale_factor', 'flip', 'flip_direction',
                   'category_id'))
]

train_dataloader = dict(
    batch_size=8,
    num_workers=8,
    persistent_workers=True,
    sampler=dict(type='InfiniteSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            img_path='images/train', depth_map_path='annotations/train'),
        pipeline=train_pipeline))

val_dataloader = dict(
    batch_size=1,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        test_mode=True,
        data_prefix=dict(
            img_path='images/test', depth_map_path='annotations/test'),
        pipeline=test_pipeline))
test_dataloader = val_dataloader

val_evaluator = dict(
    type='DepthMetric',
    min_depth_eval=0.001,
    max_depth_eval=10.0,
    crop_type='nyu_crop')
test_evaluator = val_evaluator
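A note on `depth_rescale_factor=1e-3`: assuming the NYU depth maps are exported as 16-bit images in millimetres (an assumption, not stated in this config), the factor converts them to metres, which matches the 0.001 m to 10.0 m window `DepthMetric` evaluates. An illustrative numpy sketch:

```python
# Illustrative sketch of what ``depth_rescale_factor=1e-3`` amounts to,
# assuming depth maps are stored as uint16 values in millimetres.
import numpy as np

depth_mm = np.array([[500, 1500], [3200, 9800]], dtype=np.uint16)
depth_m = depth_mm.astype(np.float32) * 1e-3  # now in metres
# DepthMetric above evaluates only pixels inside (0.001 m, 10.0 m).
valid = (depth_m > 0.001) & (depth_m < 10.0)
print(depth_m, valid.sum())
```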
configs/_base_/datasets/nyu_512x512.py
ADDED
@@ -0,0 +1,72 @@
# dataset settings
dataset_type = 'NYUDataset'
data_root = 'data/nyu'

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadDepthAnnotation', depth_rescale_factor=1e-3),
    dict(type='RandomDepthMix', prob=0.25),
    dict(type='RandomFlip', prob=0.5),
    dict(
        type='RandomResize',
        scale=(768, 512),
        ratio_range=(0.8, 1.5),
        keep_ratio=True),
    dict(type='RandomCrop', crop_size=(512, 512)),
    dict(
        type='Albu',
        transforms=[
            dict(type='RandomBrightnessContrast'),
            dict(type='RandomGamma'),
            dict(type='HueSaturationValue'),
        ]),
    dict(
        type='PackSegInputs',
        meta_keys=('img_path', 'depth_map_path', 'ori_shape', 'img_shape',
                   'pad_shape', 'scale_factor', 'flip', 'flip_direction',
                   'category_id')),
]

test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='Resize', scale=(2048, 512), keep_ratio=True),
    dict(type='LoadDepthAnnotation', depth_rescale_factor=1e-3),
    dict(
        type='PackSegInputs',
        meta_keys=('img_path', 'depth_map_path', 'ori_shape', 'img_shape',
                   'pad_shape', 'scale_factor', 'flip', 'flip_direction',
                   'category_id'))
]

train_dataloader = dict(
    batch_size=8,
    num_workers=8,
    persistent_workers=True,
    sampler=dict(type='InfiniteSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            img_path='images/train', depth_map_path='annotations/train'),
        pipeline=train_pipeline))

val_dataloader = dict(
    batch_size=1,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        test_mode=True,
        data_prefix=dict(
            img_path='images/test', depth_map_path='annotations/test'),
        pipeline=test_pipeline))
test_dataloader = val_dataloader

val_evaluator = dict(
    type='DepthMetric',
    min_depth_eval=0.001,
    max_depth_eval=10.0,
    crop_type='nyu_crop')
test_evaluator = val_evaluator
configs/_base_/datasets/pascal_context.py
ADDED
@@ -0,0 +1,56 @@
# dataset settings
dataset_type = 'PascalContextDataset'
data_root = 'data/VOCdevkit/VOC2010/'

img_scale = (520, 520)
crop_size = (480, 480)

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(
        type='RandomResize',
        scale=img_scale,
        ratio_range=(0.5, 2.0),
        keep_ratio=True),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='PackSegInputs')
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='Resize', scale=img_scale, keep_ratio=True),
    # add loading annotation after ``Resize`` because ground truth
    # does not need to do resize data transform
    dict(type='LoadAnnotations'),
    dict(type='PackSegInputs')
]
train_dataloader = dict(
    batch_size=4,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='InfiniteSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            img_path='JPEGImages', seg_map_path='SegmentationClassContext'),
        ann_file='ImageSets/SegmentationContext/train.txt',
        pipeline=train_pipeline))
val_dataloader = dict(
    batch_size=1,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            img_path='JPEGImages', seg_map_path='SegmentationClassContext'),
        ann_file='ImageSets/SegmentationContext/val.txt',
        pipeline=test_pipeline))
test_dataloader = val_dataloader

val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
test_evaluator = val_evaluator
configs/_base_/datasets/pascal_context_59.py
ADDED
@@ -0,0 +1,72 @@
# dataset settings
dataset_type = 'PascalContextDataset59'
data_root = 'data/VOCdevkit/VOC2010/'

img_scale = (520, 520)
crop_size = (480, 480)

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', reduce_zero_label=True),
    dict(
        type='RandomResize',
        scale=img_scale,
        ratio_range=(0.5, 2.0),
        keep_ratio=True),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='PackSegInputs')
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='Resize', scale=img_scale, keep_ratio=True),
    # add loading annotation after ``Resize`` because ground truth
    # does not need to do resize data transform
    dict(type='LoadAnnotations', reduce_zero_label=True),
    dict(type='PackSegInputs')
]
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
    dict(type='LoadImageFromFile', backend_args=None),
    dict(
        type='TestTimeAug',
        transforms=[
            [
                dict(type='Resize', scale_factor=r, keep_ratio=True)
                for r in img_ratios
            ],
            [
                dict(type='RandomFlip', prob=0., direction='horizontal'),
                dict(type='RandomFlip', prob=1., direction='horizontal')
            ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
        ])
]
train_dataloader = dict(
    batch_size=4,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='InfiniteSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            img_path='JPEGImages', seg_map_path='SegmentationClassContext'),
        ann_file='ImageSets/SegmentationContext/train.txt',
        pipeline=train_pipeline))
val_dataloader = dict(
    batch_size=1,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            img_path='JPEGImages', seg_map_path='SegmentationClassContext'),
        ann_file='ImageSets/SegmentationContext/val.txt',
        pipeline=test_pipeline))
test_dataloader = val_dataloader

val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
test_evaluator = val_evaluator
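`reduce_zero_label=True` treats label 0 as background: it is remapped to the ignore index 255 and every remaining label shifts down by one, so the 59 foreground classes map to 0..58. A plain-numpy sketch of that remapping (illustrative, mirroring the documented mmseg behaviour):

```python
# Sketch of ``reduce_zero_label=True`` semantics (illustrative only).
import numpy as np

seg_map = np.array([0, 1, 2, 59], dtype=np.uint8)
reduced = seg_map.copy()
reduced[seg_map == 0] = 255          # background -> ignored
mask = seg_map > 0
reduced[mask] = seg_map[mask] - 1    # 1..59 -> 0..58
print(reduced)  # [255   0   1  58]
```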
configs/_base_/datasets/pascal_voc12.py
ADDED
@@ -0,0 +1,69 @@
# dataset settings
dataset_type = 'PascalVOCDataset'
data_root = 'data/VOCdevkit/VOC2012'
crop_size = (512, 512)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(
        type='RandomResize',
        scale=(2048, 512),
        ratio_range=(0.5, 2.0),
        keep_ratio=True),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='PackSegInputs')
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='Resize', scale=(2048, 512), keep_ratio=True),
    # add loading annotation after ``Resize`` because ground truth
    # does not need to do resize data transform
    dict(type='LoadAnnotations'),
    dict(type='PackSegInputs')
]
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
    dict(type='LoadImageFromFile', backend_args=None),
    dict(
        type='TestTimeAug',
        transforms=[
            [
                dict(type='Resize', scale_factor=r, keep_ratio=True)
                for r in img_ratios
            ],
            [
                dict(type='RandomFlip', prob=0., direction='horizontal'),
                dict(type='RandomFlip', prob=1., direction='horizontal')
            ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
        ])
]
train_dataloader = dict(
    batch_size=4,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='InfiniteSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            img_path='JPEGImages', seg_map_path='SegmentationClass'),
        ann_file='ImageSets/Segmentation/train.txt',
        pipeline=train_pipeline))
val_dataloader = dict(
    batch_size=1,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            img_path='JPEGImages', seg_map_path='SegmentationClass'),
        ann_file='ImageSets/Segmentation/val.txt',
        pipeline=test_pipeline))
test_dataloader = val_dataloader

val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
test_evaluator = val_evaluator
configs/_base_/datasets/pascal_voc12_aug.py
ADDED
@@ -0,0 +1,81 @@
# dataset settings
dataset_type = 'PascalVOCDataset'
data_root = 'data/VOCdevkit/VOC2012'
crop_size = (512, 512)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(
        type='RandomResize',
        scale=(2048, 512),
        ratio_range=(0.5, 2.0),
        keep_ratio=True),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Pad', size=crop_size),
    dict(type='PackSegInputs')
]

test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='Resize', scale=(2048, 512), keep_ratio=True),
    # add loading annotation after ``Resize`` because ground truth
    # does not need to do resize data transform
    dict(type='LoadAnnotations'),
    dict(type='PackSegInputs')
]
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
    dict(type='LoadImageFromFile', backend_args=None),
    dict(
        type='TestTimeAug',
        transforms=[
            [
                dict(type='Resize', scale_factor=r, keep_ratio=True)
                for r in img_ratios
            ],
            [
                dict(type='RandomFlip', prob=0., direction='horizontal'),
                dict(type='RandomFlip', prob=1., direction='horizontal')
            ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
        ])
]
dataset_train = dict(
    type=dataset_type,
    data_root=data_root,
    data_prefix=dict(img_path='JPEGImages', seg_map_path='SegmentationClass'),
    ann_file='ImageSets/Segmentation/train.txt',
    pipeline=train_pipeline)

dataset_aug = dict(
    type=dataset_type,
    data_root=data_root,
    data_prefix=dict(
        img_path='JPEGImages', seg_map_path='SegmentationClassAug'),
    ann_file='ImageSets/Segmentation/aug.txt',
    pipeline=train_pipeline)

train_dataloader = dict(
    batch_size=4,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='InfiniteSampler', shuffle=True),
    dataset=dict(type='ConcatDataset', datasets=[dataset_train, dataset_aug]))

val_dataloader = dict(
    batch_size=1,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            img_path='JPEGImages', seg_map_path='SegmentationClass'),
        ann_file='ImageSets/Segmentation/val.txt',
        pipeline=test_pipeline))
test_dataloader = val_dataloader

val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
test_evaluator = val_evaluator
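`ConcatDataset` chains the original `train.txt` split and the augmented `aug.txt` split into a single index space so one sampler can draw from both. A minimal plain-Python sketch of the indexing rule (illustrative only; `TinyConcat` is a hypothetical stand-in, not the mmengine class):

```python
# Minimal sketch of the ConcatDataset indexing rule: indices run
# through the first dataset, then continue into the second.
class TinyConcat:
    def __init__(self, datasets):
        self.datasets = datasets

    def __len__(self):
        return sum(len(d) for d in self.datasets)

    def __getitem__(self, idx):
        for d in self.datasets:
            if idx < len(d):
                return d[idx]
            idx -= len(d)
        raise IndexError(idx)

merged = TinyConcat([['a', 'b'], ['c']])
print(len(merged), merged[2])  # 3 c
```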
configs/_base_/datasets/potsdam.py
ADDED
@@ -0,0 +1,66 @@
# dataset settings
dataset_type = 'PotsdamDataset'
data_root = 'data/potsdam'
crop_size = (512, 512)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', reduce_zero_label=True),
    dict(
        type='RandomResize',
        scale=(512, 512),
        ratio_range=(0.5, 2.0),
        keep_ratio=True),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='PackSegInputs')
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='Resize', scale=(512, 512), keep_ratio=True),
    # add loading annotation after ``Resize`` because ground truth
    # does not need to do resize data transform
    dict(type='LoadAnnotations', reduce_zero_label=True),
    dict(type='PackSegInputs')
]
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
    dict(type='LoadImageFromFile', backend_args=None),
    dict(
        type='TestTimeAug',
        transforms=[
            [
                dict(type='Resize', scale_factor=r, keep_ratio=True)
                for r in img_ratios
            ],
            [
                dict(type='RandomFlip', prob=0., direction='horizontal'),
                dict(type='RandomFlip', prob=1., direction='horizontal')
            ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
        ])
]
train_dataloader = dict(
    batch_size=4,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='InfiniteSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            img_path='img_dir/train', seg_map_path='ann_dir/train'),
        pipeline=train_pipeline))
val_dataloader = dict(
    batch_size=1,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(img_path='img_dir/val', seg_map_path='ann_dir/val'),
        pipeline=test_pipeline))
test_dataloader = val_dataloader

val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
test_evaluator = val_evaluator
configs/_base_/datasets/refuge.py
ADDED
@@ -0,0 +1,90 @@
# dataset settings
dataset_type = 'REFUGEDataset'
data_root = 'data/REFUGE'
train_img_scale = (2056, 2124)
val_img_scale = (1634, 1634)
test_img_scale = (1634, 1634)
crop_size = (512, 512)

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', reduce_zero_label=False),
    dict(
        type='RandomResize',
        scale=train_img_scale,
        ratio_range=(0.5, 2.0),
        keep_ratio=True),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='PackSegInputs')
]
val_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='Resize', scale=val_img_scale, keep_ratio=True),
    # add loading annotation after ``Resize`` because ground truth
    # does not need to do resize data transform
    dict(type='LoadAnnotations', reduce_zero_label=False),
    dict(type='PackSegInputs')
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='Resize', scale=test_img_scale, keep_ratio=True),
    # add loading annotation after ``Resize`` because ground truth
    # does not need to do resize data transform
    dict(type='LoadAnnotations', reduce_zero_label=False),
    dict(type='PackSegInputs')
]
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
    dict(type='LoadImageFromFile', backend_args=dict(backend='local')),
    dict(
        type='TestTimeAug',
        transforms=[
            [
                dict(type='Resize', scale_factor=r, keep_ratio=True)
                for r in img_ratios
            ],
            [
                dict(type='RandomFlip', prob=0., direction='horizontal'),
                dict(type='RandomFlip', prob=1., direction='horizontal')
            ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
        ])
]
train_dataloader = dict(
    batch_size=4,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='InfiniteSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            img_path='images/training', seg_map_path='annotations/training'),
        pipeline=train_pipeline))
val_dataloader = dict(
    batch_size=1,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            img_path='images/validation',
            seg_map_path='annotations/validation'),
        pipeline=val_pipeline))
test_dataloader = dict(
    batch_size=1,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            img_path='images/test', seg_map_path='annotations/test'),
        pipeline=val_pipeline))

val_evaluator = dict(type='IoUMetric', iou_metrics=['mDice'])
test_evaluator = val_evaluator
configs/_base_/datasets/stare.py
ADDED
@@ -0,0 +1,73 @@
# dataset settings
dataset_type = 'STAREDataset'
data_root = 'data/STARE'
img_scale = (605, 700)
crop_size = (128, 128)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(
        type='RandomResize',
        scale=img_scale,
        ratio_range=(0.5, 2.0),
        keep_ratio=True),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='PackSegInputs')
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='Resize', scale=img_scale, keep_ratio=True),
    # add loading annotation after ``Resize`` because ground truth
    # does not need to do resize data transform
    dict(type='LoadAnnotations'),
    dict(type='PackSegInputs')
]
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
    dict(type='LoadImageFromFile', backend_args=None),
    dict(
        type='TestTimeAug',
        transforms=[
            [
                dict(type='Resize', scale_factor=r, keep_ratio=True)
                for r in img_ratios
            ],
            [
                dict(type='RandomFlip', prob=0., direction='horizontal'),
                dict(type='RandomFlip', prob=1., direction='horizontal')
            ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
        ])
]
train_dataloader = dict(
    batch_size=4,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='InfiniteSampler', shuffle=True),
    dataset=dict(
        type='RepeatDataset',
        times=40000,
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            data_prefix=dict(
                img_path='images/training',
                seg_map_path='annotations/training'),
            pipeline=train_pipeline)))
val_dataloader = dict(
    batch_size=1,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            img_path='images/validation',
            seg_map_path='annotations/validation'),
        pipeline=test_pipeline))
test_dataloader = val_dataloader

val_evaluator = dict(type='IoUMetric', iou_metrics=['mDice'])
test_evaluator = val_evaluator
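`RepeatDataset` wraps the small STARE training set so that index lookups wrap around `times * len(dataset)` entries, which suits iteration-based training. A hedged plain-Python sketch of the wrapping rule (`TinyRepeat` is a hypothetical stand-in, not the mmengine class):

```python
# Illustrative sketch of the RepeatDataset indexing rule: the tiny
# training set is presented ``times`` times over, so a long training
# run never exhausts it.
class TinyRepeat:
    def __init__(self, dataset, times):
        self.dataset = dataset
        self.times = times

    def __len__(self):
        return self.times * len(self.dataset)

    def __getitem__(self, idx):
        return self.dataset[idx % len(self.dataset)]

repeated = TinyRepeat(['im0', 'im1'], times=40000)
print(len(repeated), repeated[5])  # 80000 im1
```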
configs/_base_/datasets/synapse.py
ADDED
@@ -0,0 +1,41 @@
dataset_type = 'SynapseDataset'
data_root = 'data/synapse/'
img_scale = (224, 224)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', scale=img_scale, keep_ratio=True),
    dict(type='RandomRotFlip', rotate_prob=0.5, flip_prob=0.5, degree=20),
    dict(type='PackSegInputs')
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='Resize', scale=img_scale, keep_ratio=True),
    dict(type='LoadAnnotations'),
    dict(type='PackSegInputs')
]
train_dataloader = dict(
    batch_size=6,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type='InfiniteSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            img_path='img_dir/train', seg_map_path='ann_dir/train'),
        pipeline=train_pipeline))
val_dataloader = dict(
    batch_size=1,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(img_path='img_dir/val', seg_map_path='ann_dir/val'),
        pipeline=test_pipeline))
test_dataloader = val_dataloader

val_evaluator = dict(type='IoUMetric', iou_metrics=['mDice'])
test_evaluator = val_evaluator
configs/_base_/datasets/vaihingen.py
ADDED
@@ -0,0 +1,66 @@
# dataset settings
dataset_type = 'ISPRSDataset'
data_root = 'data/vaihingen'
crop_size = (512, 512)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', reduce_zero_label=True),
    dict(
        type='RandomResize',
        scale=(512, 512),
        ratio_range=(0.5, 2.0),
        keep_ratio=True),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='PackSegInputs')
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='Resize', scale=(512, 512), keep_ratio=True),
    # add loading annotation after ``Resize`` because ground truth
    # does not need to do resize data transform
    dict(type='LoadAnnotations', reduce_zero_label=True),
    dict(type='PackSegInputs')
]
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
    dict(type='LoadImageFromFile', backend_args=None),
    dict(
        type='TestTimeAug',
        transforms=[
            [
                dict(type='Resize', scale_factor=r, keep_ratio=True)
                for r in img_ratios
            ],
            [
                dict(type='RandomFlip', prob=0., direction='horizontal'),
                dict(type='RandomFlip', prob=1., direction='horizontal')
            ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
        ])
]
train_dataloader = dict(
    batch_size=4,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='InfiniteSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            img_path='img_dir/train', seg_map_path='ann_dir/train'),
        pipeline=train_pipeline))
val_dataloader = dict(
    batch_size=1,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(img_path='img_dir/val', seg_map_path='ann_dir/val'),
        pipeline=test_pipeline))
test_dataloader = val_dataloader

val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
test_evaluator = val_evaluator
configs/_base_/default_runtime.py
ADDED
@@ -0,0 +1,15 @@
default_scope = 'mmseg'
env_cfg = dict(
    cudnn_benchmark=True,
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
    dist_cfg=dict(backend='nccl'),
)
vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
    type='SegLocalVisualizer', vis_backends=vis_backends, name='visualizer')
log_processor = dict(by_epoch=False)
log_level = 'INFO'
load_from = None
resume = False

tta_model = dict(type='SegTTAModel')
configs/_base_/models/ann_r50-d8.py
ADDED
@@ -0,0 +1,54 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
    type='SegDataPreProcessor',
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    bgr_to_rgb=True,
    pad_val=0,
    seg_pad_val=255)
model = dict(
    type='EncoderDecoder',
    data_preprocessor=data_preprocessor,
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='ANNHead',
        in_channels=[1024, 2048],
        in_index=[2, 3],
        channels=512,
        project_channels=256,
        query_scales=(1, ),
        key_pool_scales=(1, 3, 6, 8),
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
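During training, the decode head and the auxiliary head each compute a cross-entropy loss and the two are summed with their `loss_weight` factors (1.0 and 0.4 here); the auxiliary branch is dropped at test time. Illustrative arithmetic with hypothetical loss values:

```python
# Hedged sketch of how the two heads' losses combine (illustrative
# arithmetic only; the loss values below are hypothetical).
loss_decode = 1.23   # per-batch CE loss of the decode head
loss_aux = 1.71      # per-batch CE loss of the auxiliary head
total = 1.0 * loss_decode + 0.4 * loss_aux
print(round(total, 3))  # 1.914
```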
configs/_base_/models/apcnet_r50-d8.py
ADDED
@@ -0,0 +1,52 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
    type='SegDataPreProcessor',
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    bgr_to_rgb=True,
    pad_val=0,
    seg_pad_val=255)
model = dict(
    type='EncoderDecoder',
    data_preprocessor=data_preprocessor,
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='APCHead',
        in_channels=2048,
        in_index=3,
        channels=512,
        pool_scales=(1, 2, 3, 6),
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=dict(type='SyncBN', requires_grad=True),
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
configs/_base_/models/bisenetv1_r18-d32.py
ADDED
@@ -0,0 +1,76 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
    type='SegDataPreProcessor',
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    bgr_to_rgb=True,
    pad_val=0,
    seg_pad_val=255)
model = dict(
    type='EncoderDecoder',
    data_preprocessor=data_preprocessor,
    backbone=dict(
        type='BiSeNetV1',
        in_channels=3,
        context_channels=(128, 256, 512),
        spatial_channels=(64, 64, 64, 128),
        out_indices=(0, 1, 2),
        out_channels=256,
        backbone_cfg=dict(
            type='ResNet',
            in_channels=3,
            depth=18,
            num_stages=4,
            out_indices=(0, 1, 2, 3),
            dilations=(1, 1, 1, 1),
            strides=(1, 2, 2, 2),
            norm_cfg=norm_cfg,
            norm_eval=False,
            style='pytorch',
            contract_dilation=True),
        norm_cfg=norm_cfg,
        align_corners=False,
        init_cfg=None),
    decode_head=dict(
        type='FCNHead',
        in_channels=256,
        in_index=0,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=[
        dict(
            type='FCNHead',
            in_channels=128,
            channels=64,
            num_convs=1,
            num_classes=19,
            in_index=1,
            norm_cfg=norm_cfg,
            concat_input=False,
            align_corners=False,
            loss_decode=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
        dict(
            type='FCNHead',
            in_channels=128,
            channels=64,
            num_convs=1,
            num_classes=19,
            in_index=2,
            norm_cfg=norm_cfg,
            concat_input=False,
            align_corners=False,
            loss_decode=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    ],
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
configs/_base_/models/bisenetv2.py
ADDED
@@ -0,0 +1,88 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
    type='SegDataPreProcessor',
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    bgr_to_rgb=True,
    pad_val=0,
    seg_pad_val=255)
model = dict(
    type='EncoderDecoder',
    data_preprocessor=data_preprocessor,
    pretrained=None,
    backbone=dict(
        type='BiSeNetV2',
        detail_channels=(64, 64, 128),
        semantic_channels=(16, 32, 64, 128),
        semantic_expansion_ratio=6,
        bga_channels=128,
        out_indices=(0, 1, 2, 3, 4),
        init_cfg=None,
        align_corners=False),
    decode_head=dict(
        type='FCNHead',
        in_channels=128,
        in_index=0,
        channels=1024,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=[
        dict(
            type='FCNHead',
            in_channels=16,
            channels=16,
            num_convs=2,
            num_classes=19,
            in_index=1,
            norm_cfg=norm_cfg,
            concat_input=False,
            align_corners=False,
            loss_decode=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
        dict(
            type='FCNHead',
            in_channels=32,
            channels=64,
            num_convs=2,
            num_classes=19,
            in_index=2,
            norm_cfg=norm_cfg,
            concat_input=False,
            align_corners=False,
            loss_decode=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
        dict(
            type='FCNHead',
            in_channels=64,
            channels=256,
            num_convs=2,
            num_classes=19,
            in_index=3,
            norm_cfg=norm_cfg,
            concat_input=False,
            align_corners=False,
            loss_decode=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
        dict(
            type='FCNHead',
            in_channels=128,
            channels=1024,
            num_convs=2,
            num_classes=19,
            in_index=4,
            norm_cfg=norm_cfg,
            concat_input=False,
            align_corners=False,
            loss_decode=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    ],
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
configs/_base_/models/ccnet_r50-d8.py
ADDED
@@ -0,0 +1,52 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
    type='SegDataPreProcessor',
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    bgr_to_rgb=True,
    pad_val=0,
    seg_pad_val=255)
model = dict(
    type='EncoderDecoder',
    data_preprocessor=data_preprocessor,
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='CCHead',
        in_channels=2048,
        in_index=3,
        channels=512,
        recurrence=2,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
configs/_base_/models/cgnet.py
ADDED
@@ -0,0 +1,43 @@
# model settings
norm_cfg = dict(type='SyncBN', eps=1e-03, requires_grad=True)
data_preprocessor = dict(
    type='SegDataPreProcessor',
    mean=[72.39239876, 82.90891754, 73.15835921],
    std=[1, 1, 1],
    bgr_to_rgb=True,
    pad_val=0,
    seg_pad_val=255)
model = dict(
    type='EncoderDecoder',
    data_preprocessor=data_preprocessor,
    backbone=dict(
        type='CGNet',
        norm_cfg=norm_cfg,
        in_channels=3,
        num_channels=(32, 64, 128),
        num_blocks=(3, 21),
        dilations=(2, 4),
        reductions=(8, 16)),
    decode_head=dict(
        type='FCNHead',
        in_channels=256,
        in_index=2,
        channels=256,
        num_convs=0,
        concat_input=False,
        dropout_ratio=0,
        num_classes=19,
        norm_cfg=norm_cfg,
        loss_decode=dict(
            type='CrossEntropyLoss',
            use_sigmoid=False,
            loss_weight=1.0,
            class_weight=[
                2.5959933, 6.7415504, 3.5354059, 9.8663225, 9.690899, 9.369352,
                10.289121, 9.953208, 4.3097677, 9.490387, 7.674431, 9.396905,
                10.347791, 6.3927646, 10.226669, 10.241062, 10.280587,
                10.396974, 10.055647
            ])),
    # model training and testing settings
    train_cfg=dict(sampler=None),
    test_cfg=dict(mode='whole'))
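The `class_weight` list re-weights the cross-entropy loss per class, boosting rare classes relative to frequent ones in the imbalanced 19-class label distribution. A hedged PyTorch sketch of per-class weighting (truncated to the first three weights for brevity):

```python
# Hedged sketch (PyTorch, illustrative): per-class weighting of the
# cross-entropy loss, as configured via ``class_weight`` above.
import torch
import torch.nn as nn

class_weight = torch.tensor([2.5959933, 6.7415504, 3.5354059])  # 3 of 19
criterion = nn.CrossEntropyLoss(weight=class_weight)
logits = torch.randn(4, 3)          # (batch, classes)
target = torch.tensor([0, 1, 2, 1])
print(criterion(logits, target))
```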
configs/_base_/models/danet_r50-d8.py
ADDED
@@ -0,0 +1,52 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
    type='SegDataPreProcessor',
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    bgr_to_rgb=True,
    pad_val=0,
    seg_pad_val=255)
model = dict(
    type='EncoderDecoder',
    data_preprocessor=data_preprocessor,
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='DAHead',
        in_channels=2048,
        in_index=3,
        channels=512,
        pam_channels=64,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
configs/_base_/models/deeplabv3_r50-d8.py
ADDED
@@ -0,0 +1,52 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
    type='SegDataPreProcessor',
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    bgr_to_rgb=True,
    pad_val=0,
    seg_pad_val=255)
model = dict(
    type='EncoderDecoder',
    data_preprocessor=data_preprocessor,
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='ASPPHead',
        in_channels=2048,
        in_index=3,
        channels=512,
        dilations=(1, 12, 24, 36),
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
configs/_base_/models/deeplabv3_unet_s5-d16.py
ADDED
@@ -0,0 +1,58 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
    type='SegDataPreProcessor',
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    bgr_to_rgb=True,
    pad_val=0,
    seg_pad_val=255)
model = dict(
    type='EncoderDecoder',
    data_preprocessor=data_preprocessor,
    pretrained=None,
    backbone=dict(
        type='UNet',
        in_channels=3,
        base_channels=64,
        num_stages=5,
        strides=(1, 1, 1, 1, 1),
        enc_num_convs=(2, 2, 2, 2, 2),
        dec_num_convs=(2, 2, 2, 2),
        downsamples=(True, True, True, True),
        enc_dilations=(1, 1, 1, 1, 1),
        dec_dilations=(1, 1, 1, 1),
        with_cp=False,
        conv_cfg=None,
        norm_cfg=norm_cfg,
        act_cfg=dict(type='ReLU'),
        upsample_cfg=dict(type='InterpConv'),
        norm_eval=False),
    decode_head=dict(
        type='ASPPHead',
        in_channels=64,
        in_index=4,
        channels=16,
        dilations=(1, 12, 24, 36),
        dropout_ratio=0.1,
        num_classes=2,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=128,
        in_index=3,
        channels=64,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=2,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='slide', crop_size=256, stride=170))
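Unlike the `mode='whole'` configs above, this model tests with `mode='slide'`: 256-pixel windows placed every 170 pixels, with the last window clamped to the image border and overlapping predictions averaged. A one-dimensional, purely illustrative sketch of the window placement (the width of 565 is a hypothetical image extent):

```python
# Illustrative 1-D sketch of 'slide' window placement: windows of
# ``crop`` pixels every ``stride`` pixels, final window clamped to the
# border; overlapping logits are averaged at inference time.
crop, stride, width = 256, 170, 565
starts = []
x = 0
while True:
    x = min(x, width - crop)
    starts.append(x)
    if x + crop >= width:
        break
    x += stride
print(starts)  # [0, 170, 309]
```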
configs/_base_/models/deeplabv3plus_r50-d8.py
ADDED
@@ -0,0 +1,54 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
    type='SegDataPreProcessor',
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    bgr_to_rgb=True,
    pad_val=0,
    seg_pad_val=255)
model = dict(
    type='EncoderDecoder',
    data_preprocessor=data_preprocessor,
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='DepthwiseSeparableASPPHead',
        in_channels=2048,
        in_index=3,
        channels=512,
        dilations=(1, 12, 24, 36),
        c1_in_channels=256,
        c1_channels=48,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
configs/_base_/models/dmnet_r50-d8.py
ADDED
@@ -0,0 +1,52 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
    type='SegDataPreProcessor',
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    bgr_to_rgb=True,
    pad_val=0,
    seg_pad_val=255)
model = dict(
    type='EncoderDecoder',
    data_preprocessor=data_preprocessor,
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='DMHead',
        in_channels=2048,
        in_index=3,
        channels=512,
        filter_sizes=(1, 3, 5, 7),
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=dict(type='SyncBN', requires_grad=True),
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
configs/_base_/models/dnl_r50-d8.py
ADDED
@@ -0,0 +1,54 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
    type='SegDataPreProcessor',
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    bgr_to_rgb=True,
    pad_val=0,
    seg_pad_val=255)
model = dict(
    type='EncoderDecoder',
    data_preprocessor=data_preprocessor,
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='DNLHead',
        in_channels=2048,
        in_index=3,
        channels=512,
        dropout_ratio=0.1,
        reduction=2,
        use_scale=True,
        mode='embedded_gaussian',
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
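The r50-d8 variants in this set all share the same trunk: the ResNetV1c stem downsamples by 4, the stage strides (1, 2, 1, 1) contribute one more factor of 2, and the last two stages trade stride for dilation (2, 4). That is what the "d8" (output stride 8) in the file names refers to. A quick sanity check of the arithmetic:

stem_stride = 4               # 2x conv stem + 2x max-pool in ResNetV1c
stage_strides = (1, 2, 1, 1)  # stages 3-4 keep stride 1 and dilate instead
output_stride = stem_stride
for s in stage_strides:
    output_stride *= s
print(output_stride)  # 8 -> feature maps are 1/8 of input resolution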
configs/_base_/models/dpt_vit-b16.py
ADDED
@@ -0,0 +1,39 @@
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
    type='SegDataPreProcessor',
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    bgr_to_rgb=True,
    pad_val=0,
    seg_pad_val=255)
model = dict(
    type='EncoderDecoder',
    data_preprocessor=data_preprocessor,
    pretrained='pretrain/vit-b16_p16_224-80ecf9dd.pth',  # noqa
    backbone=dict(
        type='VisionTransformer',
        img_size=224,
        embed_dims=768,
        num_layers=12,
        num_heads=12,
        out_indices=(2, 5, 8, 11),
        final_norm=False,
        with_cls_token=True,
        output_cls_token=True),
    decode_head=dict(
        type='DPTHead',
        in_channels=(768, 768, 768, 768),
        channels=256,
        embed_dims=768,
        post_process_channels=[96, 192, 384, 768],
        num_classes=150,
        readout_type='project',
        input_transform='multiple_select',
        in_index=(0, 1, 2, 3),
        norm_cfg=norm_cfg,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=None,
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))  # yapf: disable
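This is the only backbone in the set without a convolutional trunk: out_indices=(2, 5, 8, 11) taps four of the twelve transformer blocks, each 768-dimensional, which is why decode_head's in_channels repeats 768 four times; and with auxiliary_head=None there is a single loss term. A small consistency sketch of that wiring (illustrative only):

num_layers = 12
out_indices = (2, 5, 8, 11)   # transformer blocks exposed to the head
embed_dims = 768
in_channels = (embed_dims,) * len(out_indices)
assert all(0 <= i < num_layers for i in out_indices)
print(in_channels)  # (768, 768, 768, 768), matching the DPTHead config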
configs/_base_/models/emanet_r50-d8.py
ADDED
@@ -0,0 +1,55 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
    type='SegDataPreProcessor',
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    bgr_to_rgb=True,
    pad_val=0,
    seg_pad_val=255)
model = dict(
    type='EncoderDecoder',
    data_preprocessor=data_preprocessor,
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='EMAHead',
        in_channels=2048,
        in_index=3,
        channels=256,
        ema_channels=512,
        num_bases=64,
        num_stages=3,
        momentum=0.1,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
configs/_base_/models/encnet_r50-d8.py
ADDED
@@ -0,0 +1,56 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
    type='SegDataPreProcessor',
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    bgr_to_rgb=True,
    pad_val=0,
    seg_pad_val=255)
model = dict(
    type='EncoderDecoder',
    data_preprocessor=data_preprocessor,
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='EncHead',
        in_channels=[512, 1024, 2048],
        in_index=(1, 2, 3),
        channels=512,
        num_codes=32,
        use_se_loss=True,
        add_lateral=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
        loss_se_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.2)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
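EncNet is the one config in this set with two losses on the decode head: the usual cross-entropy plus a sigmoid-based semantic-encoding (SE) loss at weight 0.2, on top of the 0.4-weighted auxiliary head. A sketch of how those weights combine into the training objective (illustrative arithmetic only, not MMSegmentation internals):

def total_loss(loss_decode, loss_se, loss_aux):
    # Weighted sum implied by the loss_weight fields above.
    return 1.0 * loss_decode + 0.2 * loss_se + 0.4 * loss_aux

print(total_loss(1.2, 0.5, 0.8))  # 1.62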
configs/_base_/models/erfnet_fcn.py
ADDED
@@ -0,0 +1,40 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
    type='SegDataPreProcessor',
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    bgr_to_rgb=True,
    pad_val=0,
    seg_pad_val=255)
model = dict(
    type='EncoderDecoder',
    data_preprocessor=data_preprocessor,
    pretrained=None,
    backbone=dict(
        type='ERFNet',
        in_channels=3,
        enc_downsample_channels=(16, 64, 128),
        enc_stage_non_bottlenecks=(5, 8),
        enc_non_bottleneck_dilations=(2, 4, 8, 16),
        enc_non_bottleneck_channels=(64, 128),
        dec_upsample_channels=(64, 16),
        dec_stages_non_bottleneck=(2, 2),
        dec_non_bottleneck_channels=(64, 16),
        dropout_ratio=0.1,
        init_cfg=None),
    decode_head=dict(
        type='FCNHead',
        in_channels=16,
        channels=128,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
configs/_base_/models/fast_scnn.py
ADDED
@@ -0,0 +1,65 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True, momentum=0.01)
data_preprocessor = dict(
    type='SegDataPreProcessor',
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    bgr_to_rgb=True,
    pad_val=0,
    seg_pad_val=255)
model = dict(
    type='EncoderDecoder',
    data_preprocessor=data_preprocessor,
    backbone=dict(
        type='FastSCNN',
        downsample_dw_channels=(32, 48),
        global_in_channels=64,
        global_block_channels=(64, 96, 128),
        global_block_strides=(2, 2, 1),
        global_out_channels=128,
        higher_in_channels=64,
        lower_in_channels=128,
        fusion_out_channels=128,
        out_indices=(0, 1, 2),
        norm_cfg=norm_cfg,
        align_corners=False),
    decode_head=dict(
        type='DepthwiseSeparableFCNHead',
        in_channels=128,
        channels=128,
        concat_input=False,
        num_classes=19,
        in_index=-1,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1)),
    auxiliary_head=[
        dict(
            type='FCNHead',
            in_channels=128,
            channels=32,
            num_convs=1,
            num_classes=19,
            in_index=-2,
            norm_cfg=norm_cfg,
            concat_input=False,
            align_corners=False,
            loss_decode=dict(
                type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)),
        dict(
            type='FCNHead',
            in_channels=64,
            channels=32,
            num_convs=1,
            num_classes=19,
            in_index=-3,
            norm_cfg=norm_cfg,
            concat_input=False,
            align_corners=False,
            loss_decode=dict(
                type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)),
    ],
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
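Fast-SCNN is the only config here with a list of auxiliary heads: the backbone exposes three feature maps (out_indices=(0, 1, 2)), and the heads pick them by negative in_index values counted from the end of that tuple, with channel counts matching each head's in_channels. A sketch of that indexing (the feature names are labels for illustration, not MMSegmentation identifiers):

features = [
    ('higher_res', 64),   # higher_in_channels=64  -> in_index=-3
    ('lower_res', 128),   # lower_in_channels=128  -> in_index=-2
    ('fusion', 128),      # fusion_out_channels=128 -> in_index=-1
]
for in_index in (-1, -2, -3):
    name, channels = features[in_index]
    print(f'in_index={in_index}: {name} ({channels} channels)')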