HubHop committed
Commit 412c852 · 1 Parent(s): f82f114
This view is limited to 50 files because it contains too many changes.
Files changed (50)
  1. README copy.md +13 -0
  2. app.py +251 -0
  3. configs/_base_/datasets/ade20k.py +68 -0
  4. configs/_base_/datasets/ade20k_640x640.py +68 -0
  5. configs/_base_/datasets/bdd100k.py +70 -0
  6. configs/_base_/datasets/chase_db1.py +75 -0
  7. configs/_base_/datasets/cityscapes.py +67 -0
  8. configs/_base_/datasets/cityscapes_1024x1024.py +29 -0
  9. configs/_base_/datasets/cityscapes_768x768.py +29 -0
  10. configs/_base_/datasets/cityscapes_769x769.py +29 -0
  11. configs/_base_/datasets/cityscapes_832x832.py +29 -0
  12. configs/_base_/datasets/coco-stuff10k.py +69 -0
  13. configs/_base_/datasets/coco-stuff164k.py +67 -0
  14. configs/_base_/datasets/drive.py +73 -0
  15. configs/_base_/datasets/hrf.py +73 -0
  16. configs/_base_/datasets/isaid.py +73 -0
  17. configs/_base_/datasets/levir_256x256.py +59 -0
  18. configs/_base_/datasets/loveda.py +66 -0
  19. configs/_base_/datasets/mapillary_v1.py +68 -0
  20. configs/_base_/datasets/mapillary_v1_65.py +37 -0
  21. configs/_base_/datasets/mapillary_v2.py +68 -0
  22. configs/_base_/datasets/nyu.py +67 -0
  23. configs/_base_/datasets/nyu_512x512.py +72 -0
  24. configs/_base_/datasets/pascal_context.py +56 -0
  25. configs/_base_/datasets/pascal_context_59.py +72 -0
  26. configs/_base_/datasets/pascal_voc12.py +69 -0
  27. configs/_base_/datasets/pascal_voc12_aug.py +81 -0
  28. configs/_base_/datasets/potsdam.py +66 -0
  29. configs/_base_/datasets/refuge.py +90 -0
  30. configs/_base_/datasets/stare.py +73 -0
  31. configs/_base_/datasets/synapse.py +41 -0
  32. configs/_base_/datasets/vaihingen.py +66 -0
  33. configs/_base_/default_runtime.py +15 -0
  34. configs/_base_/models/ann_r50-d8.py +54 -0
  35. configs/_base_/models/apcnet_r50-d8.py +52 -0
  36. configs/_base_/models/bisenetv1_r18-d32.py +76 -0
  37. configs/_base_/models/bisenetv2.py +88 -0
  38. configs/_base_/models/ccnet_r50-d8.py +52 -0
  39. configs/_base_/models/cgnet.py +43 -0
  40. configs/_base_/models/danet_r50-d8.py +52 -0
  41. configs/_base_/models/deeplabv3_r50-d8.py +52 -0
  42. configs/_base_/models/deeplabv3_unet_s5-d16.py +58 -0
  43. configs/_base_/models/deeplabv3plus_r50-d8.py +54 -0
  44. configs/_base_/models/dmnet_r50-d8.py +52 -0
  45. configs/_base_/models/dnl_r50-d8.py +54 -0
  46. configs/_base_/models/dpt_vit-b16.py +39 -0
  47. configs/_base_/models/emanet_r50-d8.py +55 -0
  48. configs/_base_/models/encnet_r50-d8.py +56 -0
  49. configs/_base_/models/erfnet_fcn.py +40 -0
  50. configs/_base_/models/fast_scnn.py +65 -0
README copy.md ADDED
@@ -0,0 +1,13 @@
+ ---
+ title: Snnetv2 Semantic Segmentation
+ emoji: 🐨
+ colorFrom: green
+ colorTo: red
+ sdk: gradio
+ sdk_version: 4.14.0
+ app_file: app.py
+ pinned: false
+ license: apache-2.0
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,251 @@
+ # Copyright (c) OpenMMLab. All rights reserved.
+ from argparse import ArgumentParser
+
+ import cv2
+ from mmengine.model.utils import revert_sync_batchnorm
+
+ from mmseg.apis import inference_model, init_model
+ from mmseg.apis.inference import show_result_pyplot
+ import torch
+ import time
+ import gradio as gr
+ import plotly.express as px
+ import json
+
+ def main():
+     parser = ArgumentParser()
+     parser.add_argument('--config', default='configs/snnet/setr_naive_512x512_160k_b16_ade20k_deit_3_s_l_224_snnetv2.py', help='Config file')
+     parser.add_argument('--checkpoint', help='Checkpoint file', default='setr_naive_512x512_160k_b16_ade20k_snnetv2_deit3_s_l_lora_16_iter_160000.pth')
+     # parser.add_argument('--video', help='Video file or webcam id')
+
+     parser.add_argument(
+         '--device', default='cuda:0', help='Device used for inference')
+     parser.add_argument(
+         '--palette',
+         default='cityscapes',
+         help='Color palette used for segmentation map')
+     parser.add_argument(
+         '--show', action='store_true', help='Whether to show draw result')
+     parser.add_argument(
+         '--show-wait-time', default=1, type=int, help='Wait time after imshow')
+     parser.add_argument(
+         '--output-file', default=None, type=str, help='Output video file path')
+     parser.add_argument(
+         '--output-fourcc',
+         default='MJPG',
+         type=str,
+         help='Fourcc of the output video')
+     parser.add_argument(
+         '--output-fps', default=30, type=int, help='FPS of the output video')
+     parser.add_argument(
+         '--output-height',
+         default=-1,
+         type=int,
+         help='Frame height of the output video')
+     parser.add_argument(
+         '--output-width',
+         default=-1,
+         type=int,
+         help='Frame width of the output video')
+     parser.add_argument(
+         '--opacity',
+         type=float,
+         default=0.5,
+         help='Opacity of painted segmentation map. In (0, 1] range.')
+     args = parser.parse_args()
+
+     # build the model from a config file and a checkpoint file
+     model = init_model(args.config, args.checkpoint, device=args.device)
+     if args.device == 'cpu':
+         model = revert_sync_batchnorm(model)
+
+     from mmseg.models.backbones.snnet import get_stitch_configs_bidirection
+     stitch_configs_info, _, _, anchor_ids, sl_ids, ls_ids, lsl_ids, sls_ids = get_stitch_configs_bidirection([12, 24])
+
+     stitch_configs_info = {i: cfg for i, cfg in enumerate(stitch_configs_info)}
+
+     with open('./model_flops/snnet_flops_setr_naive_512x512_160k_b16_ade20k_deit_3_s_l_224_snnetv2.json', 'r') as f:
+         flops_params = json.load(f)
+
+     with open('./results/eval_single_scale_20230507_235400.json', 'r') as f:
+         results = json.load(f)
+
+     config_ids = list(results.keys())
+     flops_res = {}
+     eval_res = {}
+     total_data = {}
+     for i, cfg_id in enumerate(config_ids):
+         flops = flops_params[cfg_id]
+         miou_res = results[cfg_id]['metric']['mIoU'] * 100
+         eval_res[int(cfg_id)] = miou_res
+         flops_res[int(cfg_id)] = flops / 1e9
+         total_data[int(cfg_id)] = [flops // 1e9, miou_res]
+
+     def visualize_stitch_pos(stitch_id):
+         if stitch_id == 13:
+             # 13 is equivalent to 0
+             stitch_id = 0
+
+         names = [f'ID {key}' for key in flops_res.keys()]
+
+         fig = px.scatter(x=flops_res.values(), y=eval_res.values(), hover_name=names)
+         fig.update_layout(
+             title=f"SN-Netv2 - Stitch ID - {stitch_id}",
+             title_x=0.5,
+             xaxis_title="GFLOPs",
+             yaxis_title="mIoU",
+             font=dict(
+                 family="Courier New, monospace",
+                 size=18,
+                 color="RebeccaPurple"
+             ),
+             legend=dict(
+                 yanchor="bottom",
+                 y=0.99,
+                 xanchor="left",
+                 x=0.01),
+         )
+         # continent, DarkSlateGrey
+         fig.update_traces(marker=dict(size=10, line=dict(width=2)),
+                           selector=dict(mode='markers'))
+
+         fig.add_scatter(x=[flops_res[stitch_id]], y=[eval_res[stitch_id]], mode='markers', marker=dict(size=15), name='Current Stitch')
+         return fig
+
+     def segment_video(video, stitch_id):
+         if stitch_id == 13:
+             # 13 is equivalent to 0
+             stitch_id = 0
+
+         model.backbone.reset_stitch_id(stitch_id)
+         output_video_path = './temp_video.avi'
+         cap = cv2.VideoCapture(video)
+         assert cap.isOpened()
+         input_height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
+         input_width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
+         input_fps = cap.get(cv2.CAP_PROP_FPS)
+
+         fourcc = cv2.VideoWriter_fourcc(*args.output_fourcc)
+         output_fps = args.output_fps if args.output_fps > 0 else input_fps
+         output_height = args.output_height if args.output_height > 0 else int(input_height)
+         output_width = args.output_width if args.output_width > 0 else int(input_width)
+         writer = cv2.VideoWriter(output_video_path, fourcc, output_fps,
+                                  (output_width, output_height), True)
+
+         try:
+             while True:
+                 start_time = time.time()
+                 flag, frame = cap.read()
+                 if not flag:
+                     break
+
+                 # test a single image
+                 result = inference_model(model, frame)
+
+                 # blend raw image and prediction
+                 draw_img = show_result_pyplot(model, frame, result,
+                                               show=False,
+                                               with_labels=False)
+
+                 if draw_img.shape[0] != output_height or draw_img.shape[1] != output_width:
+                     draw_img = cv2.resize(draw_img, (output_width, output_height))
+                 writer.write(draw_img)
+         finally:
+             if writer:
+                 writer.release()
+             cap.release()
+
+         fig = visualize_stitch_pos(stitch_id)
+
+         return output_video_path, fig
+
+     def segment_image(image, stitch_id):
+         if stitch_id == 13:
+             # 13 is equivalent to 0
+             stitch_id = 0
+
+         model.backbone.reset_stitch_id(stitch_id)
+         result = inference_model(model, image)
+         draw_img = show_result_pyplot(model, image, result,
+                                       show=False,
+                                       with_labels=True)
+         fig = visualize_stitch_pos(stitch_id)
+         return draw_img, fig
+
+     with gr.Blocks() as image_demo:
+         with gr.Row():
+             with gr.Column():
+                 image_input = gr.Image(label='Input Image')
+                 stitch_slider = gr.Slider(minimum=0, maximum=134, step=1, label="Stitch ID")
+                 with gr.Row():
+                     clear_button = gr.ClearButton()
+                     submit_button = gr.Button()
+
+             with gr.Column():
+                 image_output = gr.Image(label='Segmentation Results')
+                 stitch_plot = gr.Plot(label='Stitch Position')
+
+         submit_button.click(
+             fn=segment_image,
+             inputs=[image_input, stitch_slider],
+             outputs=[image_output, stitch_plot],
+         )
+
+         stitch_slider.change(
+             fn=visualize_stitch_pos,
+             inputs=[stitch_slider],
+             outputs=[stitch_plot],
+             show_progress=False
+         )
+
+         clear_button.click(
+             lambda: [None, 0, None, None],
+             outputs=[image_input, stitch_slider, image_output, stitch_plot],
+         )
+
+         gr.Examples(
+             [
+                 ['./demo_1.jpg', 0],
+                 ['./demo_2.jpg', 1],
+                 ['./demo_3.jpg', 93],
+                 ['./demo_4.jpg', 3],
+             ],
+             inputs=[
+                 image_input,
+                 stitch_slider
+             ],
+             outputs=[
+                 image_input,
+                 stitch_plot
+             ],
+         )
+
+     with gr.Blocks() as demo:
+         with gr.Column():
+             gr.HTML("""
+                 <h1 align="center" style=" display: flex; flex-direction: row; justify-content: center; font-size: 25pt; ">Stitched ViTs are Flexible Vision Backbones</h1>
+                 <div align="center"> <img align="center" src='file/gradio_banner.png' width="70%"> </div>
+                 <h3 align="center" >This is the semantic segmentation demo page of SN-Netv2, a flexible vision backbone that allows for 100+ runtime trade-offs between speed and performance. You can also run this gradio demo on your local GPUs at <a href="https://github.com/ziplab/SN-Netv2">https://github.com/ziplab/SN-Netv2</a>. Paper link: <a href="https://arxiv.org/abs/2307.00154">https://arxiv.org/abs/2307.00154</a>.</h3>
+             """)
+             tabbed_page = gr.TabbedInterface([image_demo], ['Image'])
+
+     demo.launch(allowed_paths=['./'])
+
+
+ if __name__ == '__main__':
+     main()
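Note: the file above drives everything through Gradio; a minimal headless sketch of the same inference path follows. It reuses the config, checkpoint, and `reset_stitch_id` backbone method from the commit, while the stitch ID (93, one of the slider's 0-134 range) and the image/output filenames are illustrative, not part of the commit.

# Hedged sketch: run one image through the SN-Netv2 model without the UI.
import cv2
from mmseg.apis import inference_model, init_model
from mmseg.apis.inference import show_result_pyplot

config = 'configs/snnet/setr_naive_512x512_160k_b16_ade20k_deit_3_s_l_224_snnetv2.py'
checkpoint = 'setr_naive_512x512_160k_b16_ade20k_snnetv2_deit3_s_l_lora_16_iter_160000.pth'

model = init_model(config, checkpoint, device='cuda:0')
model.backbone.reset_stitch_id(93)  # pick any stitch the slider exposes (0-134)
result = inference_model(model, 'demo_3.jpg')
overlay = show_result_pyplot(model, 'demo_3.jpg', result, show=False)
cv2.imwrite('overlay.png', overlay)  # blended image + prediction, as in segment_image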
configs/_base_/datasets/ade20k.py ADDED
@@ -0,0 +1,68 @@
+ # dataset settings
+ dataset_type = 'ADE20KDataset'
+ data_root = 'data/ade/ADEChallengeData2016'
+ crop_size = (512, 512)
+ train_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='LoadAnnotations', reduce_zero_label=True),
+     dict(
+         type='RandomResize',
+         scale=(2048, 512),
+         ratio_range=(0.5, 2.0),
+         keep_ratio=True),
+     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+     dict(type='RandomFlip', prob=0.5),
+     dict(type='PhotoMetricDistortion'),
+     dict(type='PackSegInputs')
+ ]
+ test_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='Resize', scale=(2048, 512), keep_ratio=True),
+     # add loading annotation after ``Resize`` because ground truth
+     # does not need to do resize data transform
+     dict(type='LoadAnnotations', reduce_zero_label=True),
+     dict(type='PackSegInputs')
+ ]
+ img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
+ tta_pipeline = [
+     dict(type='LoadImageFromFile', backend_args=None),
+     dict(
+         type='TestTimeAug',
+         transforms=[
+             [
+                 dict(type='Resize', scale_factor=r, keep_ratio=True)
+                 for r in img_ratios
+             ],
+             [
+                 dict(type='RandomFlip', prob=0., direction='horizontal'),
+                 dict(type='RandomFlip', prob=1., direction='horizontal')
+             ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
+         ])
+ ]
+ train_dataloader = dict(
+     batch_size=4,
+     num_workers=4,
+     persistent_workers=True,
+     sampler=dict(type='InfiniteSampler', shuffle=True),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(
+             img_path='images/training', seg_map_path='annotations/training'),
+         pipeline=train_pipeline))
+ val_dataloader = dict(
+     batch_size=1,
+     num_workers=4,
+     persistent_workers=True,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(
+             img_path='images/validation',
+             seg_map_path='annotations/validation'),
+         pipeline=test_pipeline))
+ test_dataloader = val_dataloader
+
+ val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
+ test_evaluator = val_evaluator
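This `_base_` file is meant to be inherited by full model configs, but it can also be exercised on its own. A minimal sketch, assuming an mmseg 1.x install and the `data/ade/ADEChallengeData2016` layout the config expects (`register_all_modules` makes `ADE20KDataset` and the transforms above resolvable in mmengine's registries):

from mmengine import Config
from mmengine.runner import Runner
from mmseg.utils import register_all_modules

register_all_modules()  # register mmseg datasets/transforms
cfg = Config.fromfile('configs/_base_/datasets/ade20k.py')
train_loader = Runner.build_dataloader(cfg.train_dataloader)
batch = next(iter(train_loader))  # dict with 'inputs' and 'data_samples'

The same pattern applies to every dataset config in this commit; they differ only in dataset type, paths, crop/scale sizes, and evaluator metrics.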
configs/_base_/datasets/ade20k_640x640.py ADDED
@@ -0,0 +1,68 @@
+ # dataset settings
+ dataset_type = 'ADE20KDataset'
+ data_root = 'data/ade/ADEChallengeData2016'
+ crop_size = (640, 640)
+ train_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='LoadAnnotations', reduce_zero_label=True),
+     dict(
+         type='RandomResize',
+         scale=(2560, 640),
+         ratio_range=(0.5, 2.0),
+         keep_ratio=True),
+     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+     dict(type='RandomFlip', prob=0.5),
+     dict(type='PhotoMetricDistortion'),
+     dict(type='PackSegInputs')
+ ]
+ test_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='Resize', scale=(2560, 640), keep_ratio=True),
+     # add loading annotation after ``Resize`` because ground truth
+     # does not need to do resize data transform
+     dict(type='LoadAnnotations', reduce_zero_label=True),
+     dict(type='PackSegInputs')
+ ]
+ img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
+ tta_pipeline = [
+     dict(type='LoadImageFromFile', backend_args=None),
+     dict(
+         type='TestTimeAug',
+         transforms=[
+             [
+                 dict(type='Resize', scale_factor=r, keep_ratio=True)
+                 for r in img_ratios
+             ],
+             [
+                 dict(type='RandomFlip', prob=0., direction='horizontal'),
+                 dict(type='RandomFlip', prob=1., direction='horizontal')
+             ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
+         ])
+ ]
+ train_dataloader = dict(
+     batch_size=4,
+     num_workers=4,
+     persistent_workers=True,
+     sampler=dict(type='InfiniteSampler', shuffle=True),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(
+             img_path='images/training', seg_map_path='annotations/training'),
+         pipeline=train_pipeline))
+ val_dataloader = dict(
+     batch_size=1,
+     num_workers=4,
+     persistent_workers=True,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(
+             img_path='images/validation',
+             seg_map_path='annotations/validation'),
+         pipeline=test_pipeline))
+ test_dataloader = val_dataloader
+
+ val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
+ test_evaluator = val_evaluator
configs/_base_/datasets/bdd100k.py ADDED
@@ -0,0 +1,70 @@
+ # dataset settings
+ dataset_type = 'BDD100KDataset'
+ data_root = 'data/bdd100k/'
+
+ crop_size = (512, 1024)
+ train_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='LoadAnnotations'),
+     dict(
+         type='RandomResize',
+         scale=(2048, 1024),
+         ratio_range=(0.5, 2.0),
+         keep_ratio=True),
+     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+     dict(type='RandomFlip', prob=0.5),
+     dict(type='PhotoMetricDistortion'),
+     dict(type='PackSegInputs')
+ ]
+ test_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='Resize', scale=(2048, 1024), keep_ratio=True),
+     # add loading annotation after ``Resize`` because ground truth
+     # does not need to do resize data transform
+     dict(type='LoadAnnotations'),
+     dict(type='PackSegInputs')
+ ]
+ img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
+ tta_pipeline = [
+     dict(type='LoadImageFromFile', backend_args=None),
+     dict(
+         type='TestTimeAug',
+         transforms=[
+             [
+                 dict(type='Resize', scale_factor=r, keep_ratio=True)
+                 for r in img_ratios
+             ],
+             [
+                 dict(type='RandomFlip', prob=0., direction='horizontal'),
+                 dict(type='RandomFlip', prob=1., direction='horizontal')
+             ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
+         ])
+ ]
+ train_dataloader = dict(
+     batch_size=2,
+     num_workers=2,
+     persistent_workers=True,
+     sampler=dict(type='InfiniteSampler', shuffle=True),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(
+             img_path='images/10k/train',
+             seg_map_path='labels/sem_seg/masks/train'),
+         pipeline=train_pipeline))
+ val_dataloader = dict(
+     batch_size=1,
+     num_workers=4,
+     persistent_workers=True,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(
+             img_path='images/10k/val',
+             seg_map_path='labels/sem_seg/masks/val'),
+         pipeline=test_pipeline))
+ test_dataloader = val_dataloader
+
+ val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
+ test_evaluator = val_evaluator
configs/_base_/datasets/chase_db1.py ADDED
@@ -0,0 +1,75 @@
+ # dataset settings
+ dataset_type = 'ChaseDB1Dataset'
+ data_root = 'data/CHASE_DB1'
+ img_scale = (960, 999)
+ crop_size = (128, 128)
+ train_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='LoadAnnotations'),
+     dict(
+         type='RandomResize',
+         scale=img_scale,
+         ratio_range=(0.5, 2.0),
+         keep_ratio=True),
+     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+     dict(type='RandomFlip', prob=0.5),
+     dict(type='PhotoMetricDistortion'),
+     dict(type='PackSegInputs')
+ ]
+ test_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='Resize', scale=img_scale, keep_ratio=True),
+     # add loading annotation after ``Resize`` because ground truth
+     # does not need to do resize data transform
+     dict(type='LoadAnnotations'),
+     dict(type='PackSegInputs')
+ ]
+ img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
+ tta_pipeline = [
+     dict(type='LoadImageFromFile', backend_args=None),
+     dict(
+         type='TestTimeAug',
+         transforms=[
+             [
+                 dict(type='Resize', scale_factor=r, keep_ratio=True)
+                 for r in img_ratios
+             ],
+             [
+                 dict(type='RandomFlip', prob=0., direction='horizontal'),
+                 dict(type='RandomFlip', prob=1., direction='horizontal')
+             ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
+         ])
+ ]
+
+ train_dataloader = dict(
+     batch_size=4,
+     num_workers=4,
+     persistent_workers=True,
+     sampler=dict(type='InfiniteSampler', shuffle=True),
+     dataset=dict(
+         type='RepeatDataset',
+         times=40000,
+         dataset=dict(
+             type=dataset_type,
+             data_root=data_root,
+             data_prefix=dict(
+                 img_path='images/training',
+                 seg_map_path='annotations/training'),
+             pipeline=train_pipeline)))
+
+ val_dataloader = dict(
+     batch_size=1,
+     num_workers=4,
+     persistent_workers=True,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(
+             img_path='images/validation',
+             seg_map_path='annotations/validation'),
+         pipeline=test_pipeline))
+ test_dataloader = val_dataloader
+
+ val_evaluator = dict(type='IoUMetric', iou_metrics=['mDice'])
+ test_evaluator = val_evaluator
configs/_base_/datasets/cityscapes.py ADDED
@@ -0,0 +1,67 @@
+ # dataset settings
+ dataset_type = 'CityscapesDataset'
+ data_root = 'data/cityscapes/'
+ crop_size = (512, 1024)
+ train_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='LoadAnnotations'),
+     dict(
+         type='RandomResize',
+         scale=(2048, 1024),
+         ratio_range=(0.5, 2.0),
+         keep_ratio=True),
+     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+     dict(type='RandomFlip', prob=0.5),
+     dict(type='PhotoMetricDistortion'),
+     dict(type='PackSegInputs')
+ ]
+ test_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='Resize', scale=(2048, 1024), keep_ratio=True),
+     # add loading annotation after ``Resize`` because ground truth
+     # does not need to do resize data transform
+     dict(type='LoadAnnotations'),
+     dict(type='PackSegInputs')
+ ]
+ img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
+ tta_pipeline = [
+     dict(type='LoadImageFromFile', backend_args=None),
+     dict(
+         type='TestTimeAug',
+         transforms=[
+             [
+                 dict(type='Resize', scale_factor=r, keep_ratio=True)
+                 for r in img_ratios
+             ],
+             [
+                 dict(type='RandomFlip', prob=0., direction='horizontal'),
+                 dict(type='RandomFlip', prob=1., direction='horizontal')
+             ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
+         ])
+ ]
+ train_dataloader = dict(
+     batch_size=2,
+     num_workers=2,
+     persistent_workers=True,
+     sampler=dict(type='InfiniteSampler', shuffle=True),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(
+             img_path='leftImg8bit/train', seg_map_path='gtFine/train'),
+         pipeline=train_pipeline))
+ val_dataloader = dict(
+     batch_size=1,
+     num_workers=4,
+     persistent_workers=True,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(
+             img_path='leftImg8bit/val', seg_map_path='gtFine/val'),
+         pipeline=test_pipeline))
+ test_dataloader = val_dataloader
+
+ val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
+ test_evaluator = val_evaluator
configs/_base_/datasets/cityscapes_1024x1024.py ADDED
@@ -0,0 +1,29 @@
+ _base_ = './cityscapes.py'
+ crop_size = (1024, 1024)
+ train_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='LoadAnnotations'),
+     dict(
+         type='RandomResize',
+         scale=(2048, 1024),
+         ratio_range=(0.5, 2.0),
+         keep_ratio=True),
+     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+     dict(type='RandomFlip', prob=0.5),
+     dict(type='PhotoMetricDistortion'),
+     dict(type='PackSegInputs')
+ ]
+ test_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='Resize', scale=(2048, 1024), keep_ratio=True),
+     # add loading annotation after ``Resize`` because ground truth
+     # does not need to do resize data transform
+     dict(type='LoadAnnotations'),
+     dict(type='PackSegInputs')
+ ]
+ train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
+ val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+ test_dataloader = val_dataloader
+
+ val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
+ test_evaluator = val_evaluator
configs/_base_/datasets/cityscapes_768x768.py ADDED
@@ -0,0 +1,29 @@
+ _base_ = './cityscapes.py'
+ crop_size = (768, 768)
+ train_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='LoadAnnotations'),
+     dict(
+         type='RandomResize',
+         scale=(2049, 1025),
+         ratio_range=(0.5, 2.0),
+         keep_ratio=True),
+     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+     dict(type='RandomFlip', prob=0.5),
+     dict(type='PhotoMetricDistortion'),
+     dict(type='PackSegInputs')
+ ]
+ test_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='Resize', scale=(2049, 1025), keep_ratio=True),
+     # add loading annotation after ``Resize`` because ground truth
+     # does not need to do resize data transform
+     dict(type='LoadAnnotations'),
+     dict(type='PackSegInputs')
+ ]
+ train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
+ val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+ test_dataloader = val_dataloader
+
+ val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
+ test_evaluator = val_evaluator
configs/_base_/datasets/cityscapes_769x769.py ADDED
@@ -0,0 +1,29 @@
+ _base_ = './cityscapes.py'
+ crop_size = (769, 769)
+ train_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='LoadAnnotations'),
+     dict(
+         type='RandomResize',
+         scale=(2049, 1025),
+         ratio_range=(0.5, 2.0),
+         keep_ratio=True),
+     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+     dict(type='RandomFlip', prob=0.5),
+     dict(type='PhotoMetricDistortion'),
+     dict(type='PackSegInputs')
+ ]
+ test_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='Resize', scale=(2049, 1025), keep_ratio=True),
+     # add loading annotation after ``Resize`` because ground truth
+     # does not need to do resize data transform
+     dict(type='LoadAnnotations'),
+     dict(type='PackSegInputs')
+ ]
+ train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
+ val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+ test_dataloader = val_dataloader
+
+ val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
+ test_evaluator = val_evaluator
configs/_base_/datasets/cityscapes_832x832.py ADDED
@@ -0,0 +1,29 @@
+ _base_ = './cityscapes.py'
+ crop_size = (832, 832)
+ train_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='LoadAnnotations'),
+     dict(
+         type='RandomResize',
+         scale=(2048, 1024),
+         ratio_range=(0.5, 2.0),
+         keep_ratio=True),
+     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+     dict(type='RandomFlip', prob=0.5),
+     dict(type='PhotoMetricDistortion'),
+     dict(type='PackSegInputs')
+ ]
+ test_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='Resize', scale=(2048, 1024), keep_ratio=True),
+     # add loading annotation after ``Resize`` because ground truth
+     # does not need to do resize data transform
+     dict(type='LoadAnnotations'),
+     dict(type='PackSegInputs')
+ ]
+ train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
+ val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+ test_dataloader = val_dataloader
+
+ val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
+ test_evaluator = val_evaluator
configs/_base_/datasets/coco-stuff10k.py ADDED
@@ -0,0 +1,69 @@
+ # dataset settings
+ dataset_type = 'COCOStuffDataset'
+ data_root = 'data/coco_stuff10k'
+ crop_size = (512, 512)
+ train_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='LoadAnnotations', reduce_zero_label=True),
+     dict(
+         type='RandomResize',
+         scale=(2048, 512),
+         ratio_range=(0.5, 2.0),
+         keep_ratio=True),
+     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+     dict(type='RandomFlip', prob=0.5),
+     dict(type='PhotoMetricDistortion'),
+     dict(type='PackSegInputs')
+ ]
+ test_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='Resize', scale=(2048, 512), keep_ratio=True),
+     # add loading annotation after ``Resize`` because ground truth
+     # does not need to do resize data transform
+     dict(type='LoadAnnotations', reduce_zero_label=True),
+     dict(type='PackSegInputs')
+ ]
+ img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
+ tta_pipeline = [
+     dict(type='LoadImageFromFile', backend_args=None),
+     dict(
+         type='TestTimeAug',
+         transforms=[
+             [
+                 dict(type='Resize', scale_factor=r, keep_ratio=True)
+                 for r in img_ratios
+             ],
+             [
+                 dict(type='RandomFlip', prob=0., direction='horizontal'),
+                 dict(type='RandomFlip', prob=1., direction='horizontal')
+             ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
+         ])
+ ]
+ train_dataloader = dict(
+     batch_size=4,
+     num_workers=4,
+     persistent_workers=True,
+     sampler=dict(type='InfiniteSampler', shuffle=True),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         reduce_zero_label=True,
+         data_prefix=dict(
+             img_path='images/train2014', seg_map_path='annotations/train2014'),
+         pipeline=train_pipeline))
+ val_dataloader = dict(
+     batch_size=1,
+     num_workers=4,
+     persistent_workers=True,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         reduce_zero_label=True,
+         data_prefix=dict(
+             img_path='images/test2014', seg_map_path='annotations/test2014'),
+         pipeline=test_pipeline))
+ test_dataloader = val_dataloader
+
+ val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
+ test_evaluator = val_evaluator
configs/_base_/datasets/coco-stuff164k.py ADDED
@@ -0,0 +1,67 @@
+ # dataset settings
+ dataset_type = 'COCOStuffDataset'
+ data_root = 'data/coco_stuff164k'
+ crop_size = (512, 512)
+ train_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='LoadAnnotations'),
+     dict(
+         type='RandomResize',
+         scale=(2048, 512),
+         ratio_range=(0.5, 2.0),
+         keep_ratio=True),
+     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+     dict(type='RandomFlip', prob=0.5),
+     dict(type='PhotoMetricDistortion'),
+     dict(type='PackSegInputs')
+ ]
+ test_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='Resize', scale=(2048, 512), keep_ratio=True),
+     # add loading annotation after ``Resize`` because ground truth
+     # does not need to do resize data transform
+     dict(type='LoadAnnotations'),
+     dict(type='PackSegInputs')
+ ]
+ img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
+ tta_pipeline = [
+     dict(type='LoadImageFromFile', backend_args=None),
+     dict(
+         type='TestTimeAug',
+         transforms=[
+             [
+                 dict(type='Resize', scale_factor=r, keep_ratio=True)
+                 for r in img_ratios
+             ],
+             [
+                 dict(type='RandomFlip', prob=0., direction='horizontal'),
+                 dict(type='RandomFlip', prob=1., direction='horizontal')
+             ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
+         ])
+ ]
+ train_dataloader = dict(
+     batch_size=4,
+     num_workers=4,
+     persistent_workers=True,
+     sampler=dict(type='InfiniteSampler', shuffle=True),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(
+             img_path='images/train2017', seg_map_path='annotations/train2017'),
+         pipeline=train_pipeline))
+ val_dataloader = dict(
+     batch_size=1,
+     num_workers=4,
+     persistent_workers=True,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(
+             img_path='images/val2017', seg_map_path='annotations/val2017'),
+         pipeline=test_pipeline))
+ test_dataloader = val_dataloader
+
+ val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
+ test_evaluator = val_evaluator
configs/_base_/datasets/drive.py ADDED
@@ -0,0 +1,73 @@
+ # dataset settings
+ dataset_type = 'DRIVEDataset'
+ data_root = 'data/DRIVE'
+ img_scale = (584, 565)
+ crop_size = (64, 64)
+ train_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='LoadAnnotations'),
+     dict(
+         type='RandomResize',
+         scale=img_scale,
+         ratio_range=(0.5, 2.0),
+         keep_ratio=True),
+     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+     dict(type='RandomFlip', prob=0.5),
+     dict(type='PhotoMetricDistortion'),
+     dict(type='PackSegInputs')
+ ]
+ test_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='Resize', scale=img_scale, keep_ratio=True),
+     # add loading annotation after ``Resize`` because ground truth
+     # does not need to do resize data transform
+     dict(type='LoadAnnotations'),
+     dict(type='PackSegInputs')
+ ]
+ img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
+ tta_pipeline = [
+     dict(type='LoadImageFromFile', backend_args=None),
+     dict(
+         type='TestTimeAug',
+         transforms=[
+             [
+                 dict(type='Resize', scale_factor=r, keep_ratio=True)
+                 for r in img_ratios
+             ],
+             [
+                 dict(type='RandomFlip', prob=0., direction='horizontal'),
+                 dict(type='RandomFlip', prob=1., direction='horizontal')
+             ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
+         ])
+ ]
+ train_dataloader = dict(
+     batch_size=4,
+     num_workers=4,
+     persistent_workers=True,
+     sampler=dict(type='InfiniteSampler', shuffle=True),
+     dataset=dict(
+         type='RepeatDataset',
+         times=40000,
+         dataset=dict(
+             type=dataset_type,
+             data_root=data_root,
+             data_prefix=dict(
+                 img_path='images/training',
+                 seg_map_path='annotations/training'),
+             pipeline=train_pipeline)))
+ val_dataloader = dict(
+     batch_size=1,
+     num_workers=4,
+     persistent_workers=True,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(
+             img_path='images/validation',
+             seg_map_path='annotations/validation'),
+         pipeline=test_pipeline))
+ test_dataloader = val_dataloader
+
+ val_evaluator = dict(type='IoUMetric', iou_metrics=['mDice'])
+ test_evaluator = val_evaluator
configs/_base_/datasets/hrf.py ADDED
@@ -0,0 +1,73 @@
+ # dataset settings
+ dataset_type = 'HRFDataset'
+ data_root = 'data/HRF'
+ img_scale = (2336, 3504)
+ crop_size = (256, 256)
+ train_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='LoadAnnotations'),
+     dict(
+         type='RandomResize',
+         scale=img_scale,
+         ratio_range=(0.5, 2.0),
+         keep_ratio=True),
+     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+     dict(type='RandomFlip', prob=0.5),
+     dict(type='PhotoMetricDistortion'),
+     dict(type='PackSegInputs')
+ ]
+ test_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='Resize', scale=img_scale, keep_ratio=True),
+     # add loading annotation after ``Resize`` because ground truth
+     # does not need to do resize data transform
+     dict(type='LoadAnnotations'),
+     dict(type='PackSegInputs')
+ ]
+ img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
+ tta_pipeline = [
+     dict(type='LoadImageFromFile', backend_args=None),
+     dict(
+         type='TestTimeAug',
+         transforms=[
+             [
+                 dict(type='Resize', scale_factor=r, keep_ratio=True)
+                 for r in img_ratios
+             ],
+             [
+                 dict(type='RandomFlip', prob=0., direction='horizontal'),
+                 dict(type='RandomFlip', prob=1., direction='horizontal')
+             ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
+         ])
+ ]
+ train_dataloader = dict(
+     batch_size=4,
+     num_workers=4,
+     persistent_workers=True,
+     sampler=dict(type='InfiniteSampler', shuffle=True),
+     dataset=dict(
+         type='RepeatDataset',
+         times=40000,
+         dataset=dict(
+             type=dataset_type,
+             data_root=data_root,
+             data_prefix=dict(
+                 img_path='images/training',
+                 seg_map_path='annotations/training'),
+             pipeline=train_pipeline)))
+ val_dataloader = dict(
+     batch_size=1,
+     num_workers=4,
+     persistent_workers=True,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(
+             img_path='images/validation',
+             seg_map_path='annotations/validation'),
+         pipeline=test_pipeline))
+ test_dataloader = val_dataloader
+
+ val_evaluator = dict(type='IoUMetric', iou_metrics=['mDice'])
+ test_evaluator = val_evaluator
configs/_base_/datasets/isaid.py ADDED
@@ -0,0 +1,73 @@
+ # dataset settings
+ dataset_type = 'iSAIDDataset'
+ data_root = 'data/iSAID'
+ """
+ This crop_size setting follows the implementation of
+ `PointFlow: Flowing Semantics Through Points for Aerial Image
+ Segmentation <https://arxiv.org/pdf/2103.06564.pdf>`_.
+ """
+
+ crop_size = (896, 896)
+
+ train_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='LoadAnnotations'),
+     dict(
+         type='RandomResize',
+         scale=(896, 896),
+         ratio_range=(0.5, 2.0),
+         keep_ratio=True),
+     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+     dict(type='RandomFlip', prob=0.5),
+     dict(type='PhotoMetricDistortion'),
+     dict(type='PackSegInputs')
+ ]
+ test_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='Resize', scale=(896, 896), keep_ratio=True),
+     # add loading annotation after ``Resize`` because ground truth
+     # does not need to do resize data transform
+     dict(type='LoadAnnotations'),
+     dict(type='PackSegInputs')
+ ]
+ img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
+ tta_pipeline = [
+     dict(type='LoadImageFromFile', backend_args=None),
+     dict(
+         type='TestTimeAug',
+         transforms=[
+             [
+                 dict(type='Resize', scale_factor=r, keep_ratio=True)
+                 for r in img_ratios
+             ],
+             [
+                 dict(type='RandomFlip', prob=0., direction='horizontal'),
+                 dict(type='RandomFlip', prob=1., direction='horizontal')
+             ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
+         ])
+ ]
+ train_dataloader = dict(
+     batch_size=4,
+     num_workers=4,
+     persistent_workers=True,
+     sampler=dict(type='InfiniteSampler', shuffle=True),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(
+             img_path='img_dir/train', seg_map_path='ann_dir/train'),
+         pipeline=train_pipeline))
+ val_dataloader = dict(
+     batch_size=1,
+     num_workers=4,
+     persistent_workers=True,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(img_path='img_dir/val', seg_map_path='ann_dir/val'),
+         pipeline=test_pipeline))
+ test_dataloader = val_dataloader
+
+ val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
+ test_evaluator = val_evaluator
configs/_base_/datasets/levir_256x256.py ADDED
@@ -0,0 +1,59 @@
+ # dataset settings
+ dataset_type = 'LEVIRCDDataset'
+ data_root = r'data/LEVIRCD'
+
+ albu_train_transforms = [
+     dict(type='RandomBrightnessContrast', p=0.2),
+     dict(type='HorizontalFlip', p=0.5),
+     dict(type='VerticalFlip', p=0.5)
+ ]
+
+ train_pipeline = [
+     dict(type='LoadMultipleRSImageFromFile'),
+     dict(type='LoadAnnotations'),
+     dict(type='Albu', transforms=albu_train_transforms),
+     dict(type='ConcatCDInput'),
+     dict(type='PackSegInputs')
+ ]
+ test_pipeline = [
+     dict(type='LoadMultipleRSImageFromFile'),
+     dict(type='LoadAnnotations'),
+     dict(type='ConcatCDInput'),
+     dict(type='PackSegInputs')
+ ]
+
+ tta_pipeline = [
+     dict(type='LoadMultipleRSImageFromFile'),
+     dict(
+         type='TestTimeAug',
+         transforms=[[dict(type='LoadAnnotations')],
+                     [dict(type='ConcatCDInput')],
+                     [dict(type='PackSegInputs')]])
+ ]
+ train_dataloader = dict(
+     batch_size=4,
+     num_workers=4,
+     persistent_workers=True,
+     sampler=dict(type='InfiniteSampler', shuffle=True),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(
+             img_path='train/A',
+             img_path2='train/B',
+             seg_map_path='train/label'),
+         pipeline=train_pipeline))
+ val_dataloader = dict(
+     batch_size=1,
+     num_workers=4,
+     persistent_workers=True,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(
+             img_path='test/A', img_path2='test/B', seg_map_path='test/label'),
+         pipeline=test_pipeline))
+ test_dataloader = val_dataloader
+ val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
+ test_evaluator = val_evaluator
configs/_base_/datasets/loveda.py ADDED
@@ -0,0 +1,66 @@
+ # dataset settings
+ dataset_type = 'LoveDADataset'
+ data_root = 'data/loveDA'
+ crop_size = (512, 512)
+ train_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='LoadAnnotations', reduce_zero_label=True),
+     dict(
+         type='RandomResize',
+         scale=(2048, 512),
+         ratio_range=(0.5, 2.0),
+         keep_ratio=True),
+     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+     dict(type='RandomFlip', prob=0.5),
+     dict(type='PhotoMetricDistortion'),
+     dict(type='PackSegInputs')
+ ]
+ test_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='Resize', scale=(1024, 1024), keep_ratio=True),
+     # add loading annotation after ``Resize`` because ground truth
+     # does not need to do resize data transform
+     dict(type='LoadAnnotations', reduce_zero_label=True),
+     dict(type='PackSegInputs')
+ ]
+ img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
+ tta_pipeline = [
+     dict(type='LoadImageFromFile', backend_args=None),
+     dict(
+         type='TestTimeAug',
+         transforms=[
+             [
+                 dict(type='Resize', scale_factor=r, keep_ratio=True)
+                 for r in img_ratios
+             ],
+             [
+                 dict(type='RandomFlip', prob=0., direction='horizontal'),
+                 dict(type='RandomFlip', prob=1., direction='horizontal')
+             ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
+         ])
+ ]
+ train_dataloader = dict(
+     batch_size=4,
+     num_workers=4,
+     persistent_workers=True,
+     sampler=dict(type='InfiniteSampler', shuffle=True),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(
+             img_path='img_dir/train', seg_map_path='ann_dir/train'),
+         pipeline=train_pipeline))
+ val_dataloader = dict(
+     batch_size=1,
+     num_workers=4,
+     persistent_workers=True,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(img_path='img_dir/val', seg_map_path='ann_dir/val'),
+         pipeline=test_pipeline))
+ test_dataloader = val_dataloader
+
+ val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
+ test_evaluator = val_evaluator
configs/_base_/datasets/mapillary_v1.py ADDED
@@ -0,0 +1,68 @@
+ # dataset settings
+ dataset_type = 'MapillaryDataset_v1'
+ data_root = 'data/mapillary/'
+ crop_size = (512, 1024)
+ train_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='LoadAnnotations'),
+     dict(
+         type='RandomResize',
+         scale=(2048, 1024),
+         ratio_range=(0.5, 2.0),
+         keep_ratio=True),
+     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+     dict(type='RandomFlip', prob=0.5),
+     dict(type='PhotoMetricDistortion'),
+     dict(type='PackSegInputs')
+ ]
+ test_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='Resize', scale=(2048, 1024), keep_ratio=True),
+     # add loading annotation after ``Resize`` because ground truth
+     # does not need to do resize data transform
+     dict(type='LoadAnnotations'),
+     dict(type='PackSegInputs')
+ ]
+ img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
+ tta_pipeline = [
+     dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')),
+     dict(
+         type='TestTimeAug',
+         transforms=[
+             [
+                 dict(type='Resize', scale_factor=r, keep_ratio=True)
+                 for r in img_ratios
+             ],
+             [
+                 dict(type='RandomFlip', prob=0., direction='horizontal'),
+                 dict(type='RandomFlip', prob=1., direction='horizontal')
+             ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
+         ])
+ ]
+ train_dataloader = dict(
+     batch_size=2,
+     num_workers=4,
+     persistent_workers=True,
+     sampler=dict(type='InfiniteSampler', shuffle=True),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(
+             img_path='training/images', seg_map_path='training/v1.2/labels'),
+         pipeline=train_pipeline))
+ val_dataloader = dict(
+     batch_size=1,
+     num_workers=4,
+     persistent_workers=True,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(
+             img_path='validation/images',
+             seg_map_path='validation/v1.2/labels'),
+         pipeline=test_pipeline))
+ test_dataloader = val_dataloader
+
+ val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
+ test_evaluator = val_evaluator
configs/_base_/datasets/mapillary_v1_65.py ADDED
@@ -0,0 +1,37 @@
+ # dataset settings
+ _base_ = './mapillary_v1.py'
+ metainfo = dict(
+     classes=('Bird', 'Ground Animal', 'Curb', 'Fence', 'Guard Rail', 'Barrier',
+              'Wall', 'Bike Lane', 'Crosswalk - Plain', 'Curb Cut', 'Parking',
+              'Pedestrian Area', 'Rail Track', 'Road', 'Service Lane',
+              'Sidewalk', 'Bridge', 'Building', 'Tunnel', 'Person', 'Bicyclist',
+              'Motorcyclist', 'Other Rider', 'Lane Marking - Crosswalk',
+              'Lane Marking - General', 'Mountain', 'Sand', 'Sky', 'Snow',
+              'Terrain', 'Vegetation', 'Water', 'Banner', 'Bench', 'Bike Rack',
+              'Billboard', 'Catch Basin', 'CCTV Camera', 'Fire Hydrant',
+              'Junction Box', 'Mailbox', 'Manhole', 'Phone Booth', 'Pothole',
+              'Street Light', 'Pole', 'Traffic Sign Frame', 'Utility Pole',
+              'Traffic Light', 'Traffic Sign (Back)', 'Traffic Sign (Front)',
+              'Trash Can', 'Bicycle', 'Boat', 'Bus', 'Car', 'Caravan',
+              'Motorcycle', 'On Rails', 'Other Vehicle', 'Trailer', 'Truck',
+              'Wheeled Slow', 'Car Mount', 'Ego Vehicle'),
+     palette=[[165, 42, 42], [0, 192, 0], [196, 196, 196], [190, 153, 153],
+              [180, 165, 180], [90, 120, 150], [102, 102, 156], [128, 64, 255],
+              [140, 140, 200], [170, 170, 170], [250, 170, 160], [96, 96, 96],
+              [230, 150, 140], [128, 64, 128], [110, 110, 110], [244, 35, 232],
+              [150, 100, 100], [70, 70, 70], [150, 120, 90], [220, 20, 60],
+              [255, 0, 0], [255, 0, 100], [255, 0, 200], [200, 128, 128],
+              [255, 255, 255], [64, 170, 64], [230, 160, 50], [70, 130, 180],
+              [190, 255, 255], [152, 251, 152], [107, 142, 35], [0, 170, 30],
+              [255, 255, 128], [250, 0, 30], [100, 140, 180], [220, 220, 220],
+              [220, 128, 128], [222, 40, 40], [100, 170, 30], [40, 40, 40],
+              [33, 33, 33], [100, 128, 160], [142, 0, 0], [70, 100, 150],
+              [210, 170, 100], [153, 153, 153], [128, 128, 128], [0, 0, 80],
+              [250, 170, 30], [192, 192, 192], [220, 220, 0], [140, 140, 20],
+              [119, 11, 32], [150, 0, 255], [0, 60, 100], [0, 0, 142],
+              [0, 0, 90], [0, 0, 230], [0, 80, 100], [128, 64, 64], [0, 0, 110],
+              [0, 0, 70], [0, 0, 192], [32, 32, 32], [120, 10, 10]])
+
+ train_dataloader = dict(dataset=dict(metainfo=metainfo))
+ val_dataloader = dict(dataset=dict(metainfo=metainfo))
+ test_dataloader = val_dataloader
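Since mmseg colors label i with `palette[i]`, an override like this only works if `classes` and `palette` stay aligned. A quick sanity-check sketch (the filename implies 65 classes; the printed index/values are read off the lists above):

from mmengine import Config

cfg = Config.fromfile('configs/_base_/datasets/mapillary_v1_65.py')
meta = cfg.metainfo
assert len(meta['classes']) == len(meta['palette']) == 65
print(meta['classes'][13], meta['palette'][13])  # 'Road' [128, 64, 128]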
configs/_base_/datasets/mapillary_v2.py ADDED
@@ -0,0 +1,68 @@
+ # dataset settings
+ dataset_type = 'MapillaryDataset_v2'
+ data_root = 'data/mapillary/'
+ crop_size = (512, 1024)
+ train_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='LoadAnnotations'),
+     dict(
+         type='RandomResize',
+         scale=(2048, 1024),
+         ratio_range=(0.5, 2.0),
+         keep_ratio=True),
+     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+     dict(type='RandomFlip', prob=0.5),
+     dict(type='PhotoMetricDistortion'),
+     dict(type='PackSegInputs')
+ ]
+ test_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='Resize', scale=(2048, 1024), keep_ratio=True),
+     # add loading annotation after ``Resize`` because ground truth
+     # does not need to do resize data transform
+     dict(type='LoadAnnotations'),
+     dict(type='PackSegInputs')
+ ]
+ img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
+ tta_pipeline = [
+     dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')),
+     dict(
+         type='TestTimeAug',
+         transforms=[
+             [
+                 dict(type='Resize', scale_factor=r, keep_ratio=True)
+                 for r in img_ratios
+             ],
+             [
+                 dict(type='RandomFlip', prob=0., direction='horizontal'),
+                 dict(type='RandomFlip', prob=1., direction='horizontal')
+             ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
+         ])
+ ]
+ train_dataloader = dict(
+     batch_size=2,
+     num_workers=4,
+     persistent_workers=True,
+     sampler=dict(type='InfiniteSampler', shuffle=True),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(
+             img_path='training/images', seg_map_path='training/v2.0/labels'),
+         pipeline=train_pipeline))
+ val_dataloader = dict(
+     batch_size=1,
+     num_workers=4,
+     persistent_workers=True,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(
+             img_path='validation/images',
+             seg_map_path='validation/v2.0/labels'),
+         pipeline=test_pipeline))
+ test_dataloader = val_dataloader
+
+ val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
+ test_evaluator = val_evaluator
configs/_base_/datasets/nyu.py ADDED
@@ -0,0 +1,67 @@
+ # dataset settings
+ dataset_type = 'NYUDataset'
+ data_root = 'data/nyu'
+
+ train_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='LoadDepthAnnotation', depth_rescale_factor=1e-3),
+     dict(type='RandomDepthMix', prob=0.25),
+     dict(type='RandomFlip', prob=0.5),
+     dict(type='RandomCrop', crop_size=(480, 480)),
+     dict(
+         type='Albu',
+         transforms=[
+             dict(type='RandomBrightnessContrast'),
+             dict(type='RandomGamma'),
+             dict(type='HueSaturationValue'),
+         ]),
+     dict(
+         type='PackSegInputs',
+         meta_keys=('img_path', 'depth_map_path', 'ori_shape', 'img_shape',
+                    'pad_shape', 'scale_factor', 'flip', 'flip_direction',
+                    'category_id')),
+ ]
+
+ test_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='Resize', scale=(2000, 480), keep_ratio=True),
+     dict(type='LoadDepthAnnotation', depth_rescale_factor=1e-3),
+     dict(
+         type='PackSegInputs',
+         meta_keys=('img_path', 'depth_map_path', 'ori_shape', 'img_shape',
+                    'pad_shape', 'scale_factor', 'flip', 'flip_direction',
+                    'category_id'))
+ ]
+
+ train_dataloader = dict(
+     batch_size=8,
+     num_workers=8,
+     persistent_workers=True,
+     sampler=dict(type='InfiniteSampler', shuffle=True),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(
+             img_path='images/train', depth_map_path='annotations/train'),
+         pipeline=train_pipeline))
+
+ val_dataloader = dict(
+     batch_size=1,
+     num_workers=4,
+     persistent_workers=True,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         test_mode=True,
+         data_prefix=dict(
+             img_path='images/test', depth_map_path='annotations/test'),
+         pipeline=test_pipeline))
+ test_dataloader = val_dataloader
+
+ val_evaluator = dict(
+     type='DepthMetric',
+     min_depth_eval=0.001,
+     max_depth_eval=10.0,
+     crop_type='nyu_crop')
+ test_evaluator = val_evaluator
configs/_base_/datasets/nyu_512x512.py ADDED
@@ -0,0 +1,72 @@
+ # dataset settings
+ dataset_type = 'NYUDataset'
+ data_root = 'data/nyu'
+
+ train_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='LoadDepthAnnotation', depth_rescale_factor=1e-3),
+     dict(type='RandomDepthMix', prob=0.25),
+     dict(type='RandomFlip', prob=0.5),
+     dict(
+         type='RandomResize',
+         scale=(768, 512),
+         ratio_range=(0.8, 1.5),
+         keep_ratio=True),
+     dict(type='RandomCrop', crop_size=(512, 512)),
+     dict(
+         type='Albu',
+         transforms=[
+             dict(type='RandomBrightnessContrast'),
+             dict(type='RandomGamma'),
+             dict(type='HueSaturationValue'),
+         ]),
+     dict(
+         type='PackSegInputs',
+         meta_keys=('img_path', 'depth_map_path', 'ori_shape', 'img_shape',
+                    'pad_shape', 'scale_factor', 'flip', 'flip_direction',
+                    'category_id')),
+ ]
+
+ test_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='Resize', scale=(2048, 512), keep_ratio=True),
+     dict(type='LoadDepthAnnotation', depth_rescale_factor=1e-3),
+     dict(
+         type='PackSegInputs',
+         meta_keys=('img_path', 'depth_map_path', 'ori_shape', 'img_shape',
+                    'pad_shape', 'scale_factor', 'flip', 'flip_direction',
+                    'category_id'))
+ ]
+
+ train_dataloader = dict(
+     batch_size=8,
+     num_workers=8,
+     persistent_workers=True,
+     sampler=dict(type='InfiniteSampler', shuffle=True),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(
+             img_path='images/train', depth_map_path='annotations/train'),
+         pipeline=train_pipeline))
+
+ val_dataloader = dict(
+     batch_size=1,
+     num_workers=4,
+     persistent_workers=True,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         test_mode=True,
+         data_prefix=dict(
+             img_path='images/test', depth_map_path='annotations/test'),
+         pipeline=test_pipeline))
+ test_dataloader = val_dataloader
+
+ val_evaluator = dict(
+     type='DepthMetric',
+     min_depth_eval=0.001,
+     max_depth_eval=10.0,
+     crop_type='nyu_crop')
+ test_evaluator = val_evaluator
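The `Albu` transform in this pipeline wraps the third-party albumentations library, so that package must be installed for the config to run. A standalone sketch (assuming albumentations is available) of the photometric jitter those three transforms apply:

    import albumentations as A
    import numpy as np

    aug = A.Compose([
        A.RandomBrightnessContrast(),
        A.RandomGamma(),
        A.HueSaturationValue(),
    ])
    img = np.random.randint(0, 256, (512, 512, 3), dtype=np.uint8)
    out = aug(image=img)['image']  # same shape, randomly jittered colors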
configs/_base_/datasets/pascal_context.py ADDED
@@ -0,0 +1,56 @@
+ # dataset settings
+ dataset_type = 'PascalContextDataset'
+ data_root = 'data/VOCdevkit/VOC2010/'
+
+ img_scale = (520, 520)
+ crop_size = (480, 480)
+
+ train_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='LoadAnnotations'),
+     dict(
+         type='RandomResize',
+         scale=img_scale,
+         ratio_range=(0.5, 2.0),
+         keep_ratio=True),
+     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+     dict(type='RandomFlip', prob=0.5),
+     dict(type='PhotoMetricDistortion'),
+     dict(type='PackSegInputs')
+ ]
+ test_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='Resize', scale=img_scale, keep_ratio=True),
+     # Load the annotation after ``Resize``, since the ground truth
+     # does not need to be resized.
+     dict(type='LoadAnnotations'),
+     dict(type='PackSegInputs')
+ ]
+ train_dataloader = dict(
+     batch_size=4,
+     num_workers=4,
+     persistent_workers=True,
+     sampler=dict(type='InfiniteSampler', shuffle=True),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(
+             img_path='JPEGImages', seg_map_path='SegmentationClassContext'),
+         ann_file='ImageSets/SegmentationContext/train.txt',
+         pipeline=train_pipeline))
+ val_dataloader = dict(
+     batch_size=1,
+     num_workers=4,
+     persistent_workers=True,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(
+             img_path='JPEGImages', seg_map_path='SegmentationClassContext'),
+         ann_file='ImageSets/SegmentationContext/val.txt',
+         pipeline=test_pipeline))
+ test_dataloader = val_dataloader
+
+ val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
+ test_evaluator = val_evaluator
configs/_base_/datasets/pascal_context_59.py ADDED
@@ -0,0 +1,72 @@
+ # dataset settings
+ dataset_type = 'PascalContextDataset59'
+ data_root = 'data/VOCdevkit/VOC2010/'
+
+ img_scale = (520, 520)
+ crop_size = (480, 480)
+
+ train_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='LoadAnnotations', reduce_zero_label=True),
+     dict(
+         type='RandomResize',
+         scale=img_scale,
+         ratio_range=(0.5, 2.0),
+         keep_ratio=True),
+     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+     dict(type='RandomFlip', prob=0.5),
+     dict(type='PhotoMetricDistortion'),
+     dict(type='PackSegInputs')
+ ]
+ test_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='Resize', scale=img_scale, keep_ratio=True),
+     # Load the annotation after ``Resize``, since the ground truth
+     # does not need to be resized.
+     dict(type='LoadAnnotations', reduce_zero_label=True),
+     dict(type='PackSegInputs')
+ ]
+ img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
+ tta_pipeline = [
+     dict(type='LoadImageFromFile', backend_args=None),
+     dict(
+         type='TestTimeAug',
+         transforms=[
+             [
+                 dict(type='Resize', scale_factor=r, keep_ratio=True)
+                 for r in img_ratios
+             ],
+             [
+                 dict(type='RandomFlip', prob=0., direction='horizontal'),
+                 dict(type='RandomFlip', prob=1., direction='horizontal')
+             ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
+         ])
+ ]
+ train_dataloader = dict(
+     batch_size=4,
+     num_workers=4,
+     persistent_workers=True,
+     sampler=dict(type='InfiniteSampler', shuffle=True),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(
+             img_path='JPEGImages', seg_map_path='SegmentationClassContext'),
+         ann_file='ImageSets/SegmentationContext/train.txt',
+         pipeline=train_pipeline))
+ val_dataloader = dict(
+     batch_size=1,
+     num_workers=4,
+     persistent_workers=True,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(
+             img_path='JPEGImages', seg_map_path='SegmentationClassContext'),
+         ann_file='ImageSets/SegmentationContext/val.txt',
+         pipeline=test_pipeline))
+ test_dataloader = val_dataloader
+
+ val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
+ test_evaluator = val_evaluator
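`TestTimeAug` takes the cross product of its transform groups, so the `tta_pipeline` above turns each test image into 6 scales x 2 flips = 12 augmented forward passes, which the `SegTTAModel` declared in `default_runtime.py` (later in this diff) merges into a single prediction. A quick sketch of the enumeration it performs:

    img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
    flips = ['no flip', 'horizontal flip']
    variants = [(r, f) for r in img_ratios for f in flips]
    assert len(variants) == 12  # forward passes per test image under TTA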
configs/_base_/datasets/pascal_voc12.py ADDED
@@ -0,0 +1,69 @@
+ # dataset settings
+ dataset_type = 'PascalVOCDataset'
+ data_root = 'data/VOCdevkit/VOC2012'
+ crop_size = (512, 512)
+ train_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='LoadAnnotations'),
+     dict(
+         type='RandomResize',
+         scale=(2048, 512),
+         ratio_range=(0.5, 2.0),
+         keep_ratio=True),
+     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+     dict(type='RandomFlip', prob=0.5),
+     dict(type='PhotoMetricDistortion'),
+     dict(type='PackSegInputs')
+ ]
+ test_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='Resize', scale=(2048, 512), keep_ratio=True),
+     # Load the annotation after ``Resize``, since the ground truth
+     # does not need to be resized.
+     dict(type='LoadAnnotations'),
+     dict(type='PackSegInputs')
+ ]
+ img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
+ tta_pipeline = [
+     dict(type='LoadImageFromFile', backend_args=None),
+     dict(
+         type='TestTimeAug',
+         transforms=[
+             [
+                 dict(type='Resize', scale_factor=r, keep_ratio=True)
+                 for r in img_ratios
+             ],
+             [
+                 dict(type='RandomFlip', prob=0., direction='horizontal'),
+                 dict(type='RandomFlip', prob=1., direction='horizontal')
+             ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
+         ])
+ ]
+ train_dataloader = dict(
+     batch_size=4,
+     num_workers=4,
+     persistent_workers=True,
+     sampler=dict(type='InfiniteSampler', shuffle=True),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(
+             img_path='JPEGImages', seg_map_path='SegmentationClass'),
+         ann_file='ImageSets/Segmentation/train.txt',
+         pipeline=train_pipeline))
+ val_dataloader = dict(
+     batch_size=1,
+     num_workers=4,
+     persistent_workers=True,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(
+             img_path='JPEGImages', seg_map_path='SegmentationClass'),
+         ann_file='ImageSets/Segmentation/val.txt',
+         pipeline=test_pipeline))
+ test_dataloader = val_dataloader
+
+ val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
+ test_evaluator = val_evaluator
configs/_base_/datasets/pascal_voc12_aug.py ADDED
@@ -0,0 +1,81 @@
+ # dataset settings
+ dataset_type = 'PascalVOCDataset'
+ data_root = 'data/VOCdevkit/VOC2012'
+ crop_size = (512, 512)
+ train_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='LoadAnnotations'),
+     dict(
+         type='RandomResize',
+         scale=(2048, 512),
+         ratio_range=(0.5, 2.0),
+         keep_ratio=True),
+     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+     dict(type='RandomFlip', prob=0.5),
+     dict(type='PhotoMetricDistortion'),
+     dict(type='Pad', size=crop_size),
+     dict(type='PackSegInputs')
+ ]
+
+ test_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='Resize', scale=(2048, 512), keep_ratio=True),
+     # Load the annotation after ``Resize``, since the ground truth
+     # does not need to be resized.
+     dict(type='LoadAnnotations'),
+     dict(type='PackSegInputs')
+ ]
+ img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
+ tta_pipeline = [
+     dict(type='LoadImageFromFile', backend_args=None),
+     dict(
+         type='TestTimeAug',
+         transforms=[
+             [
+                 dict(type='Resize', scale_factor=r, keep_ratio=True)
+                 for r in img_ratios
+             ],
+             [
+                 dict(type='RandomFlip', prob=0., direction='horizontal'),
+                 dict(type='RandomFlip', prob=1., direction='horizontal')
+             ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
+         ])
+ ]
+ dataset_train = dict(
+     type=dataset_type,
+     data_root=data_root,
+     data_prefix=dict(img_path='JPEGImages', seg_map_path='SegmentationClass'),
+     ann_file='ImageSets/Segmentation/train.txt',
+     pipeline=train_pipeline)
+
+ dataset_aug = dict(
+     type=dataset_type,
+     data_root=data_root,
+     data_prefix=dict(
+         img_path='JPEGImages', seg_map_path='SegmentationClassAug'),
+     ann_file='ImageSets/Segmentation/aug.txt',
+     pipeline=train_pipeline)
+
+ train_dataloader = dict(
+     batch_size=4,
+     num_workers=4,
+     persistent_workers=True,
+     sampler=dict(type='InfiniteSampler', shuffle=True),
+     dataset=dict(type='ConcatDataset', datasets=[dataset_train, dataset_aug]))
+
+ val_dataloader = dict(
+     batch_size=1,
+     num_workers=4,
+     persistent_workers=True,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(
+             img_path='JPEGImages', seg_map_path='SegmentationClass'),
+         ann_file='ImageSets/Segmentation/val.txt',
+         pipeline=test_pipeline))
+ test_dataloader = val_dataloader
+
+ val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
+ test_evaluator = val_evaluator
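`ConcatDataset` chains the plain VOC 2012 train split and the augmented split into one indexable dataset: indices past the end of the first dataset fall through to the second. A minimal sketch of the semantics, with placeholder split sizes (the real sizes come from the two `.txt` lists):

    len_train, len_aug = 1464, 9000   # hypothetical placeholder sizes

    def lookup(i):
        # Combined length is len_train + len_aug; index i resolves to
        # whichever underlying dataset contains it.
        return ('train', i) if i < len_train else ('aug', i - len_train)

    assert lookup(0) == ('train', 0)
    assert lookup(len_train) == ('aug', 0)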
configs/_base_/datasets/potsdam.py ADDED
@@ -0,0 +1,66 @@
+ # dataset settings
+ dataset_type = 'PotsdamDataset'
+ data_root = 'data/potsdam'
+ crop_size = (512, 512)
+ train_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='LoadAnnotations', reduce_zero_label=True),
+     dict(
+         type='RandomResize',
+         scale=(512, 512),
+         ratio_range=(0.5, 2.0),
+         keep_ratio=True),
+     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+     dict(type='RandomFlip', prob=0.5),
+     dict(type='PhotoMetricDistortion'),
+     dict(type='PackSegInputs')
+ ]
+ test_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='Resize', scale=(512, 512), keep_ratio=True),
+     # Load the annotation after ``Resize``, since the ground truth
+     # does not need to be resized.
+     dict(type='LoadAnnotations', reduce_zero_label=True),
+     dict(type='PackSegInputs')
+ ]
+ img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
+ tta_pipeline = [
+     dict(type='LoadImageFromFile', backend_args=None),
+     dict(
+         type='TestTimeAug',
+         transforms=[
+             [
+                 dict(type='Resize', scale_factor=r, keep_ratio=True)
+                 for r in img_ratios
+             ],
+             [
+                 dict(type='RandomFlip', prob=0., direction='horizontal'),
+                 dict(type='RandomFlip', prob=1., direction='horizontal')
+             ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
+         ])
+ ]
+ train_dataloader = dict(
+     batch_size=4,
+     num_workers=4,
+     persistent_workers=True,
+     sampler=dict(type='InfiniteSampler', shuffle=True),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(
+             img_path='img_dir/train', seg_map_path='ann_dir/train'),
+         pipeline=train_pipeline))
+ val_dataloader = dict(
+     batch_size=1,
+     num_workers=4,
+     persistent_workers=True,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(img_path='img_dir/val', seg_map_path='ann_dir/val'),
+         pipeline=test_pipeline))
+ test_dataloader = val_dataloader
+
+ val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
+ test_evaluator = val_evaluator
configs/_base_/datasets/refuge.py ADDED
@@ -0,0 +1,90 @@
+ # dataset settings
+ dataset_type = 'REFUGEDataset'
+ data_root = 'data/REFUGE'
+ train_img_scale = (2056, 2124)
+ val_img_scale = (1634, 1634)
+ test_img_scale = (1634, 1634)
+ crop_size = (512, 512)
+
+ train_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='LoadAnnotations', reduce_zero_label=False),
+     dict(
+         type='RandomResize',
+         scale=train_img_scale,
+         ratio_range=(0.5, 2.0),
+         keep_ratio=True),
+     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+     dict(type='RandomFlip', prob=0.5),
+     dict(type='PhotoMetricDistortion'),
+     dict(type='PackSegInputs')
+ ]
+ val_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='Resize', scale=val_img_scale, keep_ratio=True),
+     # Load the annotation after ``Resize``, since the ground truth
+     # does not need to be resized.
+     dict(type='LoadAnnotations', reduce_zero_label=False),
+     dict(type='PackSegInputs')
+ ]
+ test_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='Resize', scale=test_img_scale, keep_ratio=True),
+     # Load the annotation after ``Resize``, since the ground truth
+     # does not need to be resized.
+     dict(type='LoadAnnotations', reduce_zero_label=False),
+     dict(type='PackSegInputs')
+ ]
+ img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
+ tta_pipeline = [
+     dict(type='LoadImageFromFile', backend_args=dict(backend='local')),
+     dict(
+         type='TestTimeAug',
+         transforms=[
+             [
+                 dict(type='Resize', scale_factor=r, keep_ratio=True)
+                 for r in img_ratios
+             ],
+             [
+                 dict(type='RandomFlip', prob=0., direction='horizontal'),
+                 dict(type='RandomFlip', prob=1., direction='horizontal')
+             ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
+         ])
+ ]
+ train_dataloader = dict(
+     batch_size=4,
+     num_workers=4,
+     persistent_workers=True,
+     sampler=dict(type='InfiniteSampler', shuffle=True),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(
+             img_path='images/training', seg_map_path='annotations/training'),
+         pipeline=train_pipeline))
+ val_dataloader = dict(
+     batch_size=1,
+     num_workers=4,
+     persistent_workers=True,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(
+             img_path='images/validation',
+             seg_map_path='annotations/validation'),
+         pipeline=val_pipeline))
+ test_dataloader = dict(
+     batch_size=1,
+     num_workers=4,
+     persistent_workers=True,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(
+             img_path='images/test', seg_map_path='annotations/test'),
+         pipeline=test_pipeline))
+
+ val_evaluator = dict(type='IoUMetric', iou_metrics=['mDice'])
+ test_evaluator = val_evaluator
configs/_base_/datasets/stare.py ADDED
@@ -0,0 +1,73 @@
+ # dataset settings
+ dataset_type = 'STAREDataset'
+ data_root = 'data/STARE'
+ img_scale = (605, 700)
+ crop_size = (128, 128)
+ train_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='LoadAnnotations'),
+     dict(
+         type='RandomResize',
+         scale=img_scale,
+         ratio_range=(0.5, 2.0),
+         keep_ratio=True),
+     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+     dict(type='RandomFlip', prob=0.5),
+     dict(type='PhotoMetricDistortion'),
+     dict(type='PackSegInputs')
+ ]
+ test_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='Resize', scale=img_scale, keep_ratio=True),
+     # Load the annotation after ``Resize``, since the ground truth
+     # does not need to be resized.
+     dict(type='LoadAnnotations'),
+     dict(type='PackSegInputs')
+ ]
+ img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
+ tta_pipeline = [
+     dict(type='LoadImageFromFile', backend_args=None),
+     dict(
+         type='TestTimeAug',
+         transforms=[
+             [
+                 dict(type='Resize', scale_factor=r, keep_ratio=True)
+                 for r in img_ratios
+             ],
+             [
+                 dict(type='RandomFlip', prob=0., direction='horizontal'),
+                 dict(type='RandomFlip', prob=1., direction='horizontal')
+             ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
+         ])
+ ]
+ train_dataloader = dict(
+     batch_size=4,
+     num_workers=4,
+     persistent_workers=True,
+     sampler=dict(type='InfiniteSampler', shuffle=True),
+     dataset=dict(
+         type='RepeatDataset',
+         times=40000,
+         dataset=dict(
+             type=dataset_type,
+             data_root=data_root,
+             data_prefix=dict(
+                 img_path='images/training',
+                 seg_map_path='annotations/training'),
+             pipeline=train_pipeline)))
+ val_dataloader = dict(
+     batch_size=1,
+     num_workers=4,
+     persistent_workers=True,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(
+             img_path='images/validation',
+             seg_map_path='annotations/validation'),
+         pipeline=test_pipeline))
+ test_dataloader = val_dataloader
+
+ val_evaluator = dict(type='IoUMetric', iou_metrics=['mDice'])
+ test_evaluator = val_evaluator
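`RepeatDataset` with `times=40000` makes the tiny STARE training split look 40000x longer to the iteration-based `InfiniteSampler`, so epochs of a few images never exhaust the loader. A minimal sketch of the wrapper's assumed indexing semantics:

    class RepeatSketch:
        """A dataset of length N repeated T times: index i maps to i % N."""

        def __init__(self, dataset, times):
            self.dataset, self.times = dataset, times

        def __len__(self):
            return len(self.dataset) * self.times

        def __getitem__(self, idx):
            return self.dataset[idx % len(self.dataset)]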
configs/_base_/datasets/synapse.py ADDED
@@ -0,0 +1,41 @@
+ dataset_type = 'SynapseDataset'
+ data_root = 'data/synapse/'
+ img_scale = (224, 224)
+ train_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='LoadAnnotations'),
+     dict(type='Resize', scale=img_scale, keep_ratio=True),
+     dict(type='RandomRotFlip', rotate_prob=0.5, flip_prob=0.5, degree=20),
+     dict(type='PackSegInputs')
+ ]
+ test_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='Resize', scale=img_scale, keep_ratio=True),
+     dict(type='LoadAnnotations'),
+     dict(type='PackSegInputs')
+ ]
+ train_dataloader = dict(
+     batch_size=6,
+     num_workers=2,
+     persistent_workers=True,
+     sampler=dict(type='InfiniteSampler', shuffle=True),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(
+             img_path='img_dir/train', seg_map_path='ann_dir/train'),
+         pipeline=train_pipeline))
+ val_dataloader = dict(
+     batch_size=1,
+     num_workers=4,
+     persistent_workers=True,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(img_path='img_dir/val', seg_map_path='ann_dir/val'),
+         pipeline=test_pipeline))
+ test_dataloader = val_dataloader
+
+ val_evaluator = dict(type='IoUMetric', iou_metrics=['mDice'])
+ test_evaluator = val_evaluator
configs/_base_/datasets/vaihingen.py ADDED
@@ -0,0 +1,66 @@
+ # dataset settings
+ dataset_type = 'ISPRSDataset'
+ data_root = 'data/vaihingen'
+ crop_size = (512, 512)
+ train_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='LoadAnnotations', reduce_zero_label=True),
+     dict(
+         type='RandomResize',
+         scale=(512, 512),
+         ratio_range=(0.5, 2.0),
+         keep_ratio=True),
+     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+     dict(type='RandomFlip', prob=0.5),
+     dict(type='PhotoMetricDistortion'),
+     dict(type='PackSegInputs')
+ ]
+ test_pipeline = [
+     dict(type='LoadImageFromFile'),
+     dict(type='Resize', scale=(512, 512), keep_ratio=True),
+     # Load the annotation after ``Resize``, since the ground truth
+     # does not need to be resized.
+     dict(type='LoadAnnotations', reduce_zero_label=True),
+     dict(type='PackSegInputs')
+ ]
+ img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
+ tta_pipeline = [
+     dict(type='LoadImageFromFile', backend_args=None),
+     dict(
+         type='TestTimeAug',
+         transforms=[
+             [
+                 dict(type='Resize', scale_factor=r, keep_ratio=True)
+                 for r in img_ratios
+             ],
+             [
+                 dict(type='RandomFlip', prob=0., direction='horizontal'),
+                 dict(type='RandomFlip', prob=1., direction='horizontal')
+             ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
+         ])
+ ]
+ train_dataloader = dict(
+     batch_size=4,
+     num_workers=4,
+     persistent_workers=True,
+     sampler=dict(type='InfiniteSampler', shuffle=True),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(
+             img_path='img_dir/train', seg_map_path='ann_dir/train'),
+         pipeline=train_pipeline))
+ val_dataloader = dict(
+     batch_size=1,
+     num_workers=4,
+     persistent_workers=True,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(img_path='img_dir/val', seg_map_path='ann_dir/val'),
+         pipeline=test_pipeline))
+ test_dataloader = val_dataloader
+
+ val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
+ test_evaluator = val_evaluator
configs/_base_/default_runtime.py ADDED
@@ -0,0 +1,15 @@
+ default_scope = 'mmseg'
+ env_cfg = dict(
+     cudnn_benchmark=True,
+     mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
+     dist_cfg=dict(backend='nccl'),
+ )
+ vis_backends = [dict(type='LocalVisBackend')]
+ visualizer = dict(
+     type='SegLocalVisualizer', vis_backends=vis_backends, name='visualizer')
+ log_processor = dict(by_epoch=False)
+ log_level = 'INFO'
+ load_from = None
+ resume = False
+
+ tta_model = dict(type='SegTTAModel')
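This runtime file is the piece every full config inherits alongside a model file, a dataset file, and a training schedule. A sketch of the usual four-way composition (the schedule path is an assumption; no schedule file appears in this part of the diff):

    # Hypothetical derived config combining the pieces in this commit:
    _base_ = [
        '../_base_/models/deeplabv3_r50-d8.py',
        '../_base_/datasets/cityscapes.py',
        '../_base_/default_runtime.py',
        '../_base_/schedules/schedule_80k.py',  # assumed schedule file
    ]
    load_from = None   # or a checkpoint path to fine-tune from
    resume = False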
configs/_base_/models/ann_r50-d8.py ADDED
@@ -0,0 +1,54 @@
+ # model settings
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
+ data_preprocessor = dict(
+     type='SegDataPreProcessor',
+     mean=[123.675, 116.28, 103.53],
+     std=[58.395, 57.12, 57.375],
+     bgr_to_rgb=True,
+     pad_val=0,
+     seg_pad_val=255)
+ model = dict(
+     type='EncoderDecoder',
+     data_preprocessor=data_preprocessor,
+     pretrained='open-mmlab://resnet50_v1c',
+     backbone=dict(
+         type='ResNetV1c',
+         depth=50,
+         num_stages=4,
+         out_indices=(0, 1, 2, 3),
+         dilations=(1, 1, 2, 4),
+         strides=(1, 2, 1, 1),
+         norm_cfg=norm_cfg,
+         norm_eval=False,
+         style='pytorch',
+         contract_dilation=True),
+     decode_head=dict(
+         type='ANNHead',
+         in_channels=[1024, 2048],
+         in_index=[2, 3],
+         channels=512,
+         project_channels=256,
+         query_scales=(1, ),
+         key_pool_scales=(1, 3, 6, 8),
+         dropout_ratio=0.1,
+         num_classes=19,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+     auxiliary_head=dict(
+         type='FCNHead',
+         in_channels=1024,
+         in_index=2,
+         channels=256,
+         num_convs=1,
+         concat_input=False,
+         dropout_ratio=0.1,
+         num_classes=19,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+     # model training and testing settings
+     train_cfg=dict(),
+     test_cfg=dict(mode='whole'))
configs/_base_/models/apcnet_r50-d8.py ADDED
@@ -0,0 +1,52 @@
+ # model settings
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
+ data_preprocessor = dict(
+     type='SegDataPreProcessor',
+     mean=[123.675, 116.28, 103.53],
+     std=[58.395, 57.12, 57.375],
+     bgr_to_rgb=True,
+     pad_val=0,
+     seg_pad_val=255)
+ model = dict(
+     type='EncoderDecoder',
+     data_preprocessor=data_preprocessor,
+     pretrained='open-mmlab://resnet50_v1c',
+     backbone=dict(
+         type='ResNetV1c',
+         depth=50,
+         num_stages=4,
+         out_indices=(0, 1, 2, 3),
+         dilations=(1, 1, 2, 4),
+         strides=(1, 2, 1, 1),
+         norm_cfg=norm_cfg,
+         norm_eval=False,
+         style='pytorch',
+         contract_dilation=True),
+     decode_head=dict(
+         type='APCHead',
+         in_channels=2048,
+         in_index=3,
+         channels=512,
+         pool_scales=(1, 2, 3, 6),
+         dropout_ratio=0.1,
+         num_classes=19,
+         norm_cfg=dict(type='SyncBN', requires_grad=True),
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+     auxiliary_head=dict(
+         type='FCNHead',
+         in_channels=1024,
+         in_index=2,
+         channels=256,
+         num_convs=1,
+         concat_input=False,
+         dropout_ratio=0.1,
+         num_classes=19,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+     # model training and testing settings
+     train_cfg=dict(),
+     test_cfg=dict(mode='whole'))
configs/_base_/models/bisenetv1_r18-d32.py ADDED
@@ -0,0 +1,76 @@
+ # model settings
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
+ data_preprocessor = dict(
+     type='SegDataPreProcessor',
+     mean=[123.675, 116.28, 103.53],
+     std=[58.395, 57.12, 57.375],
+     bgr_to_rgb=True,
+     pad_val=0,
+     seg_pad_val=255)
+ model = dict(
+     type='EncoderDecoder',
+     data_preprocessor=data_preprocessor,
+     backbone=dict(
+         type='BiSeNetV1',
+         in_channels=3,
+         context_channels=(128, 256, 512),
+         spatial_channels=(64, 64, 64, 128),
+         out_indices=(0, 1, 2),
+         out_channels=256,
+         backbone_cfg=dict(
+             type='ResNet',
+             in_channels=3,
+             depth=18,
+             num_stages=4,
+             out_indices=(0, 1, 2, 3),
+             dilations=(1, 1, 1, 1),
+             strides=(1, 2, 2, 2),
+             norm_cfg=norm_cfg,
+             norm_eval=False,
+             style='pytorch',
+             contract_dilation=True),
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         init_cfg=None),
+     decode_head=dict(
+         type='FCNHead',
+         in_channels=256,
+         in_index=0,
+         channels=256,
+         num_convs=1,
+         concat_input=False,
+         dropout_ratio=0.1,
+         num_classes=19,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+     auxiliary_head=[
+         dict(
+             type='FCNHead',
+             in_channels=128,
+             channels=64,
+             num_convs=1,
+             num_classes=19,
+             in_index=1,
+             norm_cfg=norm_cfg,
+             concat_input=False,
+             align_corners=False,
+             loss_decode=dict(
+                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+         dict(
+             type='FCNHead',
+             in_channels=128,
+             channels=64,
+             num_convs=1,
+             num_classes=19,
+             in_index=2,
+             norm_cfg=norm_cfg,
+             concat_input=False,
+             align_corners=False,
+             loss_decode=dict(
+                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+     ],
+     # model training and testing settings
+     train_cfg=dict(),
+     test_cfg=dict(mode='whole'))
configs/_base_/models/bisenetv2.py ADDED
@@ -0,0 +1,88 @@
+ # model settings
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
+ data_preprocessor = dict(
+     type='SegDataPreProcessor',
+     mean=[123.675, 116.28, 103.53],
+     std=[58.395, 57.12, 57.375],
+     bgr_to_rgb=True,
+     pad_val=0,
+     seg_pad_val=255)
+ model = dict(
+     type='EncoderDecoder',
+     data_preprocessor=data_preprocessor,
+     pretrained=None,
+     backbone=dict(
+         type='BiSeNetV2',
+         detail_channels=(64, 64, 128),
+         semantic_channels=(16, 32, 64, 128),
+         semantic_expansion_ratio=6,
+         bga_channels=128,
+         out_indices=(0, 1, 2, 3, 4),
+         init_cfg=None,
+         align_corners=False),
+     decode_head=dict(
+         type='FCNHead',
+         in_channels=128,
+         in_index=0,
+         channels=1024,
+         num_convs=1,
+         concat_input=False,
+         dropout_ratio=0.1,
+         num_classes=19,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+     auxiliary_head=[
+         dict(
+             type='FCNHead',
+             in_channels=16,
+             channels=16,
+             num_convs=2,
+             num_classes=19,
+             in_index=1,
+             norm_cfg=norm_cfg,
+             concat_input=False,
+             align_corners=False,
+             loss_decode=dict(
+                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+         dict(
+             type='FCNHead',
+             in_channels=32,
+             channels=64,
+             num_convs=2,
+             num_classes=19,
+             in_index=2,
+             norm_cfg=norm_cfg,
+             concat_input=False,
+             align_corners=False,
+             loss_decode=dict(
+                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+         dict(
+             type='FCNHead',
+             in_channels=64,
+             channels=256,
+             num_convs=2,
+             num_classes=19,
+             in_index=3,
+             norm_cfg=norm_cfg,
+             concat_input=False,
+             align_corners=False,
+             loss_decode=dict(
+                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+         dict(
+             type='FCNHead',
+             in_channels=128,
+             channels=1024,
+             num_convs=2,
+             num_classes=19,
+             in_index=4,
+             norm_cfg=norm_cfg,
+             concat_input=False,
+             align_corners=False,
+             loss_decode=dict(
+                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+     ],
+     # model training and testing settings
+     train_cfg=dict(),
+     test_cfg=dict(mode='whole'))
configs/_base_/models/ccnet_r50-d8.py ADDED
@@ -0,0 +1,52 @@
+ # model settings
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
+ data_preprocessor = dict(
+     type='SegDataPreProcessor',
+     mean=[123.675, 116.28, 103.53],
+     std=[58.395, 57.12, 57.375],
+     bgr_to_rgb=True,
+     pad_val=0,
+     seg_pad_val=255)
+ model = dict(
+     type='EncoderDecoder',
+     data_preprocessor=data_preprocessor,
+     pretrained='open-mmlab://resnet50_v1c',
+     backbone=dict(
+         type='ResNetV1c',
+         depth=50,
+         num_stages=4,
+         out_indices=(0, 1, 2, 3),
+         dilations=(1, 1, 2, 4),
+         strides=(1, 2, 1, 1),
+         norm_cfg=norm_cfg,
+         norm_eval=False,
+         style='pytorch',
+         contract_dilation=True),
+     decode_head=dict(
+         type='CCHead',
+         in_channels=2048,
+         in_index=3,
+         channels=512,
+         recurrence=2,
+         dropout_ratio=0.1,
+         num_classes=19,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+     auxiliary_head=dict(
+         type='FCNHead',
+         in_channels=1024,
+         in_index=2,
+         channels=256,
+         num_convs=1,
+         concat_input=False,
+         dropout_ratio=0.1,
+         num_classes=19,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+     # model training and testing settings
+     train_cfg=dict(),
+     test_cfg=dict(mode='whole'))
configs/_base_/models/cgnet.py ADDED
@@ -0,0 +1,43 @@
+ # model settings
+ norm_cfg = dict(type='SyncBN', eps=1e-03, requires_grad=True)
+ data_preprocessor = dict(
+     type='SegDataPreProcessor',
+     mean=[72.39239876, 82.90891754, 73.15835921],
+     std=[1, 1, 1],
+     bgr_to_rgb=True,
+     pad_val=0,
+     seg_pad_val=255)
+ model = dict(
+     type='EncoderDecoder',
+     data_preprocessor=data_preprocessor,
+     backbone=dict(
+         type='CGNet',
+         norm_cfg=norm_cfg,
+         in_channels=3,
+         num_channels=(32, 64, 128),
+         num_blocks=(3, 21),
+         dilations=(2, 4),
+         reductions=(8, 16)),
+     decode_head=dict(
+         type='FCNHead',
+         in_channels=256,
+         in_index=2,
+         channels=256,
+         num_convs=0,
+         concat_input=False,
+         dropout_ratio=0,
+         num_classes=19,
+         norm_cfg=norm_cfg,
+         loss_decode=dict(
+             type='CrossEntropyLoss',
+             use_sigmoid=False,
+             loss_weight=1.0,
+             class_weight=[
+                 2.5959933, 6.7415504, 3.5354059, 9.8663225, 9.690899, 9.369352,
+                 10.289121, 9.953208, 4.3097677, 9.490387, 7.674431, 9.396905,
+                 10.347791, 6.3927646, 10.226669, 10.241062, 10.280587,
+                 10.396974, 10.055647
+             ])),
+     # model training and testing settings
+     train_cfg=dict(sampler=None),
+     test_cfg=dict(mode='whole'))
configs/_base_/models/danet_r50-d8.py ADDED
@@ -0,0 +1,52 @@
+ # model settings
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
+ data_preprocessor = dict(
+     type='SegDataPreProcessor',
+     mean=[123.675, 116.28, 103.53],
+     std=[58.395, 57.12, 57.375],
+     bgr_to_rgb=True,
+     pad_val=0,
+     seg_pad_val=255)
+ model = dict(
+     type='EncoderDecoder',
+     data_preprocessor=data_preprocessor,
+     pretrained='open-mmlab://resnet50_v1c',
+     backbone=dict(
+         type='ResNetV1c',
+         depth=50,
+         num_stages=4,
+         out_indices=(0, 1, 2, 3),
+         dilations=(1, 1, 2, 4),
+         strides=(1, 2, 1, 1),
+         norm_cfg=norm_cfg,
+         norm_eval=False,
+         style='pytorch',
+         contract_dilation=True),
+     decode_head=dict(
+         type='DAHead',
+         in_channels=2048,
+         in_index=3,
+         channels=512,
+         pam_channels=64,
+         dropout_ratio=0.1,
+         num_classes=19,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+     auxiliary_head=dict(
+         type='FCNHead',
+         in_channels=1024,
+         in_index=2,
+         channels=256,
+         num_convs=1,
+         concat_input=False,
+         dropout_ratio=0.1,
+         num_classes=19,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+     # model training and testing settings
+     train_cfg=dict(),
+     test_cfg=dict(mode='whole'))
configs/_base_/models/deeplabv3_r50-d8.py ADDED
@@ -0,0 +1,52 @@
+ # model settings
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
+ data_preprocessor = dict(
+     type='SegDataPreProcessor',
+     mean=[123.675, 116.28, 103.53],
+     std=[58.395, 57.12, 57.375],
+     bgr_to_rgb=True,
+     pad_val=0,
+     seg_pad_val=255)
+ model = dict(
+     type='EncoderDecoder',
+     data_preprocessor=data_preprocessor,
+     pretrained='open-mmlab://resnet50_v1c',
+     backbone=dict(
+         type='ResNetV1c',
+         depth=50,
+         num_stages=4,
+         out_indices=(0, 1, 2, 3),
+         dilations=(1, 1, 2, 4),
+         strides=(1, 2, 1, 1),
+         norm_cfg=norm_cfg,
+         norm_eval=False,
+         style='pytorch',
+         contract_dilation=True),
+     decode_head=dict(
+         type='ASPPHead',
+         in_channels=2048,
+         in_index=3,
+         channels=512,
+         dilations=(1, 12, 24, 36),
+         dropout_ratio=0.1,
+         num_classes=19,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+     auxiliary_head=dict(
+         type='FCNHead',
+         in_channels=1024,
+         in_index=2,
+         channels=256,
+         num_convs=1,
+         concat_input=False,
+         dropout_ratio=0.1,
+         num_classes=19,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+     # model training and testing settings
+     train_cfg=dict(),
+     test_cfg=dict(mode='whole'))
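Both heads above default to `num_classes=19` (Cityscapes). When a derived config pairs this model base with another dataset, the decode head and the auxiliary head must be overridden together, since each computes its own loss. A sketch for ADE20K's 150 classes (the derived file name is hypothetical):

    # configs/deeplabv3/deeplabv3_r50-d8_ade20k.py (hypothetical name)
    _base_ = [
        '../_base_/models/deeplabv3_r50-d8.py',
        '../_base_/datasets/ade20k.py',
        '../_base_/default_runtime.py',
    ]
    model = dict(
        decode_head=dict(num_classes=150),
        auxiliary_head=dict(num_classes=150))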
configs/_base_/models/deeplabv3_unet_s5-d16.py ADDED
@@ -0,0 +1,58 @@
+ # model settings
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
+ data_preprocessor = dict(
+     type='SegDataPreProcessor',
+     mean=[123.675, 116.28, 103.53],
+     std=[58.395, 57.12, 57.375],
+     bgr_to_rgb=True,
+     pad_val=0,
+     seg_pad_val=255)
+ model = dict(
+     type='EncoderDecoder',
+     data_preprocessor=data_preprocessor,
+     pretrained=None,
+     backbone=dict(
+         type='UNet',
+         in_channels=3,
+         base_channels=64,
+         num_stages=5,
+         strides=(1, 1, 1, 1, 1),
+         enc_num_convs=(2, 2, 2, 2, 2),
+         dec_num_convs=(2, 2, 2, 2),
+         downsamples=(True, True, True, True),
+         enc_dilations=(1, 1, 1, 1, 1),
+         dec_dilations=(1, 1, 1, 1),
+         with_cp=False,
+         conv_cfg=None,
+         norm_cfg=norm_cfg,
+         act_cfg=dict(type='ReLU'),
+         upsample_cfg=dict(type='InterpConv'),
+         norm_eval=False),
+     decode_head=dict(
+         type='ASPPHead',
+         in_channels=64,
+         in_index=4,
+         channels=16,
+         dilations=(1, 12, 24, 36),
+         dropout_ratio=0.1,
+         num_classes=2,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+     auxiliary_head=dict(
+         type='FCNHead',
+         in_channels=128,
+         in_index=3,
+         channels=64,
+         num_convs=1,
+         concat_input=False,
+         dropout_ratio=0.1,
+         num_classes=2,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+     # model training and testing settings
+     train_cfg=dict(),
+     test_cfg=dict(mode='slide', crop_size=256, stride=170))
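Unlike the other model bases in this commit, the UNet variant tests with `mode='slide'`: the image is covered by overlapping `crop_size` windows placed every `stride` pixels, and overlapping logits are averaged. A quick check of the window count this implies, assuming a hypothetical 512 x 512 input:

    import math

    H, W, crop, stride = 512, 512, 256, 170
    n_h = math.ceil((H - crop) / stride) + 1
    n_w = math.ceil((W - crop) / stride) + 1
    print(n_h * n_w)  # 9 overlapping windows for a 512 x 512 image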
configs/_base_/models/deeplabv3plus_r50-d8.py ADDED
@@ -0,0 +1,54 @@
+ # model settings
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
+ data_preprocessor = dict(
+     type='SegDataPreProcessor',
+     mean=[123.675, 116.28, 103.53],
+     std=[58.395, 57.12, 57.375],
+     bgr_to_rgb=True,
+     pad_val=0,
+     seg_pad_val=255)
+ model = dict(
+     type='EncoderDecoder',
+     data_preprocessor=data_preprocessor,
+     pretrained='open-mmlab://resnet50_v1c',
+     backbone=dict(
+         type='ResNetV1c',
+         depth=50,
+         num_stages=4,
+         out_indices=(0, 1, 2, 3),
+         dilations=(1, 1, 2, 4),
+         strides=(1, 2, 1, 1),
+         norm_cfg=norm_cfg,
+         norm_eval=False,
+         style='pytorch',
+         contract_dilation=True),
+     decode_head=dict(
+         type='DepthwiseSeparableASPPHead',
+         in_channels=2048,
+         in_index=3,
+         channels=512,
+         dilations=(1, 12, 24, 36),
+         c1_in_channels=256,
+         c1_channels=48,
+         dropout_ratio=0.1,
+         num_classes=19,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+     auxiliary_head=dict(
+         type='FCNHead',
+         in_channels=1024,
+         in_index=2,
+         channels=256,
+         num_convs=1,
+         concat_input=False,
+         dropout_ratio=0.1,
+         num_classes=19,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+     # model training and testing settings
+     train_cfg=dict(),
+     test_cfg=dict(mode='whole'))
configs/_base_/models/dmnet_r50-d8.py ADDED
@@ -0,0 +1,52 @@
+ # model settings
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
+ data_preprocessor = dict(
+     type='SegDataPreProcessor',
+     mean=[123.675, 116.28, 103.53],
+     std=[58.395, 57.12, 57.375],
+     bgr_to_rgb=True,
+     pad_val=0,
+     seg_pad_val=255)
+ model = dict(
+     type='EncoderDecoder',
+     data_preprocessor=data_preprocessor,
+     pretrained='open-mmlab://resnet50_v1c',
+     backbone=dict(
+         type='ResNetV1c',
+         depth=50,
+         num_stages=4,
+         out_indices=(0, 1, 2, 3),
+         dilations=(1, 1, 2, 4),
+         strides=(1, 2, 1, 1),
+         norm_cfg=norm_cfg,
+         norm_eval=False,
+         style='pytorch',
+         contract_dilation=True),
+     decode_head=dict(
+         type='DMHead',
+         in_channels=2048,
+         in_index=3,
+         channels=512,
+         filter_sizes=(1, 3, 5, 7),
+         dropout_ratio=0.1,
+         num_classes=19,
+         norm_cfg=dict(type='SyncBN', requires_grad=True),
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+     auxiliary_head=dict(
+         type='FCNHead',
+         in_channels=1024,
+         in_index=2,
+         channels=256,
+         num_convs=1,
+         concat_input=False,
+         dropout_ratio=0.1,
+         num_classes=19,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+     # model training and testing settings
+     train_cfg=dict(),
+     test_cfg=dict(mode='whole'))
configs/_base_/models/dnl_r50-d8.py ADDED
@@ -0,0 +1,54 @@
+ # model settings
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
+ data_preprocessor = dict(
+     type='SegDataPreProcessor',
+     mean=[123.675, 116.28, 103.53],
+     std=[58.395, 57.12, 57.375],
+     bgr_to_rgb=True,
+     pad_val=0,
+     seg_pad_val=255)
+ model = dict(
+     type='EncoderDecoder',
+     data_preprocessor=data_preprocessor,
+     pretrained='open-mmlab://resnet50_v1c',
+     backbone=dict(
+         type='ResNetV1c',
+         depth=50,
+         num_stages=4,
+         out_indices=(0, 1, 2, 3),
+         dilations=(1, 1, 2, 4),
+         strides=(1, 2, 1, 1),
+         norm_cfg=norm_cfg,
+         norm_eval=False,
+         style='pytorch',
+         contract_dilation=True),
+     decode_head=dict(
+         type='DNLHead',
+         in_channels=2048,
+         in_index=3,
+         channels=512,
+         dropout_ratio=0.1,
+         reduction=2,
+         use_scale=True,
+         mode='embedded_gaussian',
+         num_classes=19,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+     auxiliary_head=dict(
+         type='FCNHead',
+         in_channels=1024,
+         in_index=2,
+         channels=256,
+         num_convs=1,
+         concat_input=False,
+         dropout_ratio=0.1,
+         num_classes=19,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+     # model training and testing settings
+     train_cfg=dict(),
+     test_cfg=dict(mode='whole'))
configs/_base_/models/dpt_vit-b16.py ADDED
@@ -0,0 +1,39 @@
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
+ data_preprocessor = dict(
+     type='SegDataPreProcessor',
+     mean=[123.675, 116.28, 103.53],
+     std=[58.395, 57.12, 57.375],
+     bgr_to_rgb=True,
+     pad_val=0,
+     seg_pad_val=255)
+ model = dict(
+     type='EncoderDecoder',
+     data_preprocessor=data_preprocessor,
+     pretrained='pretrain/vit-b16_p16_224-80ecf9dd.pth',  # noqa
+     backbone=dict(
+         type='VisionTransformer',
+         img_size=224,
+         embed_dims=768,
+         num_layers=12,
+         num_heads=12,
+         out_indices=(2, 5, 8, 11),
+         final_norm=False,
+         with_cls_token=True,
+         output_cls_token=True),
+     decode_head=dict(
+         type='DPTHead',
+         in_channels=(768, 768, 768, 768),
+         channels=256,
+         embed_dims=768,
+         post_process_channels=[96, 192, 384, 768],
+         num_classes=150,
+         readout_type='project',
+         input_transform='multiple_select',
+         in_index=(0, 1, 2, 3),
+         norm_cfg=norm_cfg,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+     auxiliary_head=None,
+     # model training and testing settings
+     train_cfg=dict(),
+     test_cfg=dict(mode='whole'))  # yapf: disable
configs/_base_/models/emanet_r50-d8.py ADDED
@@ -0,0 +1,55 @@
+ # model settings
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
+ data_preprocessor = dict(
+     type='SegDataPreProcessor',
+     mean=[123.675, 116.28, 103.53],
+     std=[58.395, 57.12, 57.375],
+     bgr_to_rgb=True,
+     pad_val=0,
+     seg_pad_val=255)
+ model = dict(
+     type='EncoderDecoder',
+     data_preprocessor=data_preprocessor,
+     pretrained='open-mmlab://resnet50_v1c',
+     backbone=dict(
+         type='ResNetV1c',
+         depth=50,
+         num_stages=4,
+         out_indices=(0, 1, 2, 3),
+         dilations=(1, 1, 2, 4),
+         strides=(1, 2, 1, 1),
+         norm_cfg=norm_cfg,
+         norm_eval=False,
+         style='pytorch',
+         contract_dilation=True),
+     decode_head=dict(
+         type='EMAHead',
+         in_channels=2048,
+         in_index=3,
+         channels=256,
+         ema_channels=512,
+         num_bases=64,
+         num_stages=3,
+         momentum=0.1,
+         dropout_ratio=0.1,
+         num_classes=19,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+     auxiliary_head=dict(
+         type='FCNHead',
+         in_channels=1024,
+         in_index=2,
+         channels=256,
+         num_convs=1,
+         concat_input=False,
+         dropout_ratio=0.1,
+         num_classes=19,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+     # model training and testing settings
+     train_cfg=dict(),
+     test_cfg=dict(mode='whole'))
configs/_base_/models/encnet_r50-d8.py ADDED
@@ -0,0 +1,56 @@
+ # model settings
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
+ data_preprocessor = dict(
+     type='SegDataPreProcessor',
+     mean=[123.675, 116.28, 103.53],
+     std=[58.395, 57.12, 57.375],
+     bgr_to_rgb=True,
+     pad_val=0,
+     seg_pad_val=255)
+ model = dict(
+     type='EncoderDecoder',
+     data_preprocessor=data_preprocessor,
+     pretrained='open-mmlab://resnet50_v1c',
+     backbone=dict(
+         type='ResNetV1c',
+         depth=50,
+         num_stages=4,
+         out_indices=(0, 1, 2, 3),
+         dilations=(1, 1, 2, 4),
+         strides=(1, 2, 1, 1),
+         norm_cfg=norm_cfg,
+         norm_eval=False,
+         style='pytorch',
+         contract_dilation=True),
+     decode_head=dict(
+         type='EncHead',
+         in_channels=[512, 1024, 2048],
+         in_index=(1, 2, 3),
+         channels=512,
+         num_codes=32,
+         use_se_loss=True,
+         add_lateral=False,
+         dropout_ratio=0.1,
+         num_classes=19,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+         loss_se_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.2)),
+     auxiliary_head=dict(
+         type='FCNHead',
+         in_channels=1024,
+         in_index=2,
+         channels=256,
+         num_convs=1,
+         concat_input=False,
+         dropout_ratio=0.1,
+         num_classes=19,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+     # model training and testing settings
+     train_cfg=dict(),
+     test_cfg=dict(mode='whole'))
configs/_base_/models/erfnet_fcn.py ADDED
@@ -0,0 +1,40 @@
+ # model settings
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
+ data_preprocessor = dict(
+     type='SegDataPreProcessor',
+     mean=[123.675, 116.28, 103.53],
+     std=[58.395, 57.12, 57.375],
+     bgr_to_rgb=True,
+     pad_val=0,
+     seg_pad_val=255)
+ model = dict(
+     type='EncoderDecoder',
+     data_preprocessor=data_preprocessor,
+     pretrained=None,
+     backbone=dict(
+         type='ERFNet',
+         in_channels=3,
+         enc_downsample_channels=(16, 64, 128),
+         enc_stage_non_bottlenecks=(5, 8),
+         enc_non_bottleneck_dilations=(2, 4, 8, 16),
+         enc_non_bottleneck_channels=(64, 128),
+         dec_upsample_channels=(64, 16),
+         dec_stages_non_bottleneck=(2, 2),
+         dec_non_bottleneck_channels=(64, 16),
+         dropout_ratio=0.1,
+         init_cfg=None),
+     decode_head=dict(
+         type='FCNHead',
+         in_channels=16,
+         channels=128,
+         num_convs=1,
+         concat_input=False,
+         dropout_ratio=0.1,
+         num_classes=19,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+     # model training and testing settings
+     train_cfg=dict(),
+     test_cfg=dict(mode='whole'))
configs/_base_/models/fast_scnn.py ADDED
@@ -0,0 +1,65 @@
+ # model settings
+ norm_cfg = dict(type='SyncBN', requires_grad=True, momentum=0.01)
+ data_preprocessor = dict(
+     type='SegDataPreProcessor',
+     mean=[123.675, 116.28, 103.53],
+     std=[58.395, 57.12, 57.375],
+     bgr_to_rgb=True,
+     pad_val=0,
+     seg_pad_val=255)
+ model = dict(
+     type='EncoderDecoder',
+     data_preprocessor=data_preprocessor,
+     backbone=dict(
+         type='FastSCNN',
+         downsample_dw_channels=(32, 48),
+         global_in_channels=64,
+         global_block_channels=(64, 96, 128),
+         global_block_strides=(2, 2, 1),
+         global_out_channels=128,
+         higher_in_channels=64,
+         lower_in_channels=128,
+         fusion_out_channels=128,
+         out_indices=(0, 1, 2),
+         norm_cfg=norm_cfg,
+         align_corners=False),
+     decode_head=dict(
+         type='DepthwiseSeparableFCNHead',
+         in_channels=128,
+         channels=128,
+         concat_input=False,
+         num_classes=19,
+         in_index=-1,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1)),
+     auxiliary_head=[
+         dict(
+             type='FCNHead',
+             in_channels=128,
+             channels=32,
+             num_convs=1,
+             num_classes=19,
+             in_index=-2,
+             norm_cfg=norm_cfg,
+             concat_input=False,
+             align_corners=False,
+             loss_decode=dict(
+                 type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)),
+         dict(
+             type='FCNHead',
+             in_channels=64,
+             channels=32,
+             num_convs=1,
+             num_classes=19,
+             in_index=-3,
+             norm_cfg=norm_cfg,
+             concat_input=False,
+             align_corners=False,
+             loss_decode=dict(
+                 type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)),
+     ],
+     # model training and testing settings
+     train_cfg=dict(),
+     test_cfg=dict(mode='whole'))