auto annotators
This view is limited to 50 files because the commit contains too many changes.
- annotator/canny/__init__.py +6 -0
- annotator/ckpts/body_pose_model.pth +3 -0
- annotator/ckpts/ckpts.txt +1 -0
- annotator/ckpts/hand_pose_model.pth +3 -0
- annotator/openpose/__init__.py +44 -0
- annotator/openpose/body.py +219 -0
- annotator/openpose/hand.py +86 -0
- annotator/openpose/model.py +219 -0
- annotator/openpose/util.py +164 -0
- annotator/uniformer/__init__.py +23 -0
- annotator/uniformer/configs/_base_/datasets/ade20k.py +54 -0
- annotator/uniformer/configs/_base_/datasets/chase_db1.py +59 -0
- annotator/uniformer/configs/_base_/datasets/cityscapes.py +54 -0
- annotator/uniformer/configs/_base_/datasets/cityscapes_769x769.py +35 -0
- annotator/uniformer/configs/_base_/datasets/drive.py +59 -0
- annotator/uniformer/configs/_base_/datasets/hrf.py +59 -0
- annotator/uniformer/configs/_base_/datasets/pascal_context.py +60 -0
- annotator/uniformer/configs/_base_/datasets/pascal_context_59.py +60 -0
- annotator/uniformer/configs/_base_/datasets/pascal_voc12.py +57 -0
- annotator/uniformer/configs/_base_/datasets/pascal_voc12_aug.py +9 -0
- annotator/uniformer/configs/_base_/datasets/stare.py +59 -0
- annotator/uniformer/configs/_base_/default_runtime.py +14 -0
- annotator/uniformer/configs/_base_/models/ann_r50-d8.py +46 -0
- annotator/uniformer/configs/_base_/models/apcnet_r50-d8.py +44 -0
- annotator/uniformer/configs/_base_/models/ccnet_r50-d8.py +44 -0
- annotator/uniformer/configs/_base_/models/cgnet.py +35 -0
- annotator/uniformer/configs/_base_/models/danet_r50-d8.py +44 -0
- annotator/uniformer/configs/_base_/models/deeplabv3_r50-d8.py +44 -0
- annotator/uniformer/configs/_base_/models/deeplabv3_unet_s5-d16.py +50 -0
- annotator/uniformer/configs/_base_/models/deeplabv3plus_r50-d8.py +46 -0
- annotator/uniformer/configs/_base_/models/dmnet_r50-d8.py +44 -0
- annotator/uniformer/configs/_base_/models/dnl_r50-d8.py +46 -0
- annotator/uniformer/configs/_base_/models/emanet_r50-d8.py +47 -0
- annotator/uniformer/configs/_base_/models/encnet_r50-d8.py +48 -0
- annotator/uniformer/configs/_base_/models/fast_scnn.py +57 -0
- annotator/uniformer/configs/_base_/models/fcn_hr18.py +52 -0
- annotator/uniformer/configs/_base_/models/fcn_r50-d8.py +45 -0
- annotator/uniformer/configs/_base_/models/fcn_unet_s5-d16.py +51 -0
- annotator/uniformer/configs/_base_/models/fpn_r50.py +36 -0
- annotator/uniformer/configs/_base_/models/fpn_uniformer.py +35 -0
- annotator/uniformer/configs/_base_/models/gcnet_r50-d8.py +46 -0
- annotator/uniformer/configs/_base_/models/lraspp_m-v3-d8.py +25 -0
- annotator/uniformer/configs/_base_/models/nonlocal_r50-d8.py +46 -0
- annotator/uniformer/configs/_base_/models/ocrnet_hr18.py +68 -0
- annotator/uniformer/configs/_base_/models/ocrnet_r50-d8.py +47 -0
- annotator/uniformer/configs/_base_/models/pointrend_r50.py +56 -0
- annotator/uniformer/configs/_base_/models/psanet_r50-d8.py +49 -0
- annotator/uniformer/configs/_base_/models/pspnet_r50-d8.py +44 -0
- annotator/uniformer/configs/_base_/models/pspnet_unet_s5-d16.py +50 -0
- annotator/uniformer/configs/_base_/models/upernet_r50.py +44 -0
    	
annotator/canny/__init__.py
ADDED

@@ -0,0 +1,6 @@
+import cv2
+
+
+class CannyDetector:
+    def __call__(self, img, low_threshold, high_threshold):
+        return cv2.Canny(img, low_threshold, high_threshold)
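CannyDetector is a thin, stateless wrapper around OpenCV's edge detector. A minimal usage sketch (the input array and the threshold values here are illustrative, not part of the commit):

    import numpy as np
    from annotator.canny import CannyDetector

    apply_canny = CannyDetector()
    img = (np.random.rand(256, 256, 3) * 255).astype(np.uint8)  # any uint8 image
    edge_map = apply_canny(img, low_threshold=100, high_threshold=200)  # uint8 mask, 0 or 255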
    	
annotator/ckpts/body_pose_model.pth
ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:25a948c16078b0f08e236bda51a385d855ef4c153598947c28c0d47ed94bb746
+size 209267595
    	
annotator/ckpts/ckpts.txt
ADDED

@@ -0,0 +1 @@
+Weights here.
    	
annotator/ckpts/hand_pose_model.pth
ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b76b00d1750901abd07b9f9d8c98cc3385b8fe834a26d4b4f0aad439e75fc600
+size 147341049
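Both .pth entries are Git LFS pointer files, not the weights themselves: the repository records only the spec version, a sha256 object id, and the byte size, while the binary lives in LFS storage. A minimal sketch for checking a downloaded checkpoint against its pointer (the helper name is hypothetical and the path is illustrative; it assumes the file has already been fetched):

    import hashlib
    import os

    def verify_lfs_object(path, expected_oid, expected_size):  # hypothetical helper
        # compare on-disk size and sha256 digest with the pointer's fields
        if os.path.getsize(path) != expected_size:
            return False
        h = hashlib.sha256()
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(1 << 20), b""):
                h.update(chunk)
        return h.hexdigest() == expected_oid

    ok = verify_lfs_object(
        "annotator/ckpts/body_pose_model.pth",  # values copied from the pointer above
        "25a948c16078b0f08e236bda51a385d855ef4c153598947c28c0d47ed94bb746",
        209267595,
    )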
    	
annotator/openpose/__init__.py
ADDED

@@ -0,0 +1,44 @@
+import os
+os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"  # allow duplicate OpenMP runtimes to coexist
+
+import torch
+import numpy as np
+from . import util
+from .body import Body
+from .hand import Hand
+from annotator.util import annotator_ckpts_path
+
+
+body_model_path = "https://huggingface.co/lllyasviel/ControlNet/resolve/main/annotator/ckpts/body_pose_model.pth"
+hand_model_path = "https://huggingface.co/lllyasviel/ControlNet/resolve/main/annotator/ckpts/hand_pose_model.pth"
+
+
+class OpenposeDetector:
+    def __init__(self):
+        body_modelpath = os.path.join(annotator_ckpts_path, "body_pose_model.pth")
+        hand_modelpath = os.path.join(annotator_ckpts_path, "hand_pose_model.pth")
+
+        if not os.path.exists(hand_modelpath):
+            from basicsr.utils.download_util import load_file_from_url
+            load_file_from_url(body_model_path, model_dir=annotator_ckpts_path)
+            load_file_from_url(hand_model_path, model_dir=annotator_ckpts_path)
+
+        self.body_estimation = Body(body_modelpath)
+        self.hand_estimation = Hand(hand_modelpath)
+
+    def __call__(self, oriImg, hand=False):
+        oriImg = oriImg[:, :, ::-1].copy()  # reverse channel order for the pose models
+        with torch.no_grad():
+            candidate, subset = self.body_estimation(oriImg)
+            canvas = np.zeros_like(oriImg)
+            canvas = util.draw_bodypose(canvas, candidate, subset)
+            if hand:
+                hands_list = util.handDetect(candidate, subset, oriImg)
+                all_hand_peaks = []
+                for x, y, w, is_left in hands_list:
+                    peaks = self.hand_estimation(oriImg[y:y+w, x:x+w, :])
+                    peaks[:, 0] = np.where(peaks[:, 0] == 0, peaks[:, 0], peaks[:, 0] + x)
+                    peaks[:, 1] = np.where(peaks[:, 1] == 0, peaks[:, 1], peaks[:, 1] + y)
+                    all_hand_peaks.append(peaks)
+                canvas = util.draw_handpose(canvas, all_hand_peaks)
+            return canvas, dict(candidate=candidate.tolist(), subset=subset.tolist())
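A minimal usage sketch for OpenposeDetector (file names are illustrative; the detector expects an RGB uint8 array and returns the rendered pose canvas together with the raw candidate/subset keypoint data):

    import cv2
    from annotator.openpose import OpenposeDetector

    apply_openpose = OpenposeDetector()
    img = cv2.imread("input.jpg")[:, :, ::-1].copy()  # BGR file -> RGB array; path is illustrative
    pose_map, pose_data = apply_openpose(img, hand=True)
    print(len(pose_data["subset"]), "person(s) detected")
    cv2.imwrite("pose_map.png", pose_map)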
    	
annotator/openpose/body.py
ADDED

@@ -0,0 +1,219 @@
+import cv2
+import numpy as np
+import math
+import time
+from scipy.ndimage import gaussian_filter
+import matplotlib.pyplot as plt
+import matplotlib
+import torch
+from torchvision import transforms
+
+from . import util
+from .model import bodypose_model
+
+class Body(object):
+    def __init__(self, model_path):
+        self.model = bodypose_model()
+        if torch.cuda.is_available():
+            self.model = self.model.cuda()
+            print('cuda')
+        model_dict = util.transfer(self.model, torch.load(model_path))
+        self.model.load_state_dict(model_dict)
+        self.model.eval()
+
+    def __call__(self, oriImg):
+        # scale_search = [0.5, 1.0, 1.5, 2.0]
+        scale_search = [0.5]
+        boxsize = 368
+        stride = 8
+        padValue = 128
+        thre1 = 0.1
+        thre2 = 0.05
+        multiplier = [x * boxsize / oriImg.shape[0] for x in scale_search]
+        heatmap_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 19))
+        paf_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 38))
+
+        for m in range(len(multiplier)):
+            scale = multiplier[m]
+            imageToTest = cv2.resize(oriImg, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
+            imageToTest_padded, pad = util.padRightDownCorner(imageToTest, stride, padValue)
+            im = np.transpose(np.float32(imageToTest_padded[:, :, :, np.newaxis]), (3, 2, 0, 1)) / 256 - 0.5
+            im = np.ascontiguousarray(im)
+
+            data = torch.from_numpy(im).float()
+            if torch.cuda.is_available():
+                data = data.cuda()
+            # data = data.permute([2, 0, 1]).unsqueeze(0).float()
+            with torch.no_grad():
+                Mconv7_stage6_L1, Mconv7_stage6_L2 = self.model(data)
+            Mconv7_stage6_L1 = Mconv7_stage6_L1.cpu().numpy()
+            Mconv7_stage6_L2 = Mconv7_stage6_L2.cpu().numpy()
+
+            # extract outputs, resize, and remove padding
+            # heatmap = np.transpose(np.squeeze(net.blobs[output_blobs.keys()[1]].data), (1, 2, 0))  # output 1 is heatmaps
+            heatmap = np.transpose(np.squeeze(Mconv7_stage6_L2), (1, 2, 0))  # output 1 is heatmaps
+            heatmap = cv2.resize(heatmap, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
+            heatmap = heatmap[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
+            heatmap = cv2.resize(heatmap, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC)
+
+            # paf = np.transpose(np.squeeze(net.blobs[output_blobs.keys()[0]].data), (1, 2, 0))  # output 0 is PAFs
+            paf = np.transpose(np.squeeze(Mconv7_stage6_L1), (1, 2, 0))  # output 0 is PAFs
+            paf = cv2.resize(paf, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
+            paf = paf[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
+            paf = cv2.resize(paf, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC)
+
+            heatmap_avg = heatmap_avg + heatmap / len(multiplier)
+            paf_avg = paf_avg + paf / len(multiplier)
+
+        all_peaks = []
+        peak_counter = 0
+
+        for part in range(18):
+            map_ori = heatmap_avg[:, :, part]
+            one_heatmap = gaussian_filter(map_ori, sigma=3)
+
+            map_left = np.zeros(one_heatmap.shape)
+            map_left[1:, :] = one_heatmap[:-1, :]
+            map_right = np.zeros(one_heatmap.shape)
+            map_right[:-1, :] = one_heatmap[1:, :]
+            map_up = np.zeros(one_heatmap.shape)
+            map_up[:, 1:] = one_heatmap[:, :-1]
+            map_down = np.zeros(one_heatmap.shape)
+            map_down[:, :-1] = one_heatmap[:, 1:]
+
+            peaks_binary = np.logical_and.reduce(
+                (one_heatmap >= map_left, one_heatmap >= map_right, one_heatmap >= map_up, one_heatmap >= map_down, one_heatmap > thre1))
+            peaks = list(zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0]))  # note reverse
+            peaks_with_score = [x + (map_ori[x[1], x[0]],) for x in peaks]
+            peak_id = range(peak_counter, peak_counter + len(peaks))
+            peaks_with_score_and_id = [peaks_with_score[i] + (peak_id[i],) for i in range(len(peak_id))]
+
+            all_peaks.append(peaks_with_score_and_id)
+            peak_counter += len(peaks)
+
+        # find connection in the specified sequence, center 29 is in position 15
+        limbSeq = [[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10], \
+                   [10, 11], [2, 12], [12, 13], [13, 14], [2, 1], [1, 15], [15, 17], \
+                   [1, 16], [16, 18], [3, 17], [6, 18]]
+        # the middle joints heatmap correspondence
+        mapIdx = [[31, 32], [39, 40], [33, 34], [35, 36], [41, 42], [43, 44], [19, 20], [21, 22], \
+                  [23, 24], [25, 26], [27, 28], [29, 30], [47, 48], [49, 50], [53, 54], [51, 52], \
+                  [55, 56], [37, 38], [45, 46]]
+
+        connection_all = []
+        special_k = []
+        mid_num = 10
+
+        for k in range(len(mapIdx)):
+            score_mid = paf_avg[:, :, [x - 19 for x in mapIdx[k]]]
+            candA = all_peaks[limbSeq[k][0] - 1]
+            candB = all_peaks[limbSeq[k][1] - 1]
+            nA = len(candA)
+            nB = len(candB)
+            indexA, indexB = limbSeq[k]
+            if (nA != 0 and nB != 0):
+                connection_candidate = []
+                for i in range(nA):
+                    for j in range(nB):
+                        vec = np.subtract(candB[j][:2], candA[i][:2])
+                        norm = math.sqrt(vec[0] * vec[0] + vec[1] * vec[1])
+                        norm = max(0.001, norm)
+                        vec = np.divide(vec, norm)
+
+                        startend = list(zip(np.linspace(candA[i][0], candB[j][0], num=mid_num), \
+                                            np.linspace(candA[i][1], candB[j][1], num=mid_num)))
+
+                        vec_x = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 0] \
+                                          for I in range(len(startend))])
+                        vec_y = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 1] \
+                                          for I in range(len(startend))])
+
+                        score_midpts = np.multiply(vec_x, vec[0]) + np.multiply(vec_y, vec[1])
+                        score_with_dist_prior = sum(score_midpts) / len(score_midpts) + min(
+                            0.5 * oriImg.shape[0] / norm - 1, 0)
+                        criterion1 = len(np.nonzero(score_midpts > thre2)[0]) > 0.8 * len(score_midpts)
+                        criterion2 = score_with_dist_prior > 0
+                        if criterion1 and criterion2:
+                            connection_candidate.append(
+                                [i, j, score_with_dist_prior, score_with_dist_prior + candA[i][2] + candB[j][2]])
+
+                connection_candidate = sorted(connection_candidate, key=lambda x: x[2], reverse=True)
+                connection = np.zeros((0, 5))
+                for c in range(len(connection_candidate)):
+                    i, j, s = connection_candidate[c][0:3]
+                    if (i not in connection[:, 3] and j not in connection[:, 4]):
+                        connection = np.vstack([connection, [candA[i][3], candB[j][3], s, i, j]])
+                        if (len(connection) >= min(nA, nB)):
+                            break
+
+                connection_all.append(connection)
+            else:
+                special_k.append(k)
+                connection_all.append([])
+
+        # last number in each row is the total parts number of that person
+        # the second last number in each row is the score of the overall configuration
+        subset = -1 * np.ones((0, 20))
+        candidate = np.array([item for sublist in all_peaks for item in sublist])
+
+        for k in range(len(mapIdx)):
+            if k not in special_k:
+                partAs = connection_all[k][:, 0]
+                partBs = connection_all[k][:, 1]
+                indexA, indexB = np.array(limbSeq[k]) - 1
+
+                for i in range(len(connection_all[k])):  # = 1:size(temp,1)
+                    found = 0
+                    subset_idx = [-1, -1]
+                    for j in range(len(subset)):  # 1:size(subset,1):
+                        if subset[j][indexA] == partAs[i] or subset[j][indexB] == partBs[i]:
+                            subset_idx[found] = j
+                            found += 1
+
+                    if found == 1:
+                        j = subset_idx[0]
+                        if subset[j][indexB] != partBs[i]:
+                            subset[j][indexB] = partBs[i]
+                            subset[j][-1] += 1
+                            subset[j][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
+                    elif found == 2:  # if found 2 and disjoint, merge them
+                        j1, j2 = subset_idx
+                        membership = ((subset[j1] >= 0).astype(int) + (subset[j2] >= 0).astype(int))[:-2]
+                        if len(np.nonzero(membership == 2)[0]) == 0:  # merge
+                            subset[j1][:-2] += (subset[j2][:-2] + 1)
+                            subset[j1][-2:] += subset[j2][-2:]
+                            subset[j1][-2] += connection_all[k][i][2]
+                            subset = np.delete(subset, j2, 0)
+                        else:  # same as found == 1
+                            subset[j1][indexB] = partBs[i]
+                            subset[j1][-1] += 1
+                            subset[j1][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
+
+                    # if partA was not found in any existing subset, create a new one
+                    elif not found and k < 17:
+                        row = -1 * np.ones(20)
+                        row[indexA] = partAs[i]
+                        row[indexB] = partBs[i]
+                        row[-1] = 2
+                        row[-2] = sum(candidate[connection_all[k][i, :2].astype(int), 2]) + connection_all[k][i][2]
+                        subset = np.vstack([subset, row])
+        # delete rows of subset which have too few parts
+        deleteIdx = []
+        for i in range(len(subset)):
+            if subset[i][-1] < 4 or subset[i][-2] / subset[i][-1] < 0.4:
+                deleteIdx.append(i)
+        subset = np.delete(subset, deleteIdx, axis=0)
+
+        # subset: n*20 array, 0-17 is the index in candidate, 18 is the total score, 19 is the total parts
+        # candidate: x, y, score, id
+        return candidate, subset
+
+if __name__ == "__main__":
+    body_estimation = Body('../model/body_pose_model.pth')
+
+    test_image = '../images/ski.jpg'
+    oriImg = cv2.imread(test_image)  # B,G,R order
+    candidate, subset = body_estimation(oriImg)
+    canvas = util.draw_bodypose(oriImg, candidate, subset)
+    plt.imshow(canvas[:, :, [2, 1, 0]])
+    plt.show()
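The peak extraction above is a simple non-maximum suppression: after Gaussian smoothing, a pixel counts as a keypoint peak if it is at least as large as its four neighbors and exceeds thre1. A self-contained toy sketch of the same comparison (the function name and array values are illustrative):

    import numpy as np

    def find_peaks(heatmap, threshold=0.1):
        # a pixel is a peak if it is >= its four shifted neighbors and above the threshold
        left = np.zeros_like(heatmap)
        left[1:, :] = heatmap[:-1, :]
        right = np.zeros_like(heatmap)
        right[:-1, :] = heatmap[1:, :]
        up = np.zeros_like(heatmap)
        up[:, 1:] = heatmap[:, :-1]
        down = np.zeros_like(heatmap)
        down[:, :-1] = heatmap[:, 1:]
        mask = np.logical_and.reduce(
            (heatmap >= left, heatmap >= right, heatmap >= up, heatmap >= down, heatmap > threshold))
        return list(zip(np.nonzero(mask)[1], np.nonzero(mask)[0]))  # (x, y) pairs, note reverse

    toy = np.zeros((5, 5))
    toy[2, 3] = 0.9
    print(find_peaks(toy))  # [(3, 2)]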
    	
annotator/openpose/hand.py
ADDED

@@ -0,0 +1,86 @@
+import cv2
+import json
+import numpy as np
+import math
+import time
+from scipy.ndimage import gaussian_filter
+import matplotlib.pyplot as plt
+import matplotlib
+import torch
+from skimage.measure import label
+
+from .model import handpose_model
+from . import util
+
+class Hand(object):
+    def __init__(self, model_path):
+        self.model = handpose_model()
+        if torch.cuda.is_available():
+            self.model = self.model.cuda()
+            print('cuda')
+        model_dict = util.transfer(self.model, torch.load(model_path))
+        self.model.load_state_dict(model_dict)
+        self.model.eval()
+
+    def __call__(self, oriImg):
+        scale_search = [0.5, 1.0, 1.5, 2.0]
+        # scale_search = [0.5]
+        boxsize = 368
+        stride = 8
+        padValue = 128
+        thre = 0.05
+        multiplier = [x * boxsize / oriImg.shape[0] for x in scale_search]
+        heatmap_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 22))
+        # paf_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 38))
+
+        for m in range(len(multiplier)):
+            scale = multiplier[m]
+            imageToTest = cv2.resize(oriImg, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
+            imageToTest_padded, pad = util.padRightDownCorner(imageToTest, stride, padValue)
+            im = np.transpose(np.float32(imageToTest_padded[:, :, :, np.newaxis]), (3, 2, 0, 1)) / 256 - 0.5
+            im = np.ascontiguousarray(im)
+
+            data = torch.from_numpy(im).float()
+            if torch.cuda.is_available():
+                data = data.cuda()
+            # data = data.permute([2, 0, 1]).unsqueeze(0).float()
+            with torch.no_grad():
+                output = self.model(data).cpu().numpy()
+                # output = self.model(data).numpy()
+
+            # extract outputs, resize, and remove padding
+            heatmap = np.transpose(np.squeeze(output), (1, 2, 0))  # output 1 is heatmaps
+            heatmap = cv2.resize(heatmap, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
+            heatmap = heatmap[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
+            heatmap = cv2.resize(heatmap, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC)
+
+            heatmap_avg += heatmap / len(multiplier)
+
+        all_peaks = []
+        for part in range(21):
+            map_ori = heatmap_avg[:, :, part]
+            one_heatmap = gaussian_filter(map_ori, sigma=3)
+            binary = np.ascontiguousarray(one_heatmap > thre, dtype=np.uint8)
+            # all values are below the threshold
+            if np.sum(binary) == 0:
+                all_peaks.append([0, 0])
+                continue
+            label_img, label_numbers = label(binary, return_num=True, connectivity=binary.ndim)
+            max_index = np.argmax([np.sum(map_ori[label_img == i]) for i in range(1, label_numbers + 1)]) + 1
+            label_img[label_img != max_index] = 0
+            map_ori[label_img == 0] = 0
+
+            y, x = util.npmax(map_ori)
+            all_peaks.append([x, y])
+        return np.array(all_peaks)
+
+if __name__ == "__main__":
+    hand_estimation = Hand('../model/hand_pose_model.pth')
+
+    # test_image = '../images/hand.jpg'
+    test_image = '../images/hand.jpg'
+    oriImg = cv2.imread(test_image)  # B,G,R order
+    peaks = hand_estimation(oriImg)
+    canvas = util.draw_handpose(oriImg, peaks, True)
+    cv2.imshow('', canvas)
+    cv2.waitKey(0)
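For each of the 21 hand keypoints, the loop above binarizes the smoothed heatmap, keeps only the connected component with the largest total activation, and then takes the arg-max pixel inside it. A self-contained toy sketch of that selection (the map and its values are illustrative):

    import numpy as np
    from skimage.measure import label

    heat = np.zeros((6, 6))
    heat[0, 0] = 0.2            # small spurious blob
    heat[3:5, 3:5] = 0.6        # dominant blob
    binary = (heat > 0.05).astype(np.uint8)

    lab, n = label(binary, return_num=True, connectivity=2)
    best = np.argmax([heat[lab == i].sum() for i in range(1, n + 1)]) + 1
    heat[lab != best] = 0       # suppress everything but the strongest blob

    y, x = np.unravel_index(np.argmax(heat), heat.shape)
    print(x, y)                 # peak lies inside the dominant blob: 3 3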
    	
        annotator/openpose/model.py
    ADDED
    
    | @@ -0,0 +1,219 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            import torch
         | 
| 2 | 
            +
            from collections import OrderedDict
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            import torch
         | 
| 5 | 
            +
            import torch.nn as nn
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            def make_layers(block, no_relu_layers):
         | 
| 8 | 
            +
                layers = []
         | 
| 9 | 
            +
                for layer_name, v in block.items():
         | 
| 10 | 
            +
                    if 'pool' in layer_name:
         | 
| 11 | 
            +
                        layer = nn.MaxPool2d(kernel_size=v[0], stride=v[1],
         | 
| 12 | 
            +
                                                padding=v[2])
         | 
| 13 | 
            +
                        layers.append((layer_name, layer))
         | 
| 14 | 
            +
                    else:
         | 
| 15 | 
            +
                        conv2d = nn.Conv2d(in_channels=v[0], out_channels=v[1],
         | 
| 16 | 
            +
                                           kernel_size=v[2], stride=v[3],
         | 
| 17 | 
            +
                                           padding=v[4])
         | 
| 18 | 
            +
                        layers.append((layer_name, conv2d))
         | 
| 19 | 
            +
                        if layer_name not in no_relu_layers:
         | 
| 20 | 
            +
                            layers.append(('relu_'+layer_name, nn.ReLU(inplace=True)))
         | 
| 21 | 
            +
             | 
| 22 | 
            +
                return nn.Sequential(OrderedDict(layers))
         | 
| 23 | 
            +
             | 
| 24 | 
            +
            class bodypose_model(nn.Module):
         | 
| 25 | 
            +
                def __init__(self):
         | 
| 26 | 
            +
                    super(bodypose_model, self).__init__()
         | 
| 27 | 
            +
             | 
| 28 | 
            +
                    # these layers have no relu layer
         | 
| 29 | 
            +
                    no_relu_layers = ['conv5_5_CPM_L1', 'conv5_5_CPM_L2', 'Mconv7_stage2_L1',\
         | 
| 30 | 
            +
                                      'Mconv7_stage2_L2', 'Mconv7_stage3_L1', 'Mconv7_stage3_L2',\
         | 
| 31 | 
            +
                                      'Mconv7_stage4_L1', 'Mconv7_stage4_L2', 'Mconv7_stage5_L1',\
         | 
| 32 | 
            +
                                      'Mconv7_stage5_L2', 'Mconv7_stage6_L1', 'Mconv7_stage6_L1']
         | 
| 33 | 
            +
                    blocks = {}
         | 
| 34 | 
            +
                    block0 = OrderedDict([
         | 
| 35 | 
            +
                                  ('conv1_1', [3, 64, 3, 1, 1]),
         | 
| 36 | 
            +
                                  ('conv1_2', [64, 64, 3, 1, 1]),
         | 
| 37 | 
            +
                                  ('pool1_stage1', [2, 2, 0]),
         | 
| 38 | 
            +
                                  ('conv2_1', [64, 128, 3, 1, 1]),
         | 
| 39 | 
            +
                                  ('conv2_2', [128, 128, 3, 1, 1]),
         | 
| 40 | 
            +
                                  ('pool2_stage1', [2, 2, 0]),
         | 
| 41 | 
            +
                                  ('conv3_1', [128, 256, 3, 1, 1]),
         | 
| 42 | 
            +
                                  ('conv3_2', [256, 256, 3, 1, 1]),
         | 
| 43 | 
            +
                                  ('conv3_3', [256, 256, 3, 1, 1]),
         | 
| 44 | 
            +
                                  ('conv3_4', [256, 256, 3, 1, 1]),
         | 
| 45 | 
            +
                                  ('pool3_stage1', [2, 2, 0]),
         | 
| 46 | 
            +
                                  ('conv4_1', [256, 512, 3, 1, 1]),
         | 
| 47 | 
            +
                                  ('conv4_2', [512, 512, 3, 1, 1]),
         | 
| 48 | 
            +
                                  ('conv4_3_CPM', [512, 256, 3, 1, 1]),
         | 
| 49 | 
            +
                                  ('conv4_4_CPM', [256, 128, 3, 1, 1])
         | 
| 50 | 
            +
                              ])
         | 
| 51 | 
            +
             | 
| 52 | 
            +
             | 
| 53 | 
            +
                    # Stage 1
         | 
| 54 | 
            +
                    block1_1 = OrderedDict([
         | 
| 55 | 
            +
                                    ('conv5_1_CPM_L1', [128, 128, 3, 1, 1]),
         | 
| 56 | 
            +
                                    ('conv5_2_CPM_L1', [128, 128, 3, 1, 1]),
         | 
| 57 | 
            +
                                    ('conv5_3_CPM_L1', [128, 128, 3, 1, 1]),
         | 
| 58 | 
            +
                                    ('conv5_4_CPM_L1', [128, 512, 1, 1, 0]),
         | 
| 59 | 
            +
                                    ('conv5_5_CPM_L1', [512, 38, 1, 1, 0])
         | 
| 60 | 
            +
                                ])
         | 
| 61 | 
            +
             | 
| 62 | 
            +
                    block1_2 = OrderedDict([
         | 
| 63 | 
            +
                                    ('conv5_1_CPM_L2', [128, 128, 3, 1, 1]),
         | 
| 64 | 
            +
                                    ('conv5_2_CPM_L2', [128, 128, 3, 1, 1]),
         | 
| 65 | 
            +
                                    ('conv5_3_CPM_L2', [128, 128, 3, 1, 1]),
         | 
| 66 | 
            +
                                    ('conv5_4_CPM_L2', [128, 512, 1, 1, 0]),
         | 
| 67 | 
            +
                                    ('conv5_5_CPM_L2', [512, 19, 1, 1, 0])
         | 
| 68 | 
            +
                                ])
         | 
| 69 | 
            +
                    blocks['block1_1'] = block1_1
         | 
| 70 | 
            +
                    blocks['block1_2'] = block1_2
         | 
| 71 | 
            +
             | 
| 72 | 
            +
                    self.model0 = make_layers(block0, no_relu_layers)
         | 
| 73 | 
            +
             | 
| 74 | 
            +
                    # Stages 2 - 6
         | 
| 75 | 
            +
                    for i in range(2, 7):
         | 
| 76 | 
            +
                        blocks['block%d_1' % i] = OrderedDict([
         | 
| 77 | 
            +
                                ('Mconv1_stage%d_L1' % i, [185, 128, 7, 1, 3]),
         | 
| 78 | 
            +
                                ('Mconv2_stage%d_L1' % i, [128, 128, 7, 1, 3]),
         | 
| 79 | 
            +
                                ('Mconv3_stage%d_L1' % i, [128, 128, 7, 1, 3]),
         | 
| 80 | 
            +
                                ('Mconv4_stage%d_L1' % i, [128, 128, 7, 1, 3]),
         | 
| 81 | 
            +
                                ('Mconv5_stage%d_L1' % i, [128, 128, 7, 1, 3]),
         | 
| 82 | 
            +
                                ('Mconv6_stage%d_L1' % i, [128, 128, 1, 1, 0]),
         | 
| 83 | 
            +
                                ('Mconv7_stage%d_L1' % i, [128, 38, 1, 1, 0])
         | 
| 84 | 
            +
                            ])
         | 
| 85 | 
            +
             | 
| 86 | 
            +
                        blocks['block%d_2' % i] = OrderedDict([
         | 
| 87 | 
            +
                                ('Mconv1_stage%d_L2' % i, [185, 128, 7, 1, 3]),
         | 
| 88 | 
            +
                                ('Mconv2_stage%d_L2' % i, [128, 128, 7, 1, 3]),
         | 
| 89 | 
            +
                                ('Mconv3_stage%d_L2' % i, [128, 128, 7, 1, 3]),
         | 
| 90 | 
            +
                                ('Mconv4_stage%d_L2' % i, [128, 128, 7, 1, 3]),
         | 
| 91 | 
            +
                                ('Mconv5_stage%d_L2' % i, [128, 128, 7, 1, 3]),
         | 
| 92 | 
            +
                                ('Mconv6_stage%d_L2' % i, [128, 128, 1, 1, 0]),
         | 
| 93 | 
            +
                                ('Mconv7_stage%d_L2' % i, [128, 19, 1, 1, 0])
         | 
| 94 | 
            +
                            ])
         | 
| 95 | 
            +
             | 
| 96 | 
            +
                    for k in blocks.keys():
         | 
| 97 | 
            +
                        blocks[k] = make_layers(blocks[k], no_relu_layers)
         | 
| 98 | 
            +
             | 
| 99 | 
            +
                    self.model1_1 = blocks['block1_1']
         | 
| 100 | 
            +
                    self.model2_1 = blocks['block2_1']
         | 
| 101 | 
            +
                    self.model3_1 = blocks['block3_1']
         | 
| 102 | 
            +
                    self.model4_1 = blocks['block4_1']
         | 
| 103 | 
            +
                    self.model5_1 = blocks['block5_1']
         | 
| 104 | 
            +
                    self.model6_1 = blocks['block6_1']
         | 
| 105 | 
            +
             | 
| 106 | 
            +
                    self.model1_2 = blocks['block1_2']
         | 
| 107 | 
            +
                    self.model2_2 = blocks['block2_2']
         | 
| 108 | 
            +
                    self.model3_2 = blocks['block3_2']
         | 
| 109 | 
            +
                    self.model4_2 = blocks['block4_2']
         | 
| 110 | 
            +
                    self.model5_2 = blocks['block5_2']
         | 
| 111 | 
            +
                    self.model6_2 = blocks['block6_2']
         | 
| 112 | 
            +
             | 
| 113 | 
            +
             | 
| 114 | 
            +
                def forward(self, x):
         | 
| 115 | 
            +
             | 
| 116 | 
            +
                    out1 = self.model0(x)
         | 
| 117 | 
            +
             | 
| 118 | 
            +
                    out1_1 = self.model1_1(out1)
         | 
| 119 | 
            +
                    out1_2 = self.model1_2(out1)
         | 
| 120 | 
            +
                    out2 = torch.cat([out1_1, out1_2, out1], 1)
         | 
| 121 | 
            +
             | 
| 122 | 
            +
                    out2_1 = self.model2_1(out2)
         | 
| 123 | 
            +
                    out2_2 = self.model2_2(out2)
         | 
| 124 | 
            +
                    out3 = torch.cat([out2_1, out2_2, out1], 1)
         | 
| 125 | 
            +
             | 
| 126 | 
            +
                    out3_1 = self.model3_1(out3)
         | 
| 127 | 
            +
                    out3_2 = self.model3_2(out3)
         | 
| 128 | 
            +
                    out4 = torch.cat([out3_1, out3_2, out1], 1)
         | 
| 129 | 
            +
             | 
| 130 | 
            +
                    out4_1 = self.model4_1(out4)
         | 
| 131 | 
            +
                    out4_2 = self.model4_2(out4)
         | 
| 132 | 
            +
                    out5 = torch.cat([out4_1, out4_2, out1], 1)
         | 
| 133 | 
            +
             | 
| 134 | 
            +
                    out5_1 = self.model5_1(out5)
         | 
| 135 | 
            +
                    out5_2 = self.model5_2(out5)
         | 
| 136 | 
            +
                    out6 = torch.cat([out5_1, out5_2, out1], 1)
         | 
| 137 | 
            +
             | 
| 138 | 
            +
                    out6_1 = self.model6_1(out6)
         | 
| 139 | 
            +
                    out6_2 = self.model6_2(out6)
         | 
| 140 | 
            +
             | 
| 141 | 
            +
                    return out6_1, out6_2
         | 
| 142 | 
            +
             | 
| 143 | 
            +
            class handpose_model(nn.Module):
         | 
| 144 | 
            +
                def __init__(self):
+        super(handpose_model, self).__init__()
+
+        # these layers have no relu layer
+        no_relu_layers = ['conv6_2_CPM', 'Mconv7_stage2', 'Mconv7_stage3',\
+                          'Mconv7_stage4', 'Mconv7_stage5', 'Mconv7_stage6']
+        # stage 1
+        block1_0 = OrderedDict([
+                ('conv1_1', [3, 64, 3, 1, 1]),
+                ('conv1_2', [64, 64, 3, 1, 1]),
+                ('pool1_stage1', [2, 2, 0]),
+                ('conv2_1', [64, 128, 3, 1, 1]),
+                ('conv2_2', [128, 128, 3, 1, 1]),
+                ('pool2_stage1', [2, 2, 0]),
+                ('conv3_1', [128, 256, 3, 1, 1]),
+                ('conv3_2', [256, 256, 3, 1, 1]),
+                ('conv3_3', [256, 256, 3, 1, 1]),
+                ('conv3_4', [256, 256, 3, 1, 1]),
+                ('pool3_stage1', [2, 2, 0]),
+                ('conv4_1', [256, 512, 3, 1, 1]),
+                ('conv4_2', [512, 512, 3, 1, 1]),
+                ('conv4_3', [512, 512, 3, 1, 1]),
+                ('conv4_4', [512, 512, 3, 1, 1]),
+                ('conv5_1', [512, 512, 3, 1, 1]),
+                ('conv5_2', [512, 512, 3, 1, 1]),
+                ('conv5_3_CPM', [512, 128, 3, 1, 1])
+            ])
+
+        block1_1 = OrderedDict([
+            ('conv6_1_CPM', [128, 512, 1, 1, 0]),
+            ('conv6_2_CPM', [512, 22, 1, 1, 0])
+        ])
+
+        blocks = {}
+        blocks['block1_0'] = block1_0
+        blocks['block1_1'] = block1_1
+
+        # stage 2-6
+        for i in range(2, 7):
+            blocks['block%d' % i] = OrderedDict([
+                    ('Mconv1_stage%d' % i, [150, 128, 7, 1, 3]),
+                    ('Mconv2_stage%d' % i, [128, 128, 7, 1, 3]),
+                    ('Mconv3_stage%d' % i, [128, 128, 7, 1, 3]),
+                    ('Mconv4_stage%d' % i, [128, 128, 7, 1, 3]),
+                    ('Mconv5_stage%d' % i, [128, 128, 7, 1, 3]),
+                    ('Mconv6_stage%d' % i, [128, 128, 1, 1, 0]),
+                    ('Mconv7_stage%d' % i, [128, 22, 1, 1, 0])
+                ])
+
+        for k in blocks.keys():
+            blocks[k] = make_layers(blocks[k], no_relu_layers)
+
+        self.model1_0 = blocks['block1_0']
+        self.model1_1 = blocks['block1_1']
+        self.model2 = blocks['block2']
+        self.model3 = blocks['block3']
+        self.model4 = blocks['block4']
+        self.model5 = blocks['block5']
+        self.model6 = blocks['block6']
+
+    def forward(self, x):
+        out1_0 = self.model1_0(x)
+        out1_1 = self.model1_1(out1_0)
+        concat_stage2 = torch.cat([out1_1, out1_0], 1)
+        out_stage2 = self.model2(concat_stage2)
+        concat_stage3 = torch.cat([out_stage2, out1_0], 1)
+        out_stage3 = self.model3(concat_stage3)
+        concat_stage4 = torch.cat([out_stage3, out1_0], 1)
+        out_stage4 = self.model4(concat_stage4)
+        concat_stage5 = torch.cat([out_stage4, out1_0], 1)
+        out_stage5 = self.model5(concat_stage5)
+        concat_stage6 = torch.cat([out_stage5, out1_0], 1)
+        out_stage6 = self.model6(concat_stage6)
+        return out_stage6
+
+
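Each refinement stage above concatenates the previous stage's 22-channel belief maps with the 128-channel backbone features, which is why every Mconv1_stage layer takes 150 input channels (128 + 22). A minimal sketch of a dummy forward pass; the 184x184 input size and batch size are illustrative assumptions, not requirements stated in the diff:

    import torch

    model = handpose_model()           # class defined in the diff above
    x = torch.randn(1, 3, 184, 184)    # dummy image batch (assumed size)
    with torch.no_grad():
        heatmaps = model(x)
    # three 2x2 max-pools give stride 8, so 184 -> 23; 22 hand belief maps
    print(heatmaps.shape)              # torch.Size([1, 22, 23, 23])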
    	
        annotator/openpose/util.py
    ADDED
    
@@ -0,0 +1,164 @@
+import math
+import numpy as np
+import matplotlib
+import cv2
+
+
+def padRightDownCorner(img, stride, padValue):
+    h = img.shape[0]
+    w = img.shape[1]
+
+    pad = 4 * [None]
+    pad[0] = 0 # up
+    pad[1] = 0 # left
+    pad[2] = 0 if (h % stride == 0) else stride - (h % stride) # down
+    pad[3] = 0 if (w % stride == 0) else stride - (w % stride) # right
+
+    img_padded = img
+    pad_up = np.tile(img_padded[0:1, :, :]*0 + padValue, (pad[0], 1, 1))
+    img_padded = np.concatenate((pad_up, img_padded), axis=0)
+    pad_left = np.tile(img_padded[:, 0:1, :]*0 + padValue, (1, pad[1], 1))
+    img_padded = np.concatenate((pad_left, img_padded), axis=1)
+    pad_down = np.tile(img_padded[-2:-1, :, :]*0 + padValue, (pad[2], 1, 1))
+    img_padded = np.concatenate((img_padded, pad_down), axis=0)
+    pad_right = np.tile(img_padded[:, -2:-1, :]*0 + padValue, (1, pad[3], 1))
+    img_padded = np.concatenate((img_padded, pad_right), axis=1)
+
+    return img_padded, pad
+
+# transfer a caffe model to pytorch, matching weights by layer name
+def transfer(model, model_weights):
+    transfered_model_weights = {}
+    for weights_name in model.state_dict().keys():
+        transfered_model_weights[weights_name] = model_weights['.'.join(weights_name.split('.')[1:])]
+    return transfered_model_weights
+
+# draw the body keypoints and limbs
+def draw_bodypose(canvas, candidate, subset):
+    stickwidth = 4
+    limbSeq = [[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10], \
+               [10, 11], [2, 12], [12, 13], [13, 14], [2, 1], [1, 15], [15, 17], \
+               [1, 16], [16, 18], [3, 17], [6, 18]]
+
+    colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0], \
+              [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], \
+              [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85]]
+    for i in range(18):
+        for n in range(len(subset)):
+            index = int(subset[n][i])
+            if index == -1:
+                continue
+            x, y = candidate[index][0:2]
+            cv2.circle(canvas, (int(x), int(y)), 4, colors[i], thickness=-1)
+    for i in range(17):
+        for n in range(len(subset)):
+            index = subset[n][np.array(limbSeq[i]) - 1]
+            if -1 in index:
+                continue
+            cur_canvas = canvas.copy()
+            Y = candidate[index.astype(int), 0]
+            X = candidate[index.astype(int), 1]
+            mX = np.mean(X)
+            mY = np.mean(Y)
+            length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5
+            angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
+            polygon = cv2.ellipse2Poly((int(mY), int(mX)), (int(length / 2), stickwidth), int(angle), 0, 360, 1)
+            cv2.fillConvexPoly(cur_canvas, polygon, colors[i])
+            canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0)
+    # plt.imsave("preview.jpg", canvas[:, :, [2, 1, 0]])
+    # plt.imshow(canvas[:, :, [2, 1, 0]])
+    return canvas
+
+
+# an image drawn by opencv is not good.
+def draw_handpose(canvas, all_hand_peaks, show_number=False):
+    edges = [[0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9], [9, 10], \
+             [10, 11], [11, 12], [0, 13], [13, 14], [14, 15], [15, 16], [0, 17], [17, 18], [18, 19], [19, 20]]
+
+    for peaks in all_hand_peaks:
+        for ie, e in enumerate(edges):
+            if np.sum(np.all(peaks[e], axis=1)==0)==0:
+                x1, y1 = peaks[e[0]]
+                x2, y2 = peaks[e[1]]
+                cv2.line(canvas, (x1, y1), (x2, y2), matplotlib.colors.hsv_to_rgb([ie/float(len(edges)), 1.0, 1.0])*255, thickness=2)
+
+        for i, keypoint in enumerate(peaks):
+            x, y = keypoint
+            cv2.circle(canvas, (x, y), 4, (0, 0, 255), thickness=-1)
+            if show_number:
+                cv2.putText(canvas, str(i), (x, y), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 0), lineType=cv2.LINE_AA)
+    return canvas
+
+# detect hands according to body pose keypoints
+# please refer to https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/src/openpose/hand/handDetector.cpp
+def handDetect(candidate, subset, oriImg):
+    # right hand: wrist 4, elbow 3, shoulder 2
+    # left hand: wrist 7, elbow 6, shoulder 5
+    ratioWristElbow = 0.33
+    detect_result = []
+    image_height, image_width = oriImg.shape[0:2]
+    for person in subset.astype(int):
+        # skip a side if any of its three keypoints is not detected
+        has_left = np.sum(person[[5, 6, 7]] == -1) == 0
+        has_right = np.sum(person[[2, 3, 4]] == -1) == 0
+        if not (has_left or has_right):
+            continue
+        hands = []
+        # left hand
+        if has_left:
+            left_shoulder_index, left_elbow_index, left_wrist_index = person[[5, 6, 7]]
+            x1, y1 = candidate[left_shoulder_index][:2]
+            x2, y2 = candidate[left_elbow_index][:2]
+            x3, y3 = candidate[left_wrist_index][:2]
+            hands.append([x1, y1, x2, y2, x3, y3, True])
+        # right hand
+        if has_right:
+            right_shoulder_index, right_elbow_index, right_wrist_index = person[[2, 3, 4]]
+            x1, y1 = candidate[right_shoulder_index][:2]
+            x2, y2 = candidate[right_elbow_index][:2]
+            x3, y3 = candidate[right_wrist_index][:2]
+            hands.append([x1, y1, x2, y2, x3, y3, False])
+
+        for x1, y1, x2, y2, x3, y3, is_left in hands:
+            # pos_hand = pos_wrist + ratio * (pos_wrist - pos_elbow) = (1 + ratio) * pos_wrist - ratio * pos_elbow
+            # handRectangle.x = posePtr[wrist*3] + ratioWristElbow * (posePtr[wrist*3] - posePtr[elbow*3]);
+            # handRectangle.y = posePtr[wrist*3+1] + ratioWristElbow * (posePtr[wrist*3+1] - posePtr[elbow*3+1]);
+            # const auto distanceWristElbow = getDistance(poseKeypoints, person, wrist, elbow);
+            # const auto distanceElbowShoulder = getDistance(poseKeypoints, person, elbow, shoulder);
+            # handRectangle.width = 1.5f * fastMax(distanceWristElbow, 0.9f * distanceElbowShoulder);
+            x = x3 + ratioWristElbow * (x3 - x2)
+            y = y3 + ratioWristElbow * (y3 - y2)
+            distanceWristElbow = math.sqrt((x3 - x2) ** 2 + (y3 - y2) ** 2)
+            distanceElbowShoulder = math.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)
+            width = 1.5 * max(distanceWristElbow, 0.9 * distanceElbowShoulder)
+            # x-y refers to the center --> offset to the top-left point
+            # handRectangle.x -= handRectangle.width / 2.f;
+            # handRectangle.y -= handRectangle.height / 2.f;
+            x -= width / 2
+            y -= width / 2  # width = height
+            # clip boxes that overflow the image
+            if x < 0: x = 0
+            if y < 0: y = 0
+            width1 = width
+            width2 = width
+            if x + width > image_width: width1 = image_width - x
+            if y + width > image_height: width2 = image_height - y
+            width = min(width1, width2)
+            # discard hand boxes narrower than 20 pixels
+            if width >= 20:
+                detect_result.append([int(x), int(y), int(width), is_left])
+
+    '''
+    return value: [[x, y, w, True if left hand else False]].
+    width == height since the network requires a square input.
+    x, y is the coordinate of the top-left corner.
+    '''
+    return detect_result
+
+# get the (row, col) index of the maximum of a 2d array
+def npmax(array):
+    arrayindex = array.argmax(1)
+    arrayvalue = array.max(1)
+    i = arrayvalue.argmax()
+    j = arrayindex[i]
+    return i, j
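To see how these helpers fit together: padRightDownCorner pads an image so both sides become divisible by the network stride, and npmax recovers the (row, col) peak of a belief map. A small sketch under assumed shapes (the array sizes are illustrative):

    import numpy as np

    img = np.zeros((367, 365, 3), dtype=np.uint8)      # arbitrary test size
    img_padded, pad = padRightDownCorner(img, stride=8, padValue=128)
    print(img_padded.shape, pad)                       # (368, 368, 3) [0, 0, 1, 3]

    heatmap = np.random.rand(46, 46)                   # one fake belief map
    y, x = npmax(heatmap)                              # row, col of the global maximum
    assert heatmap[y, x] == heatmap.max()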
    	
        annotator/uniformer/__init__.py
    ADDED
    
@@ -0,0 +1,23 @@
+import os
+
+from annotator.uniformer.mmseg.apis import init_segmentor, inference_segmentor, show_result_pyplot
+from annotator.uniformer.mmseg.core.evaluation import get_palette
+from annotator.util import annotator_ckpts_path
+
+
+checkpoint_file = "https://huggingface.co/lllyasviel/ControlNet/resolve/main/annotator/ckpts/upernet_global_small.pth"
+
+
+class UniformerDetector:
+    def __init__(self):
+        modelpath = os.path.join(annotator_ckpts_path, "upernet_global_small.pth")
+        if not os.path.exists(modelpath):
+            from basicsr.utils.download_util import load_file_from_url
+            load_file_from_url(checkpoint_file, model_dir=annotator_ckpts_path)
+        config_file = os.path.join(os.path.dirname(annotator_ckpts_path), "uniformer", "exp", "upernet_global_small", "config.py")
+        self.model = init_segmentor(config_file, modelpath).cuda()
+
+    def __call__(self, img):
+        result = inference_segmentor(self.model, img)
+        res_img = show_result_pyplot(self.model, img, result, get_palette('ade'), opacity=1)
+        return res_img
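A hedged usage sketch for UniformerDetector: it assumes a CUDA device (the constructor calls .cuda()), the vendored mmseg under annotator.uniformer, and a BGR uint8 image; the input path is hypothetical:

    import cv2

    seg = UniformerDetector()        # downloads upernet_global_small.pth on first use
    img = cv2.imread("input.png")    # hypothetical path; HWC, BGR, uint8
    seg_map = seg(img)               # ADE20K-palette segmentation rendered as an image
    cv2.imwrite("seg.png", seg_map)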
    	
        annotator/uniformer/configs/_base_/datasets/ade20k.py
    ADDED
    
@@ -0,0 +1,54 @@
+# dataset settings
+dataset_type = 'ADE20KDataset'
+data_root = 'data/ade/ADEChallengeData2016'
+img_norm_cfg = dict(
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+crop_size = (512, 512)
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations', reduce_zero_label=True),
+    dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
+    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+    dict(type='RandomFlip', prob=0.5),
+    dict(type='PhotoMetricDistortion'),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
+    dict(type='DefaultFormatBundle'),
+    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
+]
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=(2048, 512),
+        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=True),
+            dict(type='RandomFlip'),
+            dict(type='Normalize', **img_norm_cfg),
+            dict(type='ImageToTensor', keys=['img']),
+            dict(type='Collect', keys=['img']),
+        ])
+]
+data = dict(
+    samples_per_gpu=4,
+    workers_per_gpu=4,
+    train=dict(
+        type=dataset_type,
+        data_root=data_root,
+        img_dir='images/training',
+        ann_dir='annotations/training',
+        pipeline=train_pipeline),
+    val=dict(
+        type=dataset_type,
+        data_root=data_root,
+        img_dir='images/validation',
+        ann_dir='annotations/validation',
+        pipeline=test_pipeline),
+    test=dict(
+        type=dataset_type,
+        data_root=data_root,
+        img_dir='images/validation',
+        ann_dir='annotations/validation',
+        pipeline=test_pipeline))
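These dataset files are standard mmseg-style configs: plain Python modules read through mmcv's Config loader. A minimal sketch of inspecting one; Config.fromfile is the stock mmcv API, and the vendored import path is an assumption:

    from annotator.uniformer.mmcv import Config   # assumed vendored mmcv path

    cfg = Config.fromfile("annotator/uniformer/configs/_base_/datasets/ade20k.py")
    print(cfg.data.samples_per_gpu)                # 4
    print([t["type"] for t in cfg.train_pipeline])
    # ['LoadImageFromFile', 'LoadAnnotations', 'Resize', 'RandomCrop', 'RandomFlip',
    #  'PhotoMetricDistortion', 'Normalize', 'Pad', 'DefaultFormatBundle', 'Collect']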
    	
        annotator/uniformer/configs/_base_/datasets/chase_db1.py
    ADDED
    
@@ -0,0 +1,59 @@
+# dataset settings
+dataset_type = 'ChaseDB1Dataset'
+data_root = 'data/CHASE_DB1'
+img_norm_cfg = dict(
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+img_scale = (960, 999)
+crop_size = (128, 128)
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations'),
+    dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
+    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+    dict(type='RandomFlip', prob=0.5),
+    dict(type='PhotoMetricDistortion'),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
+    dict(type='DefaultFormatBundle'),
+    dict(type='Collect', keys=['img', 'gt_semantic_seg'])
+]
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=img_scale,
+        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0],
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=True),
+            dict(type='RandomFlip'),
+            dict(type='Normalize', **img_norm_cfg),
+            dict(type='ImageToTensor', keys=['img']),
+            dict(type='Collect', keys=['img'])
+        ])
+]
+
+data = dict(
+    samples_per_gpu=4,
+    workers_per_gpu=4,
+    train=dict(
+        type='RepeatDataset',
+        times=40000,
+        dataset=dict(
+            type=dataset_type,
+            data_root=data_root,
+            img_dir='images/training',
+            ann_dir='annotations/training',
+            pipeline=train_pipeline)),
+    val=dict(
+        type=dataset_type,
+        data_root=data_root,
+        img_dir='images/validation',
+        ann_dir='annotations/validation',
+        pipeline=test_pipeline),
+    test=dict(
+        type=dataset_type,
+        data_root=data_root,
+        img_dir='images/validation',
+        ann_dir='annotations/validation',
+        pipeline=test_pipeline))
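The train split above is wrapped in RepeatDataset with times=40000 because CHASE_DB1 is tiny; the wrapper simply multiplies the apparent dataset length so iteration-based training never exhausts the data. Conceptually (a simplified sketch, not mmseg's actual implementation):

    class RepeatDataset:
        """Simplified: cycle through a small dataset `times` times."""
        def __init__(self, dataset, times):
            self.dataset, self.times = dataset, times

        def __len__(self):
            return self.times * len(self.dataset)

        def __getitem__(self, idx):
            return self.dataset[idx % len(self.dataset)]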
    	
        annotator/uniformer/configs/_base_/datasets/cityscapes.py
    ADDED
    
@@ -0,0 +1,54 @@
+# dataset settings
+dataset_type = 'CityscapesDataset'
+data_root = 'data/cityscapes/'
+img_norm_cfg = dict(
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+crop_size = (512, 1024)
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations'),
+    dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)),
+    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+    dict(type='RandomFlip', prob=0.5),
+    dict(type='PhotoMetricDistortion'),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
+    dict(type='DefaultFormatBundle'),
+    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
+]
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=(2048, 1024),
+        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=True),
+            dict(type='RandomFlip'),
+            dict(type='Normalize', **img_norm_cfg),
+            dict(type='ImageToTensor', keys=['img']),
+            dict(type='Collect', keys=['img']),
+        ])
+]
+data = dict(
+    samples_per_gpu=2,
+    workers_per_gpu=2,
+    train=dict(
+        type=dataset_type,
+        data_root=data_root,
+        img_dir='leftImg8bit/train',
+        ann_dir='gtFine/train',
+        pipeline=train_pipeline),
+    val=dict(
+        type=dataset_type,
+        data_root=data_root,
+        img_dir='leftImg8bit/val',
+        ann_dir='gtFine/val',
+        pipeline=test_pipeline),
+    test=dict(
+        type=dataset_type,
+        data_root=data_root,
+        img_dir='leftImg8bit/val',
+        ann_dir='gtFine/val',
+        pipeline=test_pipeline))
    	
        annotator/uniformer/configs/_base_/datasets/cityscapes_769x769.py
    ADDED
    
@@ -0,0 +1,35 @@
+_base_ = './cityscapes.py'
+img_norm_cfg = dict(
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+crop_size = (769, 769)
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations'),
+    dict(type='Resize', img_scale=(2049, 1025), ratio_range=(0.5, 2.0)),
+    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+    dict(type='RandomFlip', prob=0.5),
+    dict(type='PhotoMetricDistortion'),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
+    dict(type='DefaultFormatBundle'),
+    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
+]
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=(2049, 1025),
+        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=True),
+            dict(type='RandomFlip'),
+            dict(type='Normalize', **img_norm_cfg),
+            dict(type='ImageToTensor', keys=['img']),
+            dict(type='Collect', keys=['img']),
+        ])
+]
+data = dict(
+    train=dict(pipeline=train_pipeline),
+    val=dict(pipeline=test_pipeline),
+    test=dict(pipeline=test_pipeline))
        annotator/uniformer/configs/_base_/datasets/drive.py
    ADDED
    
    | @@ -0,0 +1,59 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            # dataset settings
         | 
| 2 | 
            +
            dataset_type = 'DRIVEDataset'
         | 
| 3 | 
            +
            data_root = 'data/DRIVE'
         | 
| 4 | 
            +
            img_norm_cfg = dict(
         | 
| 5 | 
            +
                mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
         | 
| 6 | 
            +
            img_scale = (584, 565)
         | 
| 7 | 
            +
            crop_size = (64, 64)
         | 
| 8 | 
            +
            train_pipeline = [
         | 
| 9 | 
            +
                dict(type='LoadImageFromFile'),
         | 
| 10 | 
            +
                dict(type='LoadAnnotations'),
         | 
| 11 | 
            +
                dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
         | 
| 12 | 
            +
                dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
         | 
| 13 | 
            +
                dict(type='RandomFlip', prob=0.5),
         | 
| 14 | 
            +
                dict(type='PhotoMetricDistortion'),
         | 
| 15 | 
            +
                dict(type='Normalize', **img_norm_cfg),
         | 
| 16 | 
            +
                dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
         | 
| 17 | 
            +
                dict(type='DefaultFormatBundle'),
         | 
| 18 | 
            +
                dict(type='Collect', keys=['img', 'gt_semantic_seg'])
         | 
| 19 | 
            +
            ]
         | 
| 20 | 
            +
            test_pipeline = [
         | 
| 21 | 
            +
                dict(type='LoadImageFromFile'),
         | 
| 22 | 
            +
                dict(
         | 
| 23 | 
            +
                    type='MultiScaleFlipAug',
         | 
| 24 | 
            +
                    img_scale=img_scale,
         | 
| 25 | 
            +
                    # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0],
         | 
| 26 | 
            +
                    flip=False,
         | 
| 27 | 
            +
                    transforms=[
         | 
| 28 | 
            +
                        dict(type='Resize', keep_ratio=True),
         | 
| 29 | 
            +
                        dict(type='RandomFlip'),
         | 
| 30 | 
            +
                        dict(type='Normalize', **img_norm_cfg),
         | 
| 31 | 
            +
                        dict(type='ImageToTensor', keys=['img']),
         | 
| 32 | 
            +
                        dict(type='Collect', keys=['img'])
         | 
| 33 | 
            +
                    ])
         | 
| 34 | 
            +
            ]
         | 
| 35 | 
            +
             | 
| 36 | 
            +
            data = dict(
         | 
| 37 | 
            +
                samples_per_gpu=4,
         | 
| 38 | 
            +
                workers_per_gpu=4,
         | 
| 39 | 
            +
                train=dict(
         | 
| 40 | 
            +
                    type='RepeatDataset',
         | 
| 41 | 
            +
                    times=40000,
         | 
| 42 | 
            +
                    dataset=dict(
         | 
| 43 | 
            +
                        type=dataset_type,
         | 
| 44 | 
            +
                        data_root=data_root,
         | 
| 45 | 
            +
                        img_dir='images/training',
         | 
| 46 | 
            +
                        ann_dir='annotations/training',
         | 
| 47 | 
            +
                        pipeline=train_pipeline)),
         | 
| 48 | 
            +
                val=dict(
         | 
| 49 | 
            +
                    type=dataset_type,
         | 
| 50 | 
            +
                    data_root=data_root,
         | 
| 51 | 
            +
                    img_dir='images/validation',
         | 
| 52 | 
            +
                    ann_dir='annotations/validation',
         | 
| 53 | 
            +
                    pipeline=test_pipeline),
         | 
| 54 | 
            +
                test=dict(
         | 
| 55 | 
            +
                    type=dataset_type,
         | 
| 56 | 
            +
                    data_root=data_root,
         | 
| 57 | 
            +
                    img_dir='images/validation',
         | 
| 58 | 
            +
                    ann_dir='annotations/validation',
         | 
| 59 | 
            +
                    pipeline=test_pipeline))
         | 
    	
        annotator/uniformer/configs/_base_/datasets/hrf.py
    ADDED
    
    | @@ -0,0 +1,59 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            # dataset settings
         | 
| 2 | 
            +
            dataset_type = 'HRFDataset'
         | 
| 3 | 
            +
            data_root = 'data/HRF'
         | 
| 4 | 
            +
            img_norm_cfg = dict(
         | 
| 5 | 
            +
                mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
         | 
| 6 | 
            +
            img_scale = (2336, 3504)
         | 
| 7 | 
            +
            crop_size = (256, 256)
         | 
| 8 | 
            +
            train_pipeline = [
         | 
| 9 | 
            +
                dict(type='LoadImageFromFile'),
         | 
| 10 | 
            +
                dict(type='LoadAnnotations'),
         | 
| 11 | 
            +
                dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
         | 
| 12 | 
            +
                dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
         | 
| 13 | 
            +
                dict(type='RandomFlip', prob=0.5),
         | 
| 14 | 
            +
                dict(type='PhotoMetricDistortion'),
         | 
| 15 | 
            +
                dict(type='Normalize', **img_norm_cfg),
         | 
| 16 | 
            +
                dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
         | 
| 17 | 
            +
                dict(type='DefaultFormatBundle'),
         | 
| 18 | 
            +
                dict(type='Collect', keys=['img', 'gt_semantic_seg'])
         | 
| 19 | 
            +
            ]
         | 
| 20 | 
            +
            test_pipeline = [
         | 
| 21 | 
            +
                dict(type='LoadImageFromFile'),
         | 
| 22 | 
            +
                dict(
         | 
| 23 | 
            +
                    type='MultiScaleFlipAug',
         | 
| 24 | 
            +
                    img_scale=img_scale,
         | 
| 25 | 
            +
                    # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0],
         | 
| 26 | 
            +
                    flip=False,
         | 
| 27 | 
            +
                    transforms=[
         | 
| 28 | 
            +
                        dict(type='Resize', keep_ratio=True),
         | 
| 29 | 
            +
                        dict(type='RandomFlip'),
         | 
| 30 | 
            +
                        dict(type='Normalize', **img_norm_cfg),
         | 
| 31 | 
            +
                        dict(type='ImageToTensor', keys=['img']),
         | 
| 32 | 
            +
                        dict(type='Collect', keys=['img'])
         | 
| 33 | 
            +
                    ])
         | 
| 34 | 
            +
            ]
         | 
| 35 | 
            +
             | 
| 36 | 
            +
            data = dict(
         | 
| 37 | 
            +
                samples_per_gpu=4,
         | 
| 38 | 
            +
                workers_per_gpu=4,
         | 
| 39 | 
            +
                train=dict(
         | 
| 40 | 
            +
                    type='RepeatDataset',
         | 
| 41 | 
            +
                    times=40000,
         | 
| 42 | 
            +
                    dataset=dict(
         | 
| 43 | 
            +
                        type=dataset_type,
         | 
| 44 | 
            +
                        data_root=data_root,
         | 
| 45 | 
            +
                        img_dir='images/training',
         | 
| 46 | 
            +
                        ann_dir='annotations/training',
         | 
| 47 | 
            +
                        pipeline=train_pipeline)),
         | 
| 48 | 
            +
                val=dict(
         | 
| 49 | 
            +
                    type=dataset_type,
         | 
| 50 | 
            +
                    data_root=data_root,
         | 
| 51 | 
            +
                    img_dir='images/validation',
         | 
| 52 | 
            +
                    ann_dir='annotations/validation',
         | 
| 53 | 
            +
                    pipeline=test_pipeline),
         | 
| 54 | 
            +
                test=dict(
         | 
| 55 | 
            +
                    type=dataset_type,
         | 
| 56 | 
            +
                    data_root=data_root,
         | 
| 57 | 
            +
                    img_dir='images/validation',
         | 
| 58 | 
            +
                    ann_dir='annotations/validation',
         | 
| 59 | 
            +
                    pipeline=test_pipeline))
         | 
    	
        annotator/uniformer/configs/_base_/datasets/pascal_context.py
    ADDED
    
    | @@ -0,0 +1,60 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            # dataset settings
         | 
| 2 | 
            +
            dataset_type = 'PascalContextDataset'
         | 
| 3 | 
            +
            data_root = 'data/VOCdevkit/VOC2010/'
         | 
| 4 | 
            +
            img_norm_cfg = dict(
         | 
| 5 | 
            +
                mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            img_scale = (520, 520)
         | 
| 8 | 
            +
            crop_size = (480, 480)
         | 
| 9 | 
            +
             | 
| 10 | 
            +
            train_pipeline = [
         | 
| 11 | 
            +
                dict(type='LoadImageFromFile'),
         | 
| 12 | 
            +
                dict(type='LoadAnnotations'),
         | 
| 13 | 
            +
                dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
         | 
| 14 | 
            +
                dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
         | 
| 15 | 
            +
                dict(type='RandomFlip', prob=0.5),
         | 
| 16 | 
            +
                dict(type='PhotoMetricDistortion'),
         | 
| 17 | 
            +
                dict(type='Normalize', **img_norm_cfg),
         | 
| 18 | 
            +
                dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
         | 
| 19 | 
            +
                dict(type='DefaultFormatBundle'),
         | 
| 20 | 
            +
                dict(type='Collect', keys=['img', 'gt_semantic_seg']),
         | 
| 21 | 
            +
            ]
         | 
| 22 | 
            +
            test_pipeline = [
         | 
| 23 | 
            +
                dict(type='LoadImageFromFile'),
         | 
| 24 | 
            +
                dict(
         | 
| 25 | 
            +
                    type='MultiScaleFlipAug',
         | 
| 26 | 
            +
                    img_scale=img_scale,
         | 
| 27 | 
            +
                    # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
         | 
| 28 | 
            +
                    flip=False,
         | 
| 29 | 
            +
                    transforms=[
         | 
| 30 | 
            +
                        dict(type='Resize', keep_ratio=True),
         | 
| 31 | 
            +
                        dict(type='RandomFlip'),
         | 
| 32 | 
            +
                        dict(type='Normalize', **img_norm_cfg),
         | 
| 33 | 
            +
                        dict(type='ImageToTensor', keys=['img']),
         | 
| 34 | 
            +
                        dict(type='Collect', keys=['img']),
         | 
| 35 | 
            +
                    ])
         | 
| 36 | 
            +
            ]
         | 
| 37 | 
            +
            data = dict(
         | 
| 38 | 
            +
                samples_per_gpu=4,
         | 
| 39 | 
            +
                workers_per_gpu=4,
         | 
| 40 | 
            +
                train=dict(
         | 
| 41 | 
            +
                    type=dataset_type,
         | 
| 42 | 
            +
                    data_root=data_root,
         | 
| 43 | 
            +
                    img_dir='JPEGImages',
         | 
| 44 | 
            +
                    ann_dir='SegmentationClassContext',
         | 
| 45 | 
            +
                    split='ImageSets/SegmentationContext/train.txt',
         | 
| 46 | 
            +
                    pipeline=train_pipeline),
         | 
| 47 | 
            +
                val=dict(
         | 
| 48 | 
            +
                    type=dataset_type,
         | 
| 49 | 
            +
                    data_root=data_root,
         | 
| 50 | 
            +
                    img_dir='JPEGImages',
         | 
| 51 | 
            +
                    ann_dir='SegmentationClassContext',
         | 
| 52 | 
            +
                    split='ImageSets/SegmentationContext/val.txt',
         | 
| 53 | 
            +
                    pipeline=test_pipeline),
         | 
| 54 | 
            +
                test=dict(
         | 
| 55 | 
            +
                    type=dataset_type,
         | 
| 56 | 
            +
                    data_root=data_root,
         | 
| 57 | 
            +
                    img_dir='JPEGImages',
         | 
| 58 | 
            +
                    ann_dir='SegmentationClassContext',
         | 
| 59 | 
            +
                    split='ImageSets/SegmentationContext/val.txt',
         | 
| 60 | 
            +
                    pipeline=test_pipeline))
         | 
    	
        annotator/uniformer/configs/_base_/datasets/pascal_context_59.py
    ADDED
    
    | @@ -0,0 +1,60 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            # dataset settings
         | 
| 2 | 
            +
            dataset_type = 'PascalContextDataset59'
         | 
| 3 | 
            +
            data_root = 'data/VOCdevkit/VOC2010/'
         | 
| 4 | 
            +
            img_norm_cfg = dict(
         | 
| 5 | 
            +
                mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            img_scale = (520, 520)
         | 
| 8 | 
            +
            crop_size = (480, 480)
         | 
| 9 | 
            +
             | 
| 10 | 
            +
            train_pipeline = [
         | 
| 11 | 
            +
                dict(type='LoadImageFromFile'),
         | 
| 12 | 
            +
                dict(type='LoadAnnotations', reduce_zero_label=True),
         | 
| 13 | 
            +
                dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
         | 
| 14 | 
            +
                dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
         | 
| 15 | 
            +
                dict(type='RandomFlip', prob=0.5),
         | 
| 16 | 
            +
                dict(type='PhotoMetricDistortion'),
         | 
| 17 | 
            +
                dict(type='Normalize', **img_norm_cfg),
         | 
| 18 | 
            +
                dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
         | 
| 19 | 
            +
                dict(type='DefaultFormatBundle'),
         | 
| 20 | 
            +
                dict(type='Collect', keys=['img', 'gt_semantic_seg']),
         | 
| 21 | 
            +
            ]
         | 
| 22 | 
            +
            test_pipeline = [
         | 
| 23 | 
            +
                dict(type='LoadImageFromFile'),
         | 
| 24 | 
            +
                dict(
         | 
| 25 | 
            +
                    type='MultiScaleFlipAug',
         | 
| 26 | 
            +
                    img_scale=img_scale,
         | 
| 27 | 
            +
                    # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
         | 
| 28 | 
            +
                    flip=False,
         | 
| 29 | 
            +
                    transforms=[
         | 
| 30 | 
            +
                        dict(type='Resize', keep_ratio=True),
         | 
| 31 | 
            +
                        dict(type='RandomFlip'),
         | 
| 32 | 
            +
                        dict(type='Normalize', **img_norm_cfg),
         | 
| 33 | 
            +
                        dict(type='ImageToTensor', keys=['img']),
         | 
| 34 | 
            +
                        dict(type='Collect', keys=['img']),
         | 
| 35 | 
            +
                    ])
         | 
| 36 | 
            +
            ]
         | 
| 37 | 
            +
            data = dict(
         | 
| 38 | 
            +
                samples_per_gpu=4,
         | 
| 39 | 
            +
                workers_per_gpu=4,
         | 
| 40 | 
            +
                train=dict(
         | 
| 41 | 
            +
                    type=dataset_type,
         | 
| 42 | 
            +
                    data_root=data_root,
         | 
| 43 | 
            +
                    img_dir='JPEGImages',
         | 
| 44 | 
            +
                    ann_dir='SegmentationClassContext',
         | 
| 45 | 
            +
                    split='ImageSets/SegmentationContext/train.txt',
         | 
| 46 | 
            +
                    pipeline=train_pipeline),
         | 
| 47 | 
            +
                val=dict(
         | 
| 48 | 
            +
                    type=dataset_type,
         | 
| 49 | 
            +
                    data_root=data_root,
         | 
| 50 | 
            +
                    img_dir='JPEGImages',
         | 
| 51 | 
            +
                    ann_dir='SegmentationClassContext',
         | 
| 52 | 
            +
                    split='ImageSets/SegmentationContext/val.txt',
         | 
| 53 | 
            +
                    pipeline=test_pipeline),
         | 
| 54 | 
            +
                test=dict(
         | 
| 55 | 
            +
                    type=dataset_type,
         | 
| 56 | 
            +
                    data_root=data_root,
         | 
| 57 | 
            +
                    img_dir='JPEGImages',
         | 
| 58 | 
            +
                    ann_dir='SegmentationClassContext',
         | 
| 59 | 
            +
                    split='ImageSets/SegmentationContext/val.txt',
         | 
| 60 | 
            +
                    pipeline=test_pipeline))
         | 
    	
        annotator/uniformer/configs/_base_/datasets/pascal_voc12.py
    ADDED
    
@@ -0,0 +1,57 @@
# dataset settings
dataset_type = 'PascalVOCDataset'
data_root = 'data/VOCdevkit/VOC2012'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
crop_size = (512, 512)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(2048, 512),
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='JPEGImages',
        ann_dir='SegmentationClass',
        split='ImageSets/Segmentation/train.txt',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='JPEGImages',
        ann_dir='SegmentationClass',
        split='ImageSets/Segmentation/val.txt',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='JPEGImages',
        ann_dir='SegmentationClass',
        split='ImageSets/Segmentation/val.txt',
        pipeline=test_pipeline))
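The mean/std in img_norm_cfg are the standard ImageNet RGB statistics on a 0-255 scale, shared by every dataset config in this commit; with to_rgb=True the loader's BGR image is flipped to RGB before normalizing. A NumPy sketch (illustrative only) of what the Normalize step does:

# Illustrative sketch of dict(type='Normalize', **img_norm_cfg).
import numpy as np

mean = np.array([123.675, 116.28, 103.53])
std = np.array([58.395, 57.12, 57.375])

def normalize(img_bgr):
    # img_bgr: HxWx3 uint8 image as loaded by OpenCV
    img = img_bgr[..., ::-1].astype(np.float32)  # to_rgb=True: BGR -> RGB
    return (img - mean) / std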
    	
        annotator/uniformer/configs/_base_/datasets/pascal_voc12_aug.py
    ADDED
    
@@ -0,0 +1,9 @@
_base_ = './pascal_voc12.py'
# dataset settings
data = dict(
    train=dict(
        ann_dir=['SegmentationClass', 'SegmentationClassAug'],
        split=[
            'ImageSets/Segmentation/train.txt',
            'ImageSets/Segmentation/aug.txt'
        ]))
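This file shows the config inheritance mechanism: _base_ = './pascal_voc12.py' pulls in the previous file, then the data dict merges on top of it, so only train.ann_dir and train.split change — each becomes a list, adding the augmented (SBD-style) annotations alongside the original VOC split. A quick way to inspect the merged result, assuming mmcv is available:

# Illustrative: print fields of the merged config.
from mmcv import Config

cfg = Config.fromfile(
    'annotator/uniformer/configs/_base_/datasets/pascal_voc12_aug.py')
print(cfg.data.train.ann_dir)  # ['SegmentationClass', 'SegmentationClassAug']
print(cfg.data.train.img_dir)  # 'JPEGImages', inherited unchanged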
    	
        annotator/uniformer/configs/_base_/datasets/stare.py
    ADDED
    
@@ -0,0 +1,59 @@
# dataset settings
dataset_type = 'STAREDataset'
data_root = 'data/STARE'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
img_scale = (605, 700)
crop_size = (128, 128)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=img_scale,
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]

data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        type='RepeatDataset',
        times=40000,
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            img_dir='images/training',
            ann_dir='annotations/training',
            pipeline=train_pipeline)),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',
        ann_dir='annotations/validation',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',
        ann_dir='annotations/validation',
        pipeline=test_pipeline))
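STARE has only a handful of training images, so the train split is wrapped in RepeatDataset with times=40000: the dataloader sees one long virtual epoch instead of restarting constantly, which suits iteration-based training runners. Semantically the wrapper behaves roughly like this sketch (illustrative, not mmseg's actual code):

class RepeatDataset:
    def __init__(self, dataset, times):
        self.dataset = dataset
        self.times = times

    def __len__(self):
        return self.times * len(self.dataset)

    def __getitem__(self, idx):
        # indices wrap around, so every pass reuses the same small dataset
        return self.dataset[idx % len(self.dataset)]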
    	
        annotator/uniformer/configs/_base_/default_runtime.py
    ADDED
    
@@ -0,0 +1,14 @@
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook', by_epoch=False),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
cudnn_benchmark = True
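TensorBoard logging ships disabled here; only the text logger runs, every 50 iterations. Enabling it is just a matter of uncommenting the hook (assuming tensorboard is installed):

# Variant with TensorBoard logging enabled (illustrative).
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook', by_epoch=False),
        dict(type='TensorboardLoggerHook')
    ])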
    	
        annotator/uniformer/configs/_base_/models/ann_r50-d8.py
    ADDED
    
@@ -0,0 +1,46 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='ANNHead',
        in_channels=[1024, 2048],
        in_index=[2, 3],
        channels=512,
        project_channels=256,
        query_scales=(1, ),
        key_pool_scales=(1, 3, 6, 8),
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
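This and the following model configs share one skeleton: a dilated ResNet-50 v1c backbone — strides=(1, 2, 1, 1) plus dilations=(1, 1, 2, 4) keep stages 3 and 4 at 1/8 input resolution, hence the '-d8' in the filenames — a method-specific decode head on the deepest features, and an auxiliary FCNHead on stage 3 with loss weight 0.4 for deep supervision. A sketch of instantiating such a model (assuming a recent mmseg where train_cfg/test_cfg live inside the model dict; SyncBN also expects a distributed launch, so single-GPU smoke tests usually swap in plain BN):

# Illustrative: build a segmentor from one of these model configs.
from mmcv import Config
from mmseg.models import build_segmentor

cfg = Config.fromfile(
    'annotator/uniformer/configs/_base_/models/ann_r50-d8.py')
# Optional: swap SyncBN for BN when not running distributed.
for key in ('backbone', 'decode_head', 'auxiliary_head'):
    cfg.model[key].norm_cfg = dict(type='BN', requires_grad=True)
model = build_segmentor(cfg.model)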
    	
        annotator/uniformer/configs/_base_/models/apcnet_r50-d8.py
    ADDED
    
@@ -0,0 +1,44 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='APCHead',
        in_channels=2048,
        in_index=3,
        channels=512,
        pool_scales=(1, 2, 3, 6),
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=dict(type='SyncBN', requires_grad=True),
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
    	
        annotator/uniformer/configs/_base_/models/ccnet_r50-d8.py
    ADDED
    
@@ -0,0 +1,44 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='CCHead',
        in_channels=2048,
        in_index=3,
        channels=512,
        recurrence=2,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
    	
        annotator/uniformer/configs/_base_/models/cgnet.py
    ADDED
    
@@ -0,0 +1,35 @@
# model settings
norm_cfg = dict(type='SyncBN', eps=1e-03, requires_grad=True)
model = dict(
    type='EncoderDecoder',
    backbone=dict(
        type='CGNet',
        norm_cfg=norm_cfg,
        in_channels=3,
        num_channels=(32, 64, 128),
        num_blocks=(3, 21),
        dilations=(2, 4),
        reductions=(8, 16)),
    decode_head=dict(
        type='FCNHead',
        in_channels=256,
        in_index=2,
        channels=256,
        num_convs=0,
        concat_input=False,
        dropout_ratio=0,
        num_classes=19,
        norm_cfg=norm_cfg,
        loss_decode=dict(
            type='CrossEntropyLoss',
            use_sigmoid=False,
            loss_weight=1.0,
            class_weight=[
                2.5959933, 6.7415504, 3.5354059, 9.8663225, 9.690899, 9.369352,
                10.289121, 9.953208, 4.3097677, 9.490387, 7.674431, 9.396905,
                10.347791, 6.3927646, 10.226669, 10.241062, 10.280587,
                10.396974, 10.055647
            ])),
    # model training and testing settings
    train_cfg=dict(sampler=None),
    test_cfg=dict(mode='whole'))
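cgnet.py is the only config here that sets class_weight in its loss: the 19 values re-weight cross-entropy against Cityscapes' class imbalance (frequent classes such as road get ~2.6, rare ones approach 10.4). In plain PyTorch the equivalent criterion would be:

# Illustrative PyTorch equivalent; 255 is the padded/ignored label
# (matching seg_pad_val=255 in the dataset pipelines above).
import torch
import torch.nn as nn

class_weight = torch.tensor([
    2.5959933, 6.7415504, 3.5354059, 9.8663225, 9.690899, 9.369352,
    10.289121, 9.953208, 4.3097677, 9.490387, 7.674431, 9.396905,
    10.347791, 6.3927646, 10.226669, 10.241062, 10.280587,
    10.396974, 10.055647])
criterion = nn.CrossEntropyLoss(weight=class_weight, ignore_index=255)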
    	
        annotator/uniformer/configs/_base_/models/danet_r50-d8.py
    ADDED
    
@@ -0,0 +1,44 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='DAHead',
        in_channels=2048,
        in_index=3,
        channels=512,
        pam_channels=64,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
    	
        annotator/uniformer/configs/_base_/models/deeplabv3_r50-d8.py
    ADDED
    
@@ -0,0 +1,44 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='ASPPHead',
        in_channels=2048,
        in_index=3,
        channels=512,
        dilations=(1, 12, 24, 36),
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
    	
        annotator/uniformer/configs/_base_/models/deeplabv3_unet_s5-d16.py
    ADDED
    
@@ -0,0 +1,50 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained=None,
    backbone=dict(
        type='UNet',
        in_channels=3,
        base_channels=64,
        num_stages=5,
        strides=(1, 1, 1, 1, 1),
        enc_num_convs=(2, 2, 2, 2, 2),
        dec_num_convs=(2, 2, 2, 2),
        downsamples=(True, True, True, True),
        enc_dilations=(1, 1, 1, 1, 1),
        dec_dilations=(1, 1, 1, 1),
        with_cp=False,
        conv_cfg=None,
        norm_cfg=norm_cfg,
        act_cfg=dict(type='ReLU'),
        upsample_cfg=dict(type='InterpConv'),
        norm_eval=False),
    decode_head=dict(
        type='ASPPHead',
        in_channels=64,
        in_index=4,
        channels=16,
        dilations=(1, 12, 24, 36),
        dropout_ratio=0.1,
        num_classes=2,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=128,
        in_index=3,
        channels=64,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=2,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='slide', crop_size=256, stride=170))
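Unlike the '-d8' configs, which test with mode='whole', this UNet config infers with mode='slide': overlapping 256x256 windows are taken every 170 pixels, per-window logits are accumulated, and overlaps are averaged, which bounds memory on large inputs. A simplified sketch of that scheme (illustrative; mmseg's real implementation also pads and clamps windows at the borders):

import torch

def _starts(size, crop, stride):
    s = list(range(0, max(size - crop, 0) + 1, stride))
    if s[-1] + crop < size:  # add a final window flush with the border
        s.append(size - crop)
    return s

def slide_inference(model, img, num_classes, crop=256, stride=170):
    _, _, h, w = img.shape  # assumes h, w >= crop
    logits = torch.zeros(1, num_classes, h, w)
    count = torch.zeros(1, 1, h, w)
    for y in _starts(h, crop, stride):
        for x in _starts(w, crop, stride):
            patch = img[:, :, y:y + crop, x:x + crop]
            logits[:, :, y:y + crop, x:x + crop] += model(patch)
            count[:, :, y:y + crop, x:x + crop] += 1
    return logits / count  # average overlapping predictions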
    	
        annotator/uniformer/configs/_base_/models/deeplabv3plus_r50-d8.py
    ADDED
    
@@ -0,0 +1,46 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='DepthwiseSeparableASPPHead',
        in_channels=2048,
        in_index=3,
        channels=512,
        dilations=(1, 12, 24, 36),
        c1_in_channels=256,
        c1_channels=48,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
    	
        annotator/uniformer/configs/_base_/models/dmnet_r50-d8.py
    ADDED
    
@@ -0,0 +1,44 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='DMHead',
        in_channels=2048,
        in_index=3,
        channels=512,
        filter_sizes=(1, 3, 5, 7),
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=dict(type='SyncBN', requires_grad=True),
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
    	
        annotator/uniformer/configs/_base_/models/dnl_r50-d8.py
    ADDED
    
@@ -0,0 +1,46 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='DNLHead',
        in_channels=2048,
        in_index=3,
        channels=512,
        dropout_ratio=0.1,
        reduction=2,
        use_scale=True,
        mode='embedded_gaussian',
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
    	
        annotator/uniformer/configs/_base_/models/emanet_r50-d8.py
    ADDED
    
@@ -0,0 +1,47 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='EMAHead',
        in_channels=2048,
        in_index=3,
        channels=256,
        ema_channels=512,
        num_bases=64,
        num_stages=3,
        momentum=0.1,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
         | 
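
Note (editor's illustration, not part of the diff): in an mmsegmentation-style layout such as this one, a `_base_` model file like emanet_r50-d8.py above is not run directly; a full experiment config inherits it via `_base_` and overrides fields. A minimal sketch, with a hypothetical file name and dataset/runtime paths:

# hypothetical: configs/emanet/emanet_r50-d8_cityscapes.py
_base_ = [
    '../_base_/models/emanet_r50-d8.py',
    '../_base_/datasets/cityscapes.py',
    '../_base_/default_runtime.py',
]
# Any inherited field can be overridden, e.g. retargeting both heads:
model = dict(
    decode_head=dict(num_classes=19),
    auxiliary_head=dict(num_classes=19))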
    	
annotator/uniformer/configs/_base_/models/encnet_r50-d8.py
ADDED
@@ -0,0 +1,48 @@
+# model settings
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+    type='EncoderDecoder',
+    pretrained='open-mmlab://resnet50_v1c',
+    backbone=dict(
+        type='ResNetV1c',
+        depth=50,
+        num_stages=4,
+        out_indices=(0, 1, 2, 3),
+        dilations=(1, 1, 2, 4),
+        strides=(1, 2, 1, 1),
+        norm_cfg=norm_cfg,
+        norm_eval=False,
+        style='pytorch',
+        contract_dilation=True),
+    decode_head=dict(
+        type='EncHead',
+        in_channels=[512, 1024, 2048],
+        in_index=(1, 2, 3),
+        channels=512,
+        num_codes=32,
+        use_se_loss=True,
+        add_lateral=False,
+        dropout_ratio=0.1,
+        num_classes=19,
+        norm_cfg=norm_cfg,
+        align_corners=False,
+        loss_decode=dict(
+            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+        loss_se_decode=dict(
+            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.2)),
+    auxiliary_head=dict(
+        type='FCNHead',
+        in_channels=1024,
+        in_index=2,
+        channels=256,
+        num_convs=1,
+        concat_input=False,
+        dropout_ratio=0.1,
+        num_classes=19,
+        norm_cfg=norm_cfg,
+        align_corners=False,
+        loss_decode=dict(
+            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+    # model training and testing settings
+    train_cfg=dict(),
+    test_cfg=dict(mode='whole'))
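
Note (editor's illustration): these files are plain Python read by mmcv's Config machinery, so they can be inspected or patched at runtime before the model is built. A sketch, assuming mmcv is installed and the script runs from the repo root:

from mmcv import Config

cfg = Config.fromfile(
    'annotator/uniformer/configs/_base_/models/encnet_r50-d8.py')
print(cfg.model.decode_head.type)       # 'EncHead'
print(cfg.model.decode_head.num_codes)  # 32

# Override fields before building, e.g. retarget to ADE20K's 150 classes:
cfg.model.decode_head.num_classes = 150
cfg.model.auxiliary_head.num_classes = 150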
    	
annotator/uniformer/configs/_base_/models/fast_scnn.py
ADDED
@@ -0,0 +1,57 @@
+# model settings
+norm_cfg = dict(type='SyncBN', requires_grad=True, momentum=0.01)
+model = dict(
+    type='EncoderDecoder',
+    backbone=dict(
+        type='FastSCNN',
+        downsample_dw_channels=(32, 48),
+        global_in_channels=64,
+        global_block_channels=(64, 96, 128),
+        global_block_strides=(2, 2, 1),
+        global_out_channels=128,
+        higher_in_channels=64,
+        lower_in_channels=128,
+        fusion_out_channels=128,
+        out_indices=(0, 1, 2),
+        norm_cfg=norm_cfg,
+        align_corners=False),
+    decode_head=dict(
+        type='DepthwiseSeparableFCNHead',
+        in_channels=128,
+        channels=128,
+        concat_input=False,
+        num_classes=19,
+        in_index=-1,
+        norm_cfg=norm_cfg,
+        align_corners=False,
+        loss_decode=dict(
+            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)),
+    auxiliary_head=[
+        dict(
+            type='FCNHead',
+            in_channels=128,
+            channels=32,
+            num_convs=1,
+            num_classes=19,
+            in_index=-2,
+            norm_cfg=norm_cfg,
+            concat_input=False,
+            align_corners=False,
+            loss_decode=dict(
+                type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)),
+        dict(
+            type='FCNHead',
+            in_channels=64,
+            channels=32,
+            num_convs=1,
+            num_classes=19,
+            in_index=-3,
+            norm_cfg=norm_cfg,
+            concat_input=False,
+            align_corners=False,
+            loss_decode=dict(
+                type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)),
+    ],
+    # model training and testing settings
+    train_cfg=dict(),
+    test_cfg=dict(mode='whole'))
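
Note (editor's illustration): Fast-SCNN is the only file here whose auxiliary_head is a list, with two FCN heads tapping shallower outputs (in_index=-2 and -3). During training each head's cross-entropy is scaled by its loss_weight and the results are summed; a sketch of that bookkeeping (not mmseg's actual code):

# Per-head weights from the config above.
weights = {'decode': 0.4, 'aux_0': 0.4, 'aux_1': 0.4}

def total_loss(raw_losses):
    """raw_losses maps head name -> unweighted loss value."""
    return sum(weights[name] * raw_losses[name] for name in weights)

print(total_loss({'decode': 1.0, 'aux_0': 2.0, 'aux_1': 0.5}))  # 1.4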
    	
annotator/uniformer/configs/_base_/models/fcn_hr18.py
ADDED
@@ -0,0 +1,52 @@
+# model settings
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+    type='EncoderDecoder',
+    pretrained='open-mmlab://msra/hrnetv2_w18',
+    backbone=dict(
+        type='HRNet',
+        norm_cfg=norm_cfg,
+        norm_eval=False,
+        extra=dict(
+            stage1=dict(
+                num_modules=1,
+                num_branches=1,
+                block='BOTTLENECK',
+                num_blocks=(4, ),
+                num_channels=(64, )),
+            stage2=dict(
+                num_modules=1,
+                num_branches=2,
+                block='BASIC',
+                num_blocks=(4, 4),
+                num_channels=(18, 36)),
+            stage3=dict(
+                num_modules=4,
+                num_branches=3,
+                block='BASIC',
+                num_blocks=(4, 4, 4),
+                num_channels=(18, 36, 72)),
+            stage4=dict(
+                num_modules=3,
+                num_branches=4,
+                block='BASIC',
+                num_blocks=(4, 4, 4, 4),
+                num_channels=(18, 36, 72, 144)))),
+    decode_head=dict(
+        type='FCNHead',
+        in_channels=[18, 36, 72, 144],
+        in_index=(0, 1, 2, 3),
+        channels=sum([18, 36, 72, 144]),
+        input_transform='resize_concat',
+        kernel_size=1,
+        num_convs=1,
+        concat_input=False,
+        dropout_ratio=-1,
+        num_classes=19,
+        norm_cfg=norm_cfg,
+        align_corners=False,
+        loss_decode=dict(
+            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+    # model training and testing settings
+    train_cfg=dict(),
+    test_cfg=dict(mode='whole'))
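
Note (editor's illustration): the FCN head above consumes all four HRNet branches at once. input_transform='resize_concat' upsamples each selected feature map to the first one's resolution and concatenates along channels, which is why channels=sum([18, 36, 72, 144]) = 270. A minimal torch sketch of that transform (illustrative, not mmseg's implementation):

import torch
import torch.nn.functional as F

def resize_concat(feats, align_corners=False):
    """Upsample feats[1:] to feats[0]'s spatial size, then concat on channels."""
    target = feats[0].shape[2:]
    up = [feats[0]] + [
        F.interpolate(f, size=target, mode='bilinear',
                      align_corners=align_corners) for f in feats[1:]
    ]
    return torch.cat(up, dim=1)

feats = [torch.randn(1, c, 128 // s, 128 // s)
         for c, s in zip([18, 36, 72, 144], [1, 2, 4, 8])]
print(resize_concat(feats).shape)  # torch.Size([1, 270, 128, 128])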
    	
annotator/uniformer/configs/_base_/models/fcn_r50-d8.py
ADDED
@@ -0,0 +1,45 @@
+# model settings
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+    type='EncoderDecoder',
+    pretrained='open-mmlab://resnet50_v1c',
+    backbone=dict(
+        type='ResNetV1c',
+        depth=50,
+        num_stages=4,
+        out_indices=(0, 1, 2, 3),
+        dilations=(1, 1, 2, 4),
+        strides=(1, 2, 1, 1),
+        norm_cfg=norm_cfg,
+        norm_eval=False,
+        style='pytorch',
+        contract_dilation=True),
+    decode_head=dict(
+        type='FCNHead',
+        in_channels=2048,
+        in_index=3,
+        channels=512,
+        num_convs=2,
+        concat_input=True,
+        dropout_ratio=0.1,
+        num_classes=19,
+        norm_cfg=norm_cfg,
+        align_corners=False,
+        loss_decode=dict(
+            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+    auxiliary_head=dict(
+        type='FCNHead',
+        in_channels=1024,
+        in_index=2,
+        channels=256,
+        num_convs=1,
+        concat_input=False,
+        dropout_ratio=0.1,
+        num_classes=19,
+        norm_cfg=norm_cfg,
+        align_corners=False,
+        loss_decode=dict(
+            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+    # model training and testing settings
+    train_cfg=dict(),
+    test_cfg=dict(mode='whole'))
    	
annotator/uniformer/configs/_base_/models/fcn_unet_s5-d16.py
ADDED
@@ -0,0 +1,51 @@
+# model settings
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+    type='EncoderDecoder',
+    pretrained=None,
+    backbone=dict(
+        type='UNet',
+        in_channels=3,
+        base_channels=64,
+        num_stages=5,
+        strides=(1, 1, 1, 1, 1),
+        enc_num_convs=(2, 2, 2, 2, 2),
+        dec_num_convs=(2, 2, 2, 2),
+        downsamples=(True, True, True, True),
+        enc_dilations=(1, 1, 1, 1, 1),
+        dec_dilations=(1, 1, 1, 1),
+        with_cp=False,
+        conv_cfg=None,
+        norm_cfg=norm_cfg,
+        act_cfg=dict(type='ReLU'),
+        upsample_cfg=dict(type='InterpConv'),
+        norm_eval=False),
+    decode_head=dict(
+        type='FCNHead',
+        in_channels=64,
+        in_index=4,
+        channels=64,
+        num_convs=1,
+        concat_input=False,
+        dropout_ratio=0.1,
+        num_classes=2,
+        norm_cfg=norm_cfg,
+        align_corners=False,
+        loss_decode=dict(
+            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+    auxiliary_head=dict(
+        type='FCNHead',
+        in_channels=128,
+        in_index=3,
+        channels=64,
+        num_convs=1,
+        concat_input=False,
+        dropout_ratio=0.1,
+        num_classes=2,
+        norm_cfg=norm_cfg,
+        align_corners=False,
+        loss_decode=dict(
+            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+    # model training and testing settings
+    train_cfg=dict(),
+    test_cfg=dict(mode='slide', crop_size=256, stride=170))
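
Note (editor's illustration): unlike the 'whole'-image test mode used elsewhere in these files, this UNet config infers with mode='slide': overlapping crop_size=256 windows advance by stride=170 (86 px overlap) and their logits are averaged back into a full-size map. A sketch of the grid arithmetic, assuming square crops as configured:

import math

def slide_windows(h, w, crop=256, stride=170):
    """Top-left corners of the sliding-window grid, clamped to the image."""
    rows = max(math.ceil((h - crop) / stride) + 1, 1)
    cols = max(math.ceil((w - crop) / stride) + 1, 1)
    return [(min(r * stride, max(h - crop, 0)),
             min(c * stride, max(w - crop, 0)))
            for r in range(rows) for c in range(cols)]

print(len(slide_windows(584, 565)))  # 9 windows on a 584x565 image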
    	
annotator/uniformer/configs/_base_/models/fpn_r50.py
ADDED
@@ -0,0 +1,36 @@
+# model settings
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+    type='EncoderDecoder',
+    pretrained='open-mmlab://resnet50_v1c',
+    backbone=dict(
+        type='ResNetV1c',
+        depth=50,
+        num_stages=4,
+        out_indices=(0, 1, 2, 3),
+        dilations=(1, 1, 1, 1),
+        strides=(1, 2, 2, 2),
+        norm_cfg=norm_cfg,
+        norm_eval=False,
+        style='pytorch',
+        contract_dilation=True),
+    neck=dict(
+        type='FPN',
+        in_channels=[256, 512, 1024, 2048],
+        out_channels=256,
+        num_outs=4),
+    decode_head=dict(
+        type='FPNHead',
+        in_channels=[256, 256, 256, 256],
+        in_index=[0, 1, 2, 3],
+        feature_strides=[4, 8, 16, 32],
+        channels=128,
+        dropout_ratio=0.1,
+        num_classes=19,
+        norm_cfg=norm_cfg,
+        align_corners=False,
+        loss_decode=dict(
+            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+    # model training and testing settings
+    train_cfg=dict(),
+    test_cfg=dict(mode='whole'))
    	
annotator/uniformer/configs/_base_/models/fpn_uniformer.py
ADDED
@@ -0,0 +1,35 @@
+# model settings
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+    type='EncoderDecoder',
+    backbone=dict(
+        type='UniFormer',
+        embed_dim=[64, 128, 320, 512],
+        layers=[3, 4, 8, 3],
+        head_dim=64,
+        mlp_ratio=4.,
+        qkv_bias=True,
+        drop_rate=0.,
+        attn_drop_rate=0.,
+        drop_path_rate=0.1),
+    neck=dict(
+        type='FPN',
+        in_channels=[64, 128, 320, 512],
+        out_channels=256,
+        num_outs=4),
+    decode_head=dict(
+        type='FPNHead',
+        in_channels=[256, 256, 256, 256],
+        in_index=[0, 1, 2, 3],
+        feature_strides=[4, 8, 16, 32],
+        channels=128,
+        dropout_ratio=0.1,
+        num_classes=150,
+        norm_cfg=norm_cfg,
+        align_corners=False,
+        loss_decode=dict(
+            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+    # model training and testing settings
+    train_cfg=dict(),
+    test_cfg=dict(mode='whole')
+)
    	
annotator/uniformer/configs/_base_/models/gcnet_r50-d8.py
ADDED
@@ -0,0 +1,46 @@
+# model settings
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+    type='EncoderDecoder',
+    pretrained='open-mmlab://resnet50_v1c',
+    backbone=dict(
+        type='ResNetV1c',
+        depth=50,
+        num_stages=4,
+        out_indices=(0, 1, 2, 3),
+        dilations=(1, 1, 2, 4),
+        strides=(1, 2, 1, 1),
+        norm_cfg=norm_cfg,
+        norm_eval=False,
+        style='pytorch',
+        contract_dilation=True),
+    decode_head=dict(
+        type='GCHead',
+        in_channels=2048,
+        in_index=3,
+        channels=512,
+        ratio=1 / 4.,
+        pooling_type='att',
+        fusion_types=('channel_add', ),
+        dropout_ratio=0.1,
+        num_classes=19,
+        norm_cfg=norm_cfg,
+        align_corners=False,
+        loss_decode=dict(
+            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+    auxiliary_head=dict(
+        type='FCNHead',
+        in_channels=1024,
+        in_index=2,
+        channels=256,
+        num_convs=1,
+        concat_input=False,
+        dropout_ratio=0.1,
+        num_classes=19,
+        norm_cfg=norm_cfg,
+        align_corners=False,
+        loss_decode=dict(
+            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+    # model training and testing settings
+    train_cfg=dict(),
+    test_cfg=dict(mode='whole'))
    	
annotator/uniformer/configs/_base_/models/lraspp_m-v3-d8.py
ADDED
@@ -0,0 +1,25 @@
+# model settings
+norm_cfg = dict(type='SyncBN', eps=0.001, requires_grad=True)
+model = dict(
+    type='EncoderDecoder',
+    backbone=dict(
+        type='MobileNetV3',
+        arch='large',
+        out_indices=(1, 3, 16),
+        norm_cfg=norm_cfg),
+    decode_head=dict(
+        type='LRASPPHead',
+        in_channels=(16, 24, 960),
+        in_index=(0, 1, 2),
+        channels=128,
+        input_transform='multiple_select',
+        dropout_ratio=0.1,
+        num_classes=19,
+        norm_cfg=norm_cfg,
+        act_cfg=dict(type='ReLU'),
+        align_corners=False,
+        loss_decode=dict(
+            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+    # model training and testing settings
+    train_cfg=dict(),
+    test_cfg=dict(mode='whole'))
    	
annotator/uniformer/configs/_base_/models/nonlocal_r50-d8.py
ADDED
@@ -0,0 +1,46 @@
+# model settings
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+    type='EncoderDecoder',
+    pretrained='open-mmlab://resnet50_v1c',
+    backbone=dict(
+        type='ResNetV1c',
+        depth=50,
+        num_stages=4,
+        out_indices=(0, 1, 2, 3),
+        dilations=(1, 1, 2, 4),
+        strides=(1, 2, 1, 1),
+        norm_cfg=norm_cfg,
+        norm_eval=False,
+        style='pytorch',
+        contract_dilation=True),
+    decode_head=dict(
+        type='NLHead',
+        in_channels=2048,
+        in_index=3,
+        channels=512,
+        dropout_ratio=0.1,
+        reduction=2,
+        use_scale=True,
+        mode='embedded_gaussian',
+        num_classes=19,
+        norm_cfg=norm_cfg,
+        align_corners=False,
+        loss_decode=dict(
+            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+    auxiliary_head=dict(
+        type='FCNHead',
+        in_channels=1024,
+        in_index=2,
+        channels=256,
+        num_convs=1,
+        concat_input=False,
+        dropout_ratio=0.1,
+        num_classes=19,
+        norm_cfg=norm_cfg,
+        align_corners=False,
+        loss_decode=dict(
+            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+    # model training and testing settings
+    train_cfg=dict(),
+    test_cfg=dict(mode='whole'))
    	
annotator/uniformer/configs/_base_/models/ocrnet_hr18.py
ADDED
@@ -0,0 +1,68 @@
+# model settings
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+    type='CascadeEncoderDecoder',
+    num_stages=2,
+    pretrained='open-mmlab://msra/hrnetv2_w18',
+    backbone=dict(
+        type='HRNet',
+        norm_cfg=norm_cfg,
+        norm_eval=False,
+        extra=dict(
+            stage1=dict(
+                num_modules=1,
+                num_branches=1,
+                block='BOTTLENECK',
+                num_blocks=(4, ),
+                num_channels=(64, )),
+            stage2=dict(
+                num_modules=1,
+                num_branches=2,
+                block='BASIC',
+                num_blocks=(4, 4),
+                num_channels=(18, 36)),
+            stage3=dict(
+                num_modules=4,
+                num_branches=3,
+                block='BASIC',
+                num_blocks=(4, 4, 4),
+                num_channels=(18, 36, 72)),
+            stage4=dict(
+                num_modules=3,
+                num_branches=4,
+                block='BASIC',
+                num_blocks=(4, 4, 4, 4),
+                num_channels=(18, 36, 72, 144)))),
+    decode_head=[
+        dict(
+            type='FCNHead',
+            in_channels=[18, 36, 72, 144],
+            channels=sum([18, 36, 72, 144]),
+            in_index=(0, 1, 2, 3),
+            input_transform='resize_concat',
+            kernel_size=1,
+            num_convs=1,
+            concat_input=False,
+            dropout_ratio=-1,
+            num_classes=19,
+            norm_cfg=norm_cfg,
+            align_corners=False,
+            loss_decode=dict(
+                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+        dict(
+            type='OCRHead',
+            in_channels=[18, 36, 72, 144],
+            in_index=(0, 1, 2, 3),
+            input_transform='resize_concat',
+            channels=512,
+            ocr_channels=256,
+            dropout_ratio=-1,
+            num_classes=19,
+            norm_cfg=norm_cfg,
+            align_corners=False,
+            loss_decode=dict(
+                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+    ],
+    # model training and testing settings
+    train_cfg=dict(),
+    test_cfg=dict(mode='whole'))
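
Note (editor's illustration): both OCRNet files use CascadeEncoderDecoder with num_stages=2, so decode_head is a list that runs in order: the FCNHead produces coarse logits and the OCRHead refines them, using those logits as its object-region prior. A toy sketch of that control flow (dummy callables stand in for the real modules; the actual interface lives in mmseg):

def cascade_forward(backbone, heads, img):
    feats = backbone(img)
    out = heads[0](feats)        # stage 0: coarse logits from FCNHead
    for head in heads[1:]:       # later stages refine using the prior
        out = head(feats, out)   # OCRHead takes (features, prior logits)
    return out

backbone = lambda img: 'feats'
fcn_head = lambda feats: 'coarse'
ocr_head = lambda feats, prior: 'refined(prior=%s)' % prior
print(cascade_forward(backbone, [fcn_head, ocr_head], 'img'))  # refined(prior=coarse)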
    	
annotator/uniformer/configs/_base_/models/ocrnet_r50-d8.py
ADDED
@@ -0,0 +1,47 @@
+# model settings
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+    type='CascadeEncoderDecoder',
+    num_stages=2,
+    pretrained='open-mmlab://resnet50_v1c',
+    backbone=dict(
+        type='ResNetV1c',
+        depth=50,
+        num_stages=4,
+        out_indices=(0, 1, 2, 3),
+        dilations=(1, 1, 2, 4),
+        strides=(1, 2, 1, 1),
+        norm_cfg=norm_cfg,
+        norm_eval=False,
+        style='pytorch',
+        contract_dilation=True),
+    decode_head=[
+        dict(
+            type='FCNHead',
+            in_channels=1024,
+            in_index=2,
+            channels=256,
+            num_convs=1,
+            concat_input=False,
+            dropout_ratio=0.1,
+            num_classes=19,
+            norm_cfg=norm_cfg,
+            align_corners=False,
+            loss_decode=dict(
+                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+        dict(
+            type='OCRHead',
+            in_channels=2048,
+            in_index=3,
+            channels=512,
+            ocr_channels=256,
+            dropout_ratio=0.1,
+            num_classes=19,
+            norm_cfg=norm_cfg,
+            align_corners=False,
+            loss_decode=dict(
+                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0))
+    ],
+    # model training and testing settings
+    train_cfg=dict(),
+    test_cfg=dict(mode='whole'))
    	
annotator/uniformer/configs/_base_/models/pointrend_r50.py
ADDED
@@ -0,0 +1,56 @@
+# model settings
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+    type='CascadeEncoderDecoder',
+    num_stages=2,
+    pretrained='open-mmlab://resnet50_v1c',
+    backbone=dict(
+        type='ResNetV1c',
+        depth=50,
+        num_stages=4,
+        out_indices=(0, 1, 2, 3),
+        dilations=(1, 1, 1, 1),
+        strides=(1, 2, 2, 2),
+        norm_cfg=norm_cfg,
+        norm_eval=False,
+        style='pytorch',
+        contract_dilation=True),
+    neck=dict(
+        type='FPN',
+        in_channels=[256, 512, 1024, 2048],
+        out_channels=256,
+        num_outs=4),
+    decode_head=[
+        dict(
+            type='FPNHead',
+            in_channels=[256, 256, 256, 256],
+            in_index=[0, 1, 2, 3],
+            feature_strides=[4, 8, 16, 32],
+            channels=128,
+            dropout_ratio=-1,
+            num_classes=19,
+            norm_cfg=norm_cfg,
+            align_corners=False,
+            loss_decode=dict(
+                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+        dict(
+            type='PointHead',
+            in_channels=[256],
+            in_index=[0],
+            channels=256,
+            num_fcs=3,
+            coarse_pred_each_layer=True,
+            dropout_ratio=-1,
+            num_classes=19,
+            align_corners=False,
+            loss_decode=dict(
+                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0))
+    ],
+    # model training and testing settings
+    train_cfg=dict(
+        num_points=2048, oversample_ratio=3, importance_sample_ratio=0.75),
+    test_cfg=dict(
+        mode='whole',
+        subdivision_steps=2,
+        subdivision_num_points=8196,
+        scale_factor=2))
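
Note (editor's illustration): PointRend is the only config here with a non-trivial train_cfg; it sets the point-sampling budget. With num_points=2048, oversample_ratio=3 and importance_sample_ratio=0.75, each step draws 3 * 2048 = 6144 candidate points, keeps the 1536 most uncertain, and fills the remaining 512 uniformly at random. A quick check of those numbers:

num_points = 2048
oversample_ratio = 3
importance_sample_ratio = 0.75

candidates = num_points * oversample_ratio              # 6144 candidates
uncertain = int(importance_sample_ratio * num_points)   # 1536 hardest kept
random_fill = num_points - uncertain                    # 512 uniform samples
print(candidates, uncertain, random_fill)               # 6144 1536 512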
    	
annotator/uniformer/configs/_base_/models/psanet_r50-d8.py
ADDED
@@ -0,0 +1,49 @@
+# model settings
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+    type='EncoderDecoder',
+    pretrained='open-mmlab://resnet50_v1c',
+    backbone=dict(
+        type='ResNetV1c',
+        depth=50,
+        num_stages=4,
+        out_indices=(0, 1, 2, 3),
+        dilations=(1, 1, 2, 4),
+        strides=(1, 2, 1, 1),
+        norm_cfg=norm_cfg,
+        norm_eval=False,
+        style='pytorch',
+        contract_dilation=True),
+    decode_head=dict(
+        type='PSAHead',
+        in_channels=2048,
+        in_index=3,
+        channels=512,
+        mask_size=(97, 97),
+        psa_type='bi-direction',
+        compact=False,
+        shrink_factor=2,
+        normalization_factor=1.0,
+        psa_softmax=True,
+        dropout_ratio=0.1,
+        num_classes=19,
+        norm_cfg=norm_cfg,
+        align_corners=False,
+        loss_decode=dict(
+            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+    auxiliary_head=dict(
+        type='FCNHead',
+        in_channels=1024,
+        in_index=2,
+        channels=256,
+        num_convs=1,
+        concat_input=False,
+        dropout_ratio=0.1,
+        num_classes=19,
+        norm_cfg=norm_cfg,
+        align_corners=False,
+        loss_decode=dict(
+            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+    # model training and testing settings
+    train_cfg=dict(),
+    test_cfg=dict(mode='whole'))
    	
        annotator/uniformer/configs/_base_/models/pspnet_r50-d8.py
    ADDED
    
    | @@ -0,0 +1,44 @@ | |
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='PSPHead',
        in_channels=2048,
        in_index=3,
        channels=512,
        pool_scales=(1, 2, 3, 6),
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
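The PSPHead above is the pyramid-pooling decode head from PSPNet: the 2048-channel stage-4 feature map is average-pooled to 1x1, 2x2, 3x3 and 6x6 grids, each branch is projected to channels=512, upsampled back, and concatenated with the input. A small self-contained sketch of that idea (illustrative only, not the mmseg PSPHead; the real head adds norm layers and a fusion conv):

import torch
import torch.nn as nn
import torch.nn.functional as F

class TinyPPM(nn.Module):
    def __init__(self, in_channels=2048, channels=512, pool_scales=(1, 2, 3, 6)):
        super().__init__()
        self.branches = nn.ModuleList(
            nn.Sequential(
                nn.AdaptiveAvgPool2d(scale),          # pool to scale x scale
                nn.Conv2d(in_channels, channels, 1),  # project to `channels`
                nn.ReLU(inplace=True))
            for scale in pool_scales)

    def forward(self, x):
        h, w = x.shape[2:]
        outs = [x] + [
            F.interpolate(b(x), size=(h, w), mode='bilinear',
                          align_corners=False)  # matches align_corners=False
            for b in self.branches]
        return torch.cat(outs, dim=1)  # 2048 + 4*512 = 4096 channels

feats = torch.randn(1, 2048, 64, 64)
assert TinyPPM()(feats).shape == (1, 4096, 64, 64)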
    	
annotator/uniformer/configs/_base_/models/pspnet_unet_s5-d16.py
ADDED
@@ -0,0 +1,50 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained=None,
    backbone=dict(
        type='UNet',
        in_channels=3,
        base_channels=64,
        num_stages=5,
        strides=(1, 1, 1, 1, 1),
        enc_num_convs=(2, 2, 2, 2, 2),
        dec_num_convs=(2, 2, 2, 2),
        downsamples=(True, True, True, True),
        enc_dilations=(1, 1, 1, 1, 1),
        dec_dilations=(1, 1, 1, 1),
        with_cp=False,
        conv_cfg=None,
        norm_cfg=norm_cfg,
        act_cfg=dict(type='ReLU'),
        upsample_cfg=dict(type='InterpConv'),
        norm_eval=False),
    decode_head=dict(
        type='PSPHead',
        in_channels=64,
        in_index=4,
        channels=16,
        pool_scales=(1, 2, 3, 6),
        dropout_ratio=0.1,
        num_classes=2,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=128,
        in_index=3,
        channels=64,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=2,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='slide', crop_size=256, stride=170))
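Unlike the ResNet configs, this UNet variant tests with mode='slide': the image is tiled into 256x256 crops placed every 170 px (adjacent crops overlap by 86 px), and the per-crop logits are averaged where they overlap. A sketch of just the window placement, assuming the usual clamp-to-border behaviour of mmseg's slide inference:

def slide_starts(length, crop=256, stride=170):
    # Window origins along one axis; the last window is shifted back so it
    # never runs past the border.
    starts, pos = [], 0
    while True:
        starts.append(min(pos, max(length - crop, 0)))
        if pos + crop >= length:
            break
        pos += stride
    return starts

print(slide_starts(512))  # [0, 170, 256] -> crops [0,256), [170,426), [256,512)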
    	
annotator/uniformer/configs/_base_/models/upernet_r50.py
ADDED
@@ -0,0 +1,44 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 1, 1),
        strides=(1, 2, 2, 2),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='UPerHead',
        in_channels=[256, 512, 1024, 2048],
        in_index=[0, 1, 2, 3],
        pool_scales=(1, 2, 3, 6),
        channels=512,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
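UPerHead consumes all four backbone stages at once, which is why in_channels=[256, 512, 1024, 2048] lists the ResNet-50 stage outputs selected by out_indices=(0, 1, 2, 3), and why this backbone keeps ordinary strides=(1, 2, 2, 2) instead of the dilated setup used in the configs above. To see how any of these _base_ fragments resolve, they can be loaded with mmcv's config loader; a sketch assuming the mmcv copy vendored under annotator/uniformer is importable as shown (with upstream mmcv the import would be plain `from mmcv import Config`):

from annotator.uniformer.mmcv import Config

cfg = Config.fromfile(
    'annotator/uniformer/configs/_base_/models/upernet_r50.py')
print(cfg.model.decode_head.in_channels)  # [256, 512, 1024, 2048]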
 
			
